# huihui-ai/phi-4-abliterated
This is an uncensored version of microsoft/phi-4 created with abliteration (see the remove-refusals-with-transformers repository to learn more about the technique).
The abliteration script is a crude, proof-of-concept implementation that removes refusals from an LLM without using TransformerLens.
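For intuition, abliteration estimates a "refusal direction" in the model's residual stream from paired refused/answered prompts and then removes that component from the activations (or folds the removal into the weights). Below is a minimal sketch of the core idea, using placeholder activation tensors rather than the actual remove-refusals-with-transformers code:

```python
import torch

# Placeholder activations: hidden states captured at one layer for prompts
# that trigger refusals ("harmful") and prompts that do not ("harmless").
harmful_acts = torch.randn(64, 4096)
harmless_acts = torch.randn(64, 4096)

# The refusal direction is the normalized difference of the mean activations.
refusal_dir = harmful_acts.mean(dim=0) - harmless_acts.mean(dim=0)
refusal_dir = refusal_dir / refusal_dir.norm()

def ablate(hidden: torch.Tensor, direction: torch.Tensor) -> torch.Tensor:
    """Project the refusal direction out of a batch of hidden states."""
    return hidden - (hidden @ direction).unsqueeze(-1) * direction

# Applying `ablate` to each residual-stream write (or baking the projection
# into the weight matrices) suppresses the refusal behavior.
```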
**Note:** tokenizer changes suggested by Unsloth.ai are applied in the loading code below.
## Load model

```python
import logging

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_PATH = "ikarius/Phi-4-Abliterated-FineGrained-FP8"

def load_model_and_tokenizer(model_path=MODEL_PATH):
    global model, tokenizer, EOT_ID
    logging.info("Initializing Phi-4-Abliterated-FP8 model and tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(
        model_path,
        trust_remote_code=True,
        padding_side="left",
    )
    # Apply the tokenizer regex fix suggested by Unsloth.ai, if present.
    if hasattr(tokenizer, "fix_mistral_regex"):
        tokenizer.fix_mistral_regex = True
    # Phi-4 uses ChatML markers; fail fast if they are missing.
    im_start_id = tokenizer.convert_tokens_to_ids("<|im_start|>")
    im_end_id = tokenizer.convert_tokens_to_ids("<|im_end|>")
    if im_start_id == tokenizer.unk_token_id or im_end_id == tokenizer.unk_token_id:
        logging.error("CRITICAL ERROR: ChatML tokens missing in tokenizer!")
        raise ValueError("ChatML tokens missing from tokenizer vocabulary.")
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        device_map="auto",
        # Required: use bfloat16 when the GPU supports it, otherwise float16.
        dtype=torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16,
        attn_implementation="flash_attention_2",
        trust_remote_code=True,
    )
    model.eval()
    for p in model.parameters():
        p.requires_grad_(False)
    model.config.use_cache = True
    # Use <|im_end|> as the end-of-turn terminator for generation.
    EOT_ID = im_end_id
    model.config.pad_token_id = tokenizer.pad_token_id
    model.config.eos_token_id = EOT_ID
    model.generation_config.pad_token_id = tokenizer.pad_token_id
    model.generation_config.eos_token_id = EOT_ID
    model.generation_config.use_cache = True
    return model, tokenizer
```
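A minimal usage example for the loader above, assuming a CUDA GPU with flash-attn installed:

```python
model, tokenizer = load_model_and_tokenizer()
print(f"Loaded on {model.device} (dtype: {model.dtype})")
```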
## Generate

```python
import logging

import torch

tokenizer = None
model = None
EOT_ID = None

# `persona` was undefined in the original snippet; set your system prompt here.
persona = "You are a helpful assistant."

def generate_response(user_input, history, max_tokens=768, temperature=0.98,
                      top_p=0.95, top_k=67, repetition_penalty=1.0):
    messages = [{"role": "system", "content": persona}]
    valid_history = []
    expected_role = "user"
    all_turns = []
    if history:
        all_turns.extend(history)
    # Append the new user message unless it already is the last turn.
    if not all_turns or all_turns[-1].get("content", "") != user_input:
        all_turns.append({"role": "user", "content": user_input})
    # Rebuild the history so that roles strictly alternate user/assistant.
    for msg in all_turns:
        role = msg.get("role")
        content = msg.get("content", "").strip()
        if not content:
            continue
        if role == expected_role:
            valid_history.append({"role": role, "content": content})
            expected_role = "assistant" if expected_role == "user" else "user"
        else:
            if role == "assistant" and expected_role == "user":
                # Insert a stub user turn so the assistant message stays valid.
                valid_history.append({"role": "user", "content": "[System: Context restored]"})
                valid_history.append({"role": "assistant", "content": content})
                expected_role = "user"
            # Merge consecutive user messages into a single turn.
            elif role == "user" and expected_role == "assistant":
                if valid_history and valid_history[-1]["role"] == "user":
                    valid_history[-1]["content"] += "\n\n" + content
    # Drop a trailing assistant turn so the prompt ends with the user.
    if valid_history and valid_history[-1]["role"] == "assistant":
        valid_history.pop()
    messages.extend(valid_history)
    # Cap the repetition penalty; larger values degrade output quality.
    safe_rep_penalty = min(repetition_penalty, 1.05)
    try:
        encoding = tokenizer.apply_chat_template(
            messages,
            return_tensors="pt",
            return_dict=True,
            add_generation_prompt=True,
        )
        input_ids = encoding.input_ids.to(model.device)
        attention_mask = encoding.attention_mask.to(model.device)
    except Exception as e:
        logging.error(f"Error with apply_chat_template: {e}")
        raise
    terminator = EOT_ID if EOT_ID is not None else tokenizer.eos_token_id
    with torch.inference_mode():
        output_ids = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=max_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            repetition_penalty=safe_rep_penalty,  # was computed but unused in the original
            eos_token_id=terminator,
            pad_token_id=tokenizer.pad_token_id,
            use_cache=True,
            # FP8 KV cache; requires a transformers build that accepts this config.
            cache_config={
                "cache_dtype": torch.float8_e4m3fn,
            },
        )
    # Decode only the newly generated tokens as the reply.
    reply_decoded = tokenizer.decode(
        output_ids[0][input_ids.shape[-1]:],
        skip_special_tokens=True,
    ).strip()
    full_decoded = tokenizer.decode(output_ids[0], skip_special_tokens=False)
    return reply_decoded, full_decoded, False
```
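A simple interactive loop using `generate_response` after calling `load_model_and_tokenizer()` (hypothetical driver code, not part of this repo):

```python
history = []
while True:
    user_input = input("You: ")
    if user_input.strip().lower() in {"quit", "exit"}:
        break
    reply, _, _ = generate_response(user_input, history)
    history.append({"role": "user", "content": user_input})
    history.append({"role": "assistant", "content": reply})
    print(f"Assistant: {reply}")
```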
## Quantization

Quantized with fine-grained FP8; see the Transformers documentation:
https://huggingface.co/docs/transformers/quantization/finegrained_fp8#fine-grained-fp8
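The exact quantization script is not published here; below is a minimal sketch of how such a checkpoint can be produced with the `FineGrainedFP8Config` API from the documentation linked above (the source checkpoint name is an assumption):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, FineGrainedFP8Config

base = "huihui-ai/phi-4-abliterated"  # assumed source checkpoint
quant_config = FineGrainedFP8Config()  # fine-grained FP8 weight quantization

model = AutoModelForCausalLM.from_pretrained(
    base,
    device_map="auto",
    quantization_config=quant_config,
)
tokenizer = AutoTokenizer.from_pretrained(base)

# Save the FP8 checkpoint for reuse.
model.save_pretrained("Phi-4-Abliterated-FineGrained-FP8")
tokenizer.save_pretrained("Phi-4-Abliterated-FineGrained-FP8")
```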
Model tree for ikarius/Phi-4-Abliterated-FineGrained-FP8:
- Base model: microsoft/phi-4