"""Smoke test: load the loopllama checkpoint and run one forward pass."""

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Single source of truth for the checkpoint location; the tokenizer and the
# model are both loaded from this directory.
MODEL_PATH = "/projects/llama-cpt/models/loopllama"

# NOTE(security): trust_remote_code=True allows transformers to execute custom
# Python code shipped with the checkpoint. Only use it with a checkpoint
# directory you trust.
tok = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
m = AutoModelForCausalLM.from_pretrained(MODEL_PATH, trust_remote_code=True)

inputs = tok("hello", return_tensors="pt")

# Only the output shape is inspected, so skip autograd bookkeeping.
with torch.no_grad():
    out = m(**inputs)

print(out.logits.shape)  # [1, seq_len, vocab_size]