Try this model
Click to expand example usage
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from uroman import Uroman
import pandas as pd
# Example inference script: romanize the "coptic_text" column of test.csv,
# translate it with the seq2seq checkpoint named below, and print the results.

# Rows are translated in fixed-size chunks so memory use stays bounded no
# matter how many rows the CSV contains. Lower this value if memory is tight.
BATCH_SIZE = 16

# Load input
df = pd.read_csv("test.csv")
uroman = Uroman()
# Missing cells are read as NaN; str(NaN) would become the literal text "nan"
# and be translated as if it were real input. Map them to "" instead so that
# every row keeps its position in the output list.
source_texts = df["coptic_text"].fillna("").astype(str)
df["coptic_text_romanized"] = source_texts.apply(
    lambda text: uroman.romanize_string(text) if text else ""
)

# Load model
model_name = "chaouin/coptic-french-translation-hiero"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Translate
romanized_texts = df["coptic_text_romanized"].tolist()
translations = []
# range() over an empty list yields no iterations, so an empty CSV simply
# produces an empty result instead of handing the tokenizer an empty batch.
for start in range(0, len(romanized_texts), BATCH_SIZE):
    batch = romanized_texts[start:start + BATCH_SIZE]
    # Padding is applied per chunk, so each chunk is only as wide as its
    # longest (possibly truncated) sequence.
    inputs = tokenizer(batch, return_tensors="pt", padding=True, truncation=True)
    output = model.generate(**inputs)
    translations.extend(tokenizer.batch_decode(output, skip_special_tokens=True))
print(translations)
➡️ For a complete script to generate translations, see generate_translation_hiero.py
🔬 For full training and evaluation scripts, visit the project repository
- Downloads last month
- 1