Spaces:
Sleeping
Sleeping
| from transformers import pipeline | |
| import torch | |
| from config import Config | |
| from typing import Dict, Any | |
| import re | |
# Language codes accepted as translation targets, mapped to their English
# display names.
LANGUAGE_CODES = {
    "en": "English",
    "fr": "French",
    "de": "German",
    "es": "Spanish",
    "pt": "Portuguese",
    "ru": "Russian",
    "ja": "Japanese",
    "ko": "Korean",
    "zh": "Chinese",
    "ar": "Arabic",
    "hi": "Hindi",
    "nl": "Dutch",
    "sv": "Swedish",
    "da": "Danish",
    "no": "Norwegian",
    "fi": "Finnish",
    "pl": "Polish",
    "cs": "Czech",
    "hu": "Hungarian",
    "ro": "Romanian",
    "bg": "Bulgarian",
    "hr": "Croatian",
    "sk": "Slovak",
    "sl": "Slovenian",
    "et": "Estonian",
    "lv": "Latvian",
    "lt": "Lithuanian",
    "mt": "Maltese",
    "el": "Greek",
    "tr": "Turkish",
}
# Helsinki-NLP translation checkpoints (Italian -> target language), keyed by
# target language code. Every id follows the "opus-mt-it-<code>" scheme, so
# the mapping is generated from the list of codes.
# NOTE(review): whether each of these checkpoints is actually published has
# not been verified here -- confirm against the model hub.
HELSINKI_MODELS = {
    code: f"Helsinki-NLP/opus-mt-it-{code}"
    for code in (
        "en", "fr", "de", "es", "pt", "ru",
        "nl", "sv", "da", "no", "fi", "pl",
        "cs", "hu", "ro", "bg", "hr", "sk",
        "sl", "et", "lv", "lt", "el", "tr",
    )
}
class TranslationService:
    """Translate Italian text into a target language with per-language pipelines.

    One ``transformers`` translation pipeline is created lazily per target
    language and kept in ``self.translators``. Before translating, HTML tags
    and ``{...}`` fields are replaced by opaque tokens (``HTMLTAG<n>`` /
    ``PLACEHOLDER<n>``) and put back afterwards, so the model does not get a
    chance to rewrite them.
    """

    # One leftmost-first pattern for both kinds of protected span, so a tag
    # nested in braces (or braces nested in a tag) is captured exactly once.
    _PROTECTED_RE = re.compile(r"<[^>]+>|\{[^}]+\}")

    def __init__(self, device: str = "cpu"):
        """Create a service that loads its models on ``device`` ("cuda" or CPU)."""
        self.device = device
        self.translators = {}  # target language code -> loaded pipeline
        # Languages whose model failed to load; avoids retrying the load for
        # every single string of a locale. A new instance retries from scratch.
        self._unavailable = set()

    def _get_translator(self, target_language: str):
        """Return the pipeline for ``target_language``, loading it on first use.

        Returns ``None`` when no model is mapped for the language or when
        loading the mapped model raised.
        """
        cached = self.translators.get(target_language)
        if cached is not None:
            return cached
        if target_language in self._unavailable:
            return None

        model_name = HELSINKI_MODELS.get(target_language)
        if model_name is None:
            return None

        on_gpu = self.device == "cuda"
        try:
            translator = pipeline(
                "translation",
                model=model_name,
                device=0 if on_gpu else -1,
                torch_dtype=torch.float16 if on_gpu else torch.float32,
            )
        except Exception as e:
            # Best-effort: callers fall back to a marker string instead of failing.
            print(f"Modello non disponibile per {target_language}, usando fallback: {e}")
            self._unavailable.add(target_language)
            return None

        self.translators[target_language] = translator
        return translator

    def _extract_placeholders(self, text: str) -> tuple[str, Dict[str, str]]:
        """Replace HTML tags and ``{...}`` fields in ``text`` with tokens.

        Returns the rewritten text and a mapping from each token to the span
        it replaced. Tags are numbered ``HTMLTAG0, HTMLTAG1, ...`` and brace
        fields ``PLACEHOLDER0, PLACEHOLDER1, ...``, each in order of
        appearance.
        """
        placeholders: Dict[str, str] = {}
        counters = {"HTMLTAG": 0, "PLACEHOLDER": 0}

        def protect(match):
            original = match.group(0)
            prefix = "HTMLTAG" if original.startswith("<") else "PLACEHOLDER"
            token = f"{prefix}{counters[prefix]}"
            counters[prefix] += 1
            placeholders[token] = original
            return token

        return self._PROTECTED_RE.sub(protect, text), placeholders

    def _restore_placeholders(self, text: str, placeholders: Dict[str, str]) -> str:
        """Put the original spans back in place of their tokens in ``text``.

        The substitution is done in a single pass with longer tokens tried
        first, so ``HTMLTAG1`` can never rewrite the prefix of ``HTMLTAG10``
        and restored content is never rescanned. Whitespace around a token is
        left exactly as it appears in ``text``: stripping it would glue the
        restored span to the neighbouring words.
        """
        if not placeholders:
            return text
        tokens = sorted(placeholders, key=len, reverse=True)
        token_re = re.compile("|".join(re.escape(token) for token in tokens))
        return token_re.sub(lambda match: placeholders[match.group(0)], text)

    def translate_text(self, text: str, target_language: str) -> str:
        """Translate ``text`` from Italian into ``target_language``.

        Returns the stripped translation with protected spans restored. When
        no translator is available, or the translation call raises, a
        bracketed marker string containing the original text is returned
        instead. Text with nothing translatable (empty, whitespace, or only
        protected spans) is returned stripped without invoking a model.

        Raises:
            ValueError: if ``target_language`` is not in ``LANGUAGE_CODES``.
        """
        if target_language not in LANGUAGE_CODES:
            raise ValueError(f"Lingua non supportata: {target_language}")

        # Nothing for the model to do; skip loading/calling it altogether.
        if not self._PROTECTED_RE.sub("", text).strip():
            return text.strip()

        clean_text, placeholders = self._extract_placeholders(text)

        translator = self._get_translator(target_language)
        if translator is None:
            # Fallback: hand back the source text inside a marker.
            return f"[TRANSLATION NOT AVAILABLE: {text}]"

        try:
            result = translator(clean_text, max_length=512)
            if isinstance(result, list) and len(result) > 0:
                translated_text = result[0]['translation_text']
            else:
                translated_text = str(result)
            translated_text = self._restore_placeholders(translated_text, placeholders)
            return translated_text.strip()
        except Exception as e:
            print(f"Errore durante la traduzione: {e}")
            return f"[TRANSLATION ERROR: {text}]"

    def translate_locale_object(self, locale_data: Dict[str, Any], target_language: str) -> Dict[str, Any]:
        """Translate every top-level string value of ``locale_data``.

        Non-string values are copied through unchanged. Returns a new dict
        with the same keys.

        Raises:
            ValueError: if ``target_language`` is not in ``LANGUAGE_CODES``.
        """
        if target_language not in LANGUAGE_CODES:
            raise ValueError(f"Lingua non supportata: {target_language}")

        return {
            key: self.translate_text(content, target_language)
            if isinstance(content, str)
            else content
            for key, content in locale_data.items()
        }
# Module-wide translation service instance; stays None until first requested.
translation_service = None


def get_translation_service():
    """Return the shared TranslationService, creating it on the first call.

    On creation, the device is taken from the ``DEVICE`` attribute of a new
    ``Config`` instance.
    """
    global translation_service
    if translation_service is not None:
        return translation_service
    translation_service = TranslationService(device=Config().DEVICE)
    return translation_service
def translate_locale(locale_data: Dict[str, Any], target_language: str) -> Dict[str, Any]:
    """Translate a locale mapping through the shared translation service."""
    return get_translation_service().translate_locale_object(locale_data, target_language)
def get_supported_languages():
    """Return the language codes that have an entry in HELSINKI_MODELS."""
    return [code for code in HELSINKI_MODELS]