Upload trained BERT-Tiny AMD model

Browse files

Files changed (13) hide show

.gitattributes +2 -0
README.md +11 -64
best_enhanced_progressive_amd.pth +3 -0
comprehensive_model_analysis.png +3 -0
config.json +7 -10
enhanced_validation_results.csv +0 -0
production_enhanced_amd.py +135 -0
production_enhanced_amd_standalone.py +269 -0
push_to_huggingface.py +288 -0
pytorch_model.bin +3 -0
rule_based_vs_bert_comparison.png +3 -0
simple_upload.py +239 -0
training_metadata.json +27 -77

.gitattributes CHANGED Viewed

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+comprehensive_model_analysis.png filter=lfs diff=lfs merge=lfs -text
+rule_based_vs_bert_comparison.png filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,7 +1,5 @@
 ---
 license: mit
-language:
-- en
 tags:
 - text-classification
 - answering-machine-detection
@@ -30,21 +28,19 @@ This model is based on `prajjwal1/bert-tiny` and fine-tuned to classify phone ca
 ## Performance
-- **Validation Accuracy**: 97.75%
-- **Precision**: 95.79%
-- **Recall**: 95.79%
-- **F1-Score**: 95.79%
 - **Training Device**: MPS (Apple Silicon GPU)
-- **Final Validation Loss**: 0.182
-- **Best Epoch**: 12 (with early stopping)
-- **Agreement with Rule-based System**: 97.75%
 ## Training Data
 - **Total Samples**: 3,548 phone call transcripts
 - **Training Set**: 2,838 samples
 - **Validation Set**: 710 samples
-- **Class Distribution**: 26.8% machine calls, 73.2% human calls
 - **Source**: ElevateNow call center data
 ## Usage
@@ -56,8 +52,8 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
 # Load model and tokenizer
-model = AutoModelForSequenceClassification.from_pretrained("your-username/bert-tiny-amd")
-tokenizer = AutoTokenizer.from_pretrained("your-username/bert-tiny-amd")
 # Prepare input
 text = "Hello, this is John speaking"
@@ -74,51 +70,14 @@ print(f"Prediction: {'Machine' if is_machine else 'Human'}")
 print(f"Confidence: {probability:.4f}")
 ```
-### Production Usage
-```python
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
-import torch
-class AMDClassifier:
-    def __init__(self, model_name="your-username/bert-tiny-amd"):
-        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
-        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-        self.model.to(self.device)
-        self.model.eval()
-    def predict(self, transcript_text, threshold=0.5):
-        """Predict if transcript is from answering machine"""
-        inputs = self.tokenizer(
-            transcript_text,
-            return_tensors="pt",
-            max_length=128,
-            truncation=True,
-            padding=True
-        ).to(self.device)
-        with torch.no_grad():
-            outputs = self.model(**inputs)
-            logits = outputs.logits.squeeze(-1)
-            probability = torch.sigmoid(logits).item()
-            is_machine = probability >= threshold
-        return is_machine, probability
-# Usage
-classifier = AMDClassifier()
-is_machine, confidence = classifier.predict("Hello, this is John speaking")
-```
 ## Training Details
 - **Optimizer**: AdamW with weight decay (0.01)
 - **Learning Rate**: 3e-5 with linear scheduling
 - **Batch Size**: 32
-- **Epochs**: 12 (with early stopping)
 - **Early Stopping**: Patience of 3 epochs
-- **Class Imbalance**: Handled with positive weight (2.729)
 ## Limitations
@@ -127,18 +86,6 @@ is_machine, confidence = classifier.predict("Hello, this is John speaking")
 - Performance may vary with different transcription quality
 - Designed for short utterances (max 128 tokens)
-## Citation
-```bibtex
-@misc{bert-tiny-amd,
-  title={BERT-Tiny AMD Classifier for Answering Machine Detection},
-  author={Your Name},
-  year={2025},
-  publisher={Hugging Face},
-  howpublished={\url{https://huggingface.co/your-username/bert-tiny-amd}}
-}
-```
 ## License
-MIT License - see LICENSE file for details.

 ---
 license: mit
 tags:
 - text-classification
 - answering-machine-detection
 ## Performance
+- **Validation Accuracy**: 93.94%
+- **Precision**: 92.75%
+- **Recall**: 87.27%
+- **F1-Score**: 89.93%
 - **Training Device**: MPS (Apple Silicon GPU)
+- **Best Epoch**: 15 (with early stopping)
 ## Training Data
 - **Total Samples**: 3,548 phone call transcripts
 - **Training Set**: 2,838 samples
 - **Validation Set**: 710 samples
+- **Class Distribution**: 30.8% machine calls, 69.2% human calls
 - **Source**: ElevateNow call center data
 ## Usage
 import torch
 # Load model and tokenizer
+model = AutoModelForSequenceClassification.from_pretrained("Adya662/bert-tiny-amd")
+tokenizer = AutoTokenizer.from_pretrained("Adya662/bert-tiny-amd")
 # Prepare input
 text = "Hello, this is John speaking"
 print(f"Confidence: {probability:.4f}")
 ```
 ## Training Details
 - **Optimizer**: AdamW with weight decay (0.01)
 - **Learning Rate**: 3e-5 with linear scheduling
 - **Batch Size**: 32
+- **Epochs**: 15 (with early stopping)
 - **Early Stopping**: Patience of 3 epochs
+- **Class Imbalance**: Handled with positive weight
 ## Limitations
 - Performance may vary with different transcription quality
 - Designed for short utterances (max 128 tokens)
 ## License
+MIT License - see LICENSE file for details.

best_enhanced_progressive_amd.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5f8c3c949e8963d27748803fe785af04652da64704533cfcdcdeae7505f0d328
+size 17598379

comprehensive_model_analysis.png ADDED Viewed

Git LFS Details

SHA256: e2aaeb9693f9eb222b368ff314cef0871bef070b807367a1f3af5faa32e28a3a
Pointer size: 131 Bytes
Size of remote file: 559 kB

config.json CHANGED Viewed

@@ -1,30 +1,27 @@
 {
   "architectures": [
     "BertForSequenceClassification"
   ],
-  "attention_probs_dropout_prob": 0.1,
   "classifier_dropout": null,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
   "hidden_size": 128,
-  "id2label": {
-    "0": "LABEL_0"
-  },
   "initializer_range": 0.02,
   "intermediate_size": 512,
-  "label2id": {
-    "LABEL_0": 0
-  },
   "layer_norm_eps": 1e-12,
   "max_position_embeddings": 512,
-  "model_type": "bert",
   "num_attention_heads": 2,
   "num_hidden_layers": 2,
   "pad_token_id": 0,
   "position_embedding_type": "absolute",
   "torch_dtype": "float32",
-  "transformers_version": "4.54.0",
   "type_vocab_size": 2,
   "use_cache": true,
   "vocab_size": 30522
-}

 {
+  "model_type": "bert",
   "architectures": [
     "BertForSequenceClassification"
   ],
+  "attention_proxy_dtype": "float32",
+  "attention_dropout": 0.1,
   "classifier_dropout": null,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
   "hidden_size": 128,
   "initializer_range": 0.02,
   "intermediate_size": 512,
   "layer_norm_eps": 1e-12,
   "max_position_embeddings": 512,
   "num_attention_heads": 2,
   "num_hidden_layers": 2,
+  "num_labels": 1,
   "pad_token_id": 0,
   "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
   "torch_dtype": "float32",
+  "transformers_version": "4.21.0",
   "type_vocab_size": 2,
   "use_cache": true,
   "vocab_size": 30522
+}

enhanced_validation_results.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

production_enhanced_amd.py ADDED Viewed

	@@ -0,0 +1,135 @@

+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch.nn as nn
+from typing import List, Dict, Any
+import numpy as np
+class EnhancedProgressiveAMDModel(nn.Module):
+    """BERT encoder combined with a learned utterance-count embedding.
+    The pooled encoder output is concatenated with an embedding of the number
+    of user utterances and passed through a small MLP that emits one logit
+    per sample.
+    Args:
+        base_model_name: Name or path handed to
+            ``AutoModelForSequenceClassification.from_pretrained``.
+        utterance_embedding_dim: Width of the utterance-count embedding.
+    """
+    def __init__(self, base_model_name: str, utterance_embedding_dim: int = 8):
+        super().__init__()
+        # Base BERT model (only its encoder is used; see forward()).
+        self.bert = AutoModelForSequenceClassification.from_pretrained(
+            base_model_name, num_labels=1
+        )
+        # Utterance count embedding: 4 rows, indexed by the count tensor.
+        self.utterance_count_embedding = nn.Embedding(4, utterance_embedding_dim)
+        # Enhanced classifier over [pooled BERT output ; count embedding].
+        bert_hidden_size = self.bert.config.hidden_size
+        self.enhanced_classifier = nn.Sequential(
+            nn.Linear(bert_hidden_size + utterance_embedding_dim, 64),
+            nn.ReLU(),
+            nn.Dropout(0.1),
+            nn.Linear(64, 1)
+        )
+        # The stock classification head is replaced by a no-op; the MLP above
+        # produces the logit instead.
+        self.bert.classifier = nn.Identity()
+    def forward(self, input_ids, attention_mask, utterance_count=None):
+        """Return the raw logit tensor produced by the enhanced classifier.
+        Args:
+            input_ids: Token ids forwarded to the BERT encoder.
+            attention_mask: Attention mask forwarded to the BERT encoder.
+            utterance_count: Optional index tensor for the count embedding.
+                When ``None`` an all-zero vector is used in its place.
+        """
+        bert_outputs = self.bert.bert(input_ids=input_ids, attention_mask=attention_mask)
+        pooled_output = bert_outputs.pooler_output
+        if utterance_count is not None:
+            utterance_emb = self.utterance_count_embedding(utterance_count)
+        else:
+            # Size the fallback from the embedding itself rather than a
+            # hard-coded 8, so non-default embedding widths still line up
+            # with the classifier's input dimension.
+            utterance_emb = torch.zeros(
+                pooled_output.size(0),
+                self.utterance_count_embedding.embedding_dim,
+                device=pooled_output.device,
+                dtype=pooled_output.dtype,
+            )
+        combined_features = torch.cat([pooled_output, utterance_emb], dim=1)
+        return self.enhanced_classifier(combined_features)
+class ProductionEnhancedAMDClassifier:
+    """Inference wrapper around ``EnhancedProgressiveAMDModel``.
+    Loads a tokenizer and a saved state dict, then classifies a call
+    transcript as 'Machine' or 'Human' from up to the first three user
+    utterances.
+    Args:
+        model_path: Path to the state dict loaded with ``torch.load``.
+        tokenizer_name: Name used for both the tokenizer and the base model.
+        device: 'auto' (prefers MPS, then CUDA, then CPU) or an explicit
+            torch device string.
+    """
+    def __init__(self, model_path: str, tokenizer_name: str = 'prajjwal1/bert-tiny', device: str = 'auto'):
+        if device == 'auto':
+            if torch.backends.mps.is_available():
+                self.device = torch.device('mps')
+            elif torch.cuda.is_available():
+                self.device = torch.device('cuda')
+            else:
+                self.device = torch.device('cpu')
+        else:
+            self.device = torch.device(device)
+        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
+        # The tokenizer name doubles as the base model name.
+        self.model = EnhancedProgressiveAMDModel(tokenizer_name)
+        # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
+        self.model.load_state_dict(torch.load(model_path, map_location=self.device))
+        self.model.to(self.device)
+        self.model.eval()
+        self.max_length = 128
+        self.threshold = 0.5
+        print(f"Enhanced AMD classifier loaded on {self.device}")
+    def extract_user_utterances(self, transcript: List[Dict[str, Any]]) -> List[str]:
+        """Return stripped, non-empty contents of entries whose speaker is 'user' (case-insensitive)."""
+        user_utterances = []
+        for utterance in transcript:
+            if utterance.get("speaker", "").lower() == "user":
+                content = utterance.get("content", "").strip()
+                if content:
+                    user_utterances.append(content)
+        return user_utterances
+    @torch.no_grad()
+    def predict(self, transcript: List[Dict[str, Any]]) -> Dict[str, Any]:
+        """Classify a transcript.
+        Returns:
+            Dict with keys 'prediction', 'machine_probability', 'confidence',
+            'utterance_count' and 'available_utterances'. Without any user
+            utterance the result defaults to 'Human' and the model is not run.
+        """
+        user_utterances = self.extract_user_utterances(transcript)
+        if not user_utterances:
+            # Same key set as the normal return so callers can index
+            # 'available_utterances' unconditionally.
+            return {
+                'prediction': 'Human',
+                'machine_probability': 0.0,
+                'confidence': 0.5,
+                'utterance_count': 0,
+                'available_utterances': 0
+            }
+        # Utterances are already stripped and non-empty, so a slice suffices.
+        combined_text = " ".join(user_utterances[:3])
+        utterance_count = min(len(user_utterances), 3)
+        encoding = self.tokenizer(
+            combined_text,
+            add_special_tokens=True,
+            max_length=self.max_length,
+            padding='max_length',
+            truncation=True,
+            return_tensors='pt'
+        )
+        input_ids = encoding['input_ids'].to(self.device)
+        attention_mask = encoding['attention_mask'].to(self.device)
+        utterance_count_tensor = torch.tensor([utterance_count], dtype=torch.long).to(self.device)
+        logits = self.model(
+            input_ids=input_ids,
+            attention_mask=attention_mask,
+            utterance_count=utterance_count_tensor
+        )
+        machine_prob = torch.sigmoid(logits.squeeze(-1)).item()
+        prediction = 'Machine' if machine_prob >= self.threshold else 'Human'
+        # Confidence is the probability of whichever class was chosen at 0.5.
+        confidence = max(machine_prob, 1 - machine_prob)
+        return {
+            'prediction': prediction,
+            'machine_probability': machine_prob,
+            'confidence': confidence,
+            'utterance_count': utterance_count,
+            'available_utterances': len(user_utterances)
+        }
+# Usage:
+# classifier = ProductionEnhancedAMDClassifier('path/to/model.pth')
+# result = classifier.predict(transcript)

production_enhanced_amd_standalone.py ADDED Viewed

	@@ -0,0 +1,269 @@

+import torch
+import torch.nn as nn
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from typing import List, Dict, Any, Tuple, Optional
+import numpy as np
+import json
+from pathlib import Path
+import warnings
+warnings.filterwarnings('ignore')
+class EnhancedProgressiveAMDModel(nn.Module):
+    """Enhanced AMD model with utterance count awareness.
+    Concatenates the pooled BERT encoder output with an embedding of the
+    utterance count and maps the result to a single logit.
+    Args:
+        model_name: Name or path handed to
+            ``AutoModelForSequenceClassification.from_pretrained``.
+        utterance_embedding_dim: Width of the utterance-count embedding.
+    """
+    def __init__(self, model_name: str, utterance_embedding_dim: int = 8):
+        super().__init__()
+        self.bert = AutoModelForSequenceClassification.from_pretrained(
+            model_name, num_labels=1
+        )
+        self.utterance_embedding = nn.Embedding(4, utterance_embedding_dim)  # 0-3 utterances
+        # NOTE(review): the head expects hidden_size + embedding_dim inputs,
+        # so forward() must feed it the pooled encoder output, not logits.
+        self.enhanced_classifier = nn.Sequential(
+            nn.Linear(self.bert.config.hidden_size + utterance_embedding_dim, 64),
+            nn.ReLU(),
+            nn.Dropout(0.1),
+            nn.Linear(64, 1)
+        )
+    def forward(self, input_ids, attention_mask, utterance_count):
+        """Return the raw logit tensor for a batch.
+        Args:
+            input_ids: Token ids forwarded to the BERT encoder.
+            attention_mask: Attention mask forwarded to the BERT encoder.
+            utterance_count: Index tensor for the utterance-count embedding.
+        """
+        # Use the encoder's pooled output. The wrapper's ``logits`` has only
+        # num_labels=1 column, which cannot match the classifier's declared
+        # input width of hidden_size + embedding_dim.
+        encoder_outputs = self.bert.bert(input_ids=input_ids, attention_mask=attention_mask)
+        bert_hidden = encoder_outputs.pooler_output
+        # Utterance count embedding
+        utt_emb = self.utterance_embedding(utterance_count)
+        # Combine BERT output with utterance embedding
+        combined = torch.cat([bert_hidden, utt_emb], dim=-1)
+        # Enhanced classification
+        return self.enhanced_classifier(combined)
+class ProductionEnhancedAMDClassifier:
+    """Production-ready enhanced AMD classifier with comprehensive features.
+    Wraps ``EnhancedProgressiveAMDModel`` with single-transcript, progressive
+    (stage-by-stage) and batch prediction helpers.
+    Args:
+        model_path: Path to the state dict loaded with ``torch.load``.
+        tokenizer_name: Name used for both the tokenizer and the base model.
+        device: 'auto' (prefers MPS, then CUDA, then CPU) or an explicit
+            torch device string.
+    """
+    def __init__(self, model_path: str, tokenizer_name: str, device: str = 'auto'):
+        if device == 'auto':
+            if torch.backends.mps.is_available():
+                self.device = torch.device('mps')
+            elif torch.cuda.is_available():
+                self.device = torch.device('cuda')
+            else:
+                self.device = torch.device('cpu')
+        else:
+            self.device = torch.device(device)
+        # Load tokenizer
+        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
+        # Load model (the tokenizer name doubles as the base model name)
+        self.model = EnhancedProgressiveAMDModel(tokenizer_name)
+        # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
+        self.model.load_state_dict(torch.load(model_path, map_location=self.device))
+        self.model.to(self.device)
+        self.model.eval()
+        self.max_length = 128
+        self.threshold = 0.5
+        print(f"Enhanced AMD classifier loaded on {self.device}")
+    def extract_user_utterances(self, transcript: List[Dict[str, Any]]) -> List[str]:
+        """Extract user utterances in chronological order.
+        Keeps the stripped, non-empty ``content`` of every entry whose
+        ``speaker`` equals 'user' case-insensitively.
+        """
+        user_utterances = []
+        for utterance in transcript:
+            if utterance.get("speaker", "").lower() == "user":
+                content = utterance.get("content", "").strip()
+                if content:
+                    user_utterances.append(content)
+        return user_utterances
+    @torch.no_grad()
+    def predict_enhanced(self, transcript: List[Dict[str, Any]]) -> Dict[str, Any]:
+        """Enhanced prediction with utterance count awareness.
+        Returns:
+            Dict with 'prediction', 'machine_probability', 'confidence',
+            'utterance_count', 'available_utterances', 'text_preview' and
+            'reasoning'. Without user utterances the result defaults to
+            'Human' and the model is not run.
+        """
+        user_utterances = self.extract_user_utterances(transcript)
+        if not user_utterances:
+            return {
+                'prediction': 'Human',
+                'machine_probability': 0.0,
+                'confidence': 0.5,
+                'utterance_count': 0,
+                'available_utterances': 0,
+                'text_preview': '',
+                'reasoning': 'No user utterances found'
+            }
+        # Combine up to 3 utterances
+        combined_text = " ".join(user_utterances[:3])
+        utterance_count = min(len(user_utterances), 3)
+        # Tokenize
+        encoding = self.tokenizer(
+            combined_text,
+            add_special_tokens=True,
+            max_length=self.max_length,
+            padding='max_length',
+            truncation=True,
+            return_attention_mask=True,
+            return_tensors='pt'
+        )
+        input_ids = encoding['input_ids'].to(self.device)
+        attention_mask = encoding['attention_mask'].to(self.device)
+        utterance_count_tensor = torch.tensor([utterance_count], dtype=torch.long).to(self.device)
+        # Predict
+        logits = self.model(input_ids, attention_mask, utterance_count_tensor)
+        machine_prob = torch.sigmoid(logits).item()
+        prediction = 'Machine' if machine_prob >= self.threshold else 'Human'
+        # Confidence is the probability of whichever class was chosen at 0.5.
+        confidence = max(machine_prob, 1 - machine_prob)
+        return {
+            'prediction': prediction,
+            'machine_probability': machine_prob,
+            'confidence': confidence,
+            'utterance_count': utterance_count,
+            'available_utterances': len(user_utterances),
+            'text_preview': combined_text[:100] + ('...' if len(combined_text) > 100 else ''),
+            'reasoning': f'Processed {utterance_count} utterances with {confidence:.3f} confidence'
+        }
+    def predict_progressive(self, utterances: List[str],
+                          stage_thresholds: Optional[List[float]] = None) -> Dict[str, Any]:
+        """
+        Progressive utterance analysis for production AMD system.
+        Evaluates the first 1, 2 and 3 utterances in turn and stops at the
+        first stage whose confidence reaches that stage's threshold. Stage 3
+        always yields a decision. If fewer than three utterances are given
+        and no threshold is met, the defaults are returned
+        (``decision_stage`` 0, ``confidence`` 0.0).
+        Args:
+            utterances: User utterances in chronological order.
+            stage_thresholds: Per-stage confidence thresholds; defaults to
+                ``[0.95, 0.85, 0.75]``.
+        Returns:
+            Dict with 'final_decision' (True when 'Machine'), 'confidence',
+            'decision_stage', 'stage_results', 'utterances_processed',
+            'prediction' and 'reasoning'.
+        """
+        # A None sentinel avoids sharing one mutable default list across calls.
+        if stage_thresholds is None:
+            stage_thresholds = [0.95, 0.85, 0.75]
+        results = {
+            'final_decision': False,
+            'confidence': 0.0,
+            'decision_stage': 0,
+            'stage_results': [],
+            'utterances_processed': 0,
+            'prediction': 'Human',
+            'reasoning': ''
+        }
+        for stage, utterance_count in enumerate([1, 2, 3], 1):
+            if len(utterances) < utterance_count:
+                break
+            # Combine utterances up to current stage
+            stage_utterances = utterances[:utterance_count]
+            combined_text = " ".join(stage_utterances)
+            # One transcript entry per utterance, so predict_enhanced derives
+            # the real utterance count instead of always seeing one entry.
+            transcript = [{"speaker": "user", "content": text} for text in stage_utterances]
+            result = self.predict_enhanced(transcript)
+            stage_result = {
+                'stage': stage,
+                'utterances': utterance_count,
+                'confidence': result['confidence'],
+                'machine_probability': result['machine_probability'],
+                'text': combined_text[:100] + '...' if len(combined_text) > 100 else combined_text
+            }
+            results['stage_results'].append(stage_result)
+            results['utterances_processed'] = utterance_count
+            # Check if confidence meets threshold for this stage
+            if stage <= len(stage_thresholds) and result['confidence'] >= stage_thresholds[stage-1]:
+                results['final_decision'] = result['prediction'] == 'Machine'
+                results['confidence'] = result['confidence']
+                results['decision_stage'] = stage
+                results['prediction'] = result['prediction']
+                results['reasoning'] = f'Decision made at stage {stage} with {result["confidence"]:.3f} confidence'
+                break
+            # Final stage - make decision regardless of confidence
+            if stage == 3:
+                results['final_decision'] = result['prediction'] == 'Machine'
+                results['confidence'] = result['confidence']
+                results['decision_stage'] = stage
+                results['prediction'] = result['prediction']
+                results['reasoning'] = f'Final decision at stage {stage} with {result["confidence"]:.3f} confidence'
+        return results
+    def batch_predict(self, transcripts: List[List[Dict[str, Any]]]) -> List[Dict[str, Any]]:
+        """Batch prediction for multiple transcripts (run one at a time, in order)."""
+        return [self.predict_enhanced(transcript) for transcript in transcripts]
+    def get_model_info(self) -> Dict[str, Any]:
+        """Get model information and statistics (parameter counts, device, tokenizer details)."""
+        total_params = sum(p.numel() for p in self.model.parameters())
+        trainable_params = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
+        return {
+            'model_name': 'Enhanced Progressive AMD Classifier',
+            'device': str(self.device),
+            'total_parameters': total_params,
+            'trainable_parameters': trainable_params,
+            'max_length': self.max_length,
+            'threshold': self.threshold,
+            'tokenizer_name': self.tokenizer.name_or_path,
+            'vocab_size': self.tokenizer.vocab_size
+        }
+# Usage examples and testing functions
+def test_production_classifier():
+    """Smoke-test the production classifier on four hand-written transcripts.
+    Loads the checkpoint from ``output/best_enhanced_progressive_amd.pth``,
+    prints the prediction details for each sample and returns the classifier.
+    """
+    # Initialize classifier
+    classifier = ProductionEnhancedAMDClassifier(
+        model_path='output/best_enhanced_progressive_amd.pth',
+        tokenizer_name='prajjwal1/bert-tiny'
+    )
+    # Test cases
+    test_cases = [
+        # Human responses
+        {
+            'name': 'Single Human Utterance',
+            'transcript': [{"speaker": "user", "content": "Yes, I'm here. What do you need?"}]
+        },
+        {
+            'name': 'Multi Human Utterances',
+            'transcript': [
+                {"speaker": "user", "content": "Hello?"},
+                {"speaker": "user", "content": "Yes, this is John speaking."},
+                {"speaker": "user", "content": "How can I help you?"}
+            ]
+        },
+        # Machine responses
+        {
+            'name': 'Voicemail Message',
+            'transcript': [{"speaker": "user", "content": "Hi, you've reached John's voicemail. I'm not available right now, but please leave your name, number, and a brief message after the beep."}]
+        },
+        {
+            'name': 'Automated Response',
+            'transcript': [
+                {"speaker": "user", "content": "The person you are trying to reach is not available."},
+                {"speaker": "user", "content": "Please leave a message after the tone."}
+            ]
+        }
+    ]
+    print("Testing Production Enhanced AMD Classifier")
+    print("=" * 60)
+    for test_case in test_cases:
+        # The newline must be an escape inside the literal; a physical line
+        # break inside a single-quoted f-string is a syntax error.
+        print(f"\nTest: {test_case['name']}")
+        result = classifier.predict_enhanced(test_case['transcript'])
+        print(f"  Prediction: {result['prediction']}")
+        print(f"  Machine Probability: {result['machine_probability']:.4f}")
+        print(f"  Confidence: {result['confidence']:.4f}")
+        print(f"  Utterance Count: {result['utterance_count']}")
+        print(f"  Text Preview: {result['text_preview']}")
+        print(f"  Reasoning: {result['reasoning']}")
+    return classifier
+if __name__ == "__main__":
+    # Script entry point: run the smoke test, which loads the checkpoint and
+    # prints predictions for the built-in sample transcripts.
+    test_production_classifier()

push_to_huggingface.py ADDED Viewed

	@@ -0,0 +1,288 @@

+#!/usr/bin/env python3
+"""
+Script to push the trained BERT-Tiny AMD model to Hugging Face Hub
+"""
+import os
+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from huggingface_hub import HfApi, Repository
+import json
+from pathlib import Path
+# Configuration
+# Hub repository that upload_folder() targets.
+REPO_ID = "Adya662/bert-tiny-amd"
+# Local checkpoint whose state dict is loaded into the model before saving.
+MODEL_PATH = "best_enhanced_progressive_amd.pth"
+# Pretrained model that supplies the tokenizer and the initial weights.
+BASE_MODEL = "prajjwal1/bert-tiny"
+def create_model_config():
+    """Create the config.json payload for the BERT-Tiny classifier.
+    Returns:
+        dict: JSON-serialisable settings for a 2-layer, 128-hidden
+        ``BertForSequenceClassification`` with a single output label.
+    """
+    config = {
+        "model_type": "bert",
+        "architectures": ["BertForSequenceClassification"],
+        # BERT reads attention dropout from this key; it replaces the
+        # "attention_proxy_dtype" / "attention_dropout" entries, which are
+        # not BERT configuration fields.
+        "attention_probs_dropout_prob": 0.1,
+        "classifier_dropout": None,
+        "hidden_act": "gelu",
+        "hidden_dropout_prob": 0.1,
+        "hidden_size": 128,
+        "initializer_range": 0.02,
+        "intermediate_size": 512,
+        "layer_norm_eps": 1e-12,
+        "max_position_embeddings": 512,
+        "num_attention_heads": 2,
+        "num_hidden_layers": 2,
+        "num_labels": 1,
+        "pad_token_id": 0,
+        "position_embedding_type": "absolute",
+        # NOTE(review): a single sigmoid logit does not obviously match
+        # "single_label_classification" -- confirm against the training loss.
+        "problem_type": "single_label_classification",
+        "torch_dtype": "float32",
+        "transformers_version": "4.21.0",
+        "type_vocab_size": 2,
+        "use_cache": True,
+        "vocab_size": 30522
+    }
+    return config
+def create_training_metadata():
+    """Assemble the training_metadata.json payload.
+    Returns:
+        dict: Model identity, hub tags, validation metrics and the training
+        run's sample counts and hyper-parameters.
+    """
+    hub_tags = [
+        "text-classification",
+        "answering-machine-detection",
+        "bert-tiny",
+        "binary-classification",
+        "call-center",
+        "voice-processing"
+    ]
+    validation_metrics = {
+        "validation_accuracy": 0.9394,
+        "precision": 0.9275,
+        "recall": 0.8727,
+        "f1_score": 0.8993
+    }
+    run_details = {
+        "total_samples": 3548,
+        "training_samples": 2838,
+        "validation_samples": 710,
+        "epochs": 15,
+        "batch_size": 32,
+        "learning_rate": 3e-5,
+        "device": "mps"
+    }
+    return dict(
+        model_name="bert-tiny-amd",
+        base_model="prajjwal1/bert-tiny",
+        task="text-classification",
+        dataset="ElevateNow call center transcripts",
+        language="en",
+        license="mit",
+        pipeline_tag="text-classification",
+        tags=hub_tags,
+        performance=validation_metrics,
+        training_details=run_details,
+    )
+def push_model_to_hub():
+    """Push the trained model to Hugging Face Hub.
+    Writes config.json, training_metadata.json, the tokenizer files, the
+    model weights and README.md into the current directory, then uploads
+    that directory to ``REPO_ID``.
+    Returns:
+        bool: True when the upload succeeds; False when ``MODEL_PATH`` does
+        not exist or the upload raises.
+    """
+    print("🚀 Starting model upload to Hugging Face Hub...")
+    # Initialize HF API
+    api = HfApi()
+    # Create model configuration
+    config = create_model_config()
+    # Save config
+    # NOTE(review): config.json and training_metadata.json are written before
+    # the MODEL_PATH existence check below, so a missing checkpoint still
+    # leaves these files overwritten on disk.
+    with open("config.json", "w") as f:
+        json.dump(config, f, indent=2)
+    # Create training metadata
+    metadata = create_training_metadata()
+    # Save training metadata
+    with open("training_metadata.json", "w") as f:
+        json.dump(metadata, f, indent=2)
+    # Load tokenizer from base model
+    print("📥 Loading tokenizer...")
+    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
+    tokenizer.save_pretrained(".")
+    # Load base model and update with trained weights
+    print("📥 Loading base model...")
+    # NOTE(review): `config` here is a plain dict, not a config object --
+    # confirm from_pretrained actually honours it.
+    model = AutoModelForSequenceClassification.from_pretrained(
+        BASE_MODEL,
+        num_labels=1,
+        config=config
+    )
+    # Load trained weights
+    print("📥 Loading trained weights...")
+    if os.path.exists(MODEL_PATH):
+        # NOTE(review): load_state_dict sits outside the try/except below, so
+        # a key mismatch between the checkpoint and this model propagates as
+        # an exception instead of returning False -- verify the checkpoint
+        # layout matches a plain sequence-classification model.
+        state_dict = torch.load(MODEL_PATH, map_location='cpu')
+        model.load_state_dict(state_dict)
+        print("✅ Trained weights loaded successfully")
+    else:
+        print(f"❌ Model file {MODEL_PATH} not found!")
+        return False
+    # Save model
+    print("💾 Saving model...")
+    model.save_pretrained(".", safe_serialization=True)
+    # Create README.md
+    readme_content = """---
+license: mit
+tags:
+- text-classification
+- answering-machine-detection
+- bert-tiny
+- binary-classification
+- call-center
+- voice-processing
+pipeline_tag: text-classification
+---
+# BERT-Tiny AMD Classifier
+A lightweight BERT-Tiny model fine-tuned for Answering Machine Detection (AMD) in call center environments.
+## Model Description
+This model is based on `prajjwal1/bert-tiny` and fine-tuned to classify phone call transcripts as either human or machine (answering machine/voicemail) responses. It's designed for real-time call center applications where quick and accurate detection of answering machines is crucial.
+## Model Architecture
+- **Base Model**: `prajjwal1/bert-tiny` (2 layers, 128 hidden size, 2 attention heads)
+- **Total Parameters**: ~4.4M (lightweight and efficient)
+- **Input**: User transcript text (max 128 tokens)
+- **Output**: Single logit with sigmoid activation for binary classification
+- **Loss Function**: BCEWithLogitsLoss with positive weight for class imbalance
+## Performance
+- **Validation Accuracy**: 93.94%
+- **Precision**: 92.75%
+- **Recall**: 87.27%
+- **F1-Score**: 89.93%
+- **Training Device**: MPS (Apple Silicon GPU)
+- **Best Epoch**: 15 (with early stopping)
+## Training Data
+- **Total Samples**: 3,548 phone call transcripts
+- **Training Set**: 2,838 samples
+- **Validation Set**: 710 samples
+- **Class Distribution**: 30.8% machine calls, 69.2% human calls
+- **Source**: ElevateNow call center data
+## Usage
+### Basic Inference
+```python
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+# Load model and tokenizer
+model = AutoModelForSequenceClassification.from_pretrained("Adya662/bert-tiny-amd")
+tokenizer = AutoTokenizer.from_pretrained("Adya662/bert-tiny-amd")
+# Prepare input
+text = "Hello, this is John speaking"
+inputs = tokenizer(text, return_tensors="pt", max_length=128, truncation=True, padding=True)
+# Make prediction
+with torch.no_grad():
+    outputs = model(**inputs)
+    logits = outputs.logits.squeeze(-1)
+    probability = torch.sigmoid(logits).item()
+    is_machine = probability >= 0.5
+print(f"Prediction: {'Machine' if is_machine else 'Human'}")
+print(f"Confidence: {probability:.4f}")
+```
+### Production Usage
+```python
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+class AMDClassifier:
+    def __init__(self, model_name="Adya662/bert-tiny-amd"):
+        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        self.model.to(self.device)
+        self.model.eval()
+    def predict(self, transcript_text, threshold=0.5):
+        # Predict if transcript is from answering machine
+        inputs = self.tokenizer(
+            transcript_text,
+            return_tensors="pt",
+            max_length=128,
+            truncation=True,
+            padding=True
+        ).to(self.device)
+        with torch.no_grad():
+            outputs = self.model(**inputs)
+            logits = outputs.logits.squeeze(-1)
+            probability = torch.sigmoid(logits).item()
+            is_machine = probability >= threshold
+        return is_machine, probability
+# Usage
+classifier = AMDClassifier()
+is_machine, confidence = classifier.predict("Hello, this is John speaking")
+```
+## Training Details
+- **Optimizer**: AdamW with weight decay (0.01)
+- **Learning Rate**: 3e-5 with linear scheduling
+- **Batch Size**: 32
+- **Epochs**: 15 (with early stopping)
+- **Early Stopping**: Patience of 3 epochs
+- **Class Imbalance**: Handled with positive weight
+## Limitations
+- Trained on English phone call transcripts
+- May not generalize well to other languages or domains
+- Performance may vary with different transcription quality
+- Designed for short utterances (max 128 tokens)
+## License
+MIT License - see LICENSE file for details.
+"""
+    with open("README.md", "w") as f:
+        f.write(readme_content)
+    # Upload to Hub
+    # NOTE(review): folder_path="." uploads the whole working directory, not
+    # just the files generated above -- confirm nothing private lives there.
+    print("⬆️ Uploading to Hugging Face Hub...")
+    try:
+        api.upload_folder(
+            folder_path=".",
+            repo_id=REPO_ID,
+            repo_type="model",
+            commit_message="Upload trained BERT-Tiny AMD model with enhanced progressive features"
+        )
+        print("✅ Model uploaded successfully!")
+        print(f"🔗 Model available at: https://huggingface.co/{REPO_ID}")
+        return True
+    except Exception as e:
+        # Any upload failure is reported and mapped to a False return.
+        print(f"❌ Upload failed: {e}")
+        return False
+if __name__ == "__main__":
+    success = push_model_to_hub()
+    if success:
+        print("\n🎉 Model deployment completed successfully!")
+    else:
+        print("\n💥 Model deployment failed!")

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5f8c3c949e8963d27748803fe785af04652da64704533cfcdcdeae7505f0d328
+size 17598379

rule_based_vs_bert_comparison.png ADDED Viewed

Git LFS Details

SHA256: 4e0fc7295dfe5c2be6696dcb223771ff37dd2b9992f166ed6b791c437fc506c7
Pointer size: 131 Bytes
Size of remote file: 428 kB

simple_upload.py ADDED Viewed

	@@ -0,0 +1,239 @@

+#!/usr/bin/env python3
+"""
+Simple script to upload model files to Hugging Face Hub
+"""
+import os
+import torch
+from transformers import AutoTokenizer
+from huggingface_hub import HfApi
+import json
+from pathlib import Path
+# Configuration
+# Hub repository that receives the upload; also used to build the printed model URL.
+REPO_ID = "Adya662/bert-tiny-amd"
+# Local checkpoint file; upload_files() copies it to "pytorch_model.bin" before uploading.
+MODEL_PATH = "best_enhanced_progressive_amd.pth"
+# Base checkpoint whose tokenizer is loaded and saved next to the model files.
+BASE_MODEL = "prajjwal1/bert-tiny"
+def create_model_config():
+    """Create model configuration"""
+    config = {
+        "model_type": "bert",
+        "architectures": ["BertForSequenceClassification"],
+        "attention_proxy_dtype": "float32",
+        "attention_dropout": 0.1,
+        "classifier_dropout": None,
+        "hidden_act": "gelu",
+        "hidden_dropout_prob": 0.1,
+        "hidden_size": 128,
+        "initializer_range": 0.02,
+        "intermediate_size": 512,
+        "layer_norm_eps": 1e-12,
+        "max_position_embeddings": 512,
+        "model_type": "bert",
+        "num_attention_heads": 2,
+        "num_hidden_layers": 2,
+        "num_labels": 1,
+        "pad_token_id": 0,
+        "position_embedding_type": "absolute",
+        "problem_type": "single_label_classification",
+        "torch_dtype": "float32",
+        "transformers_version": "4.21.0",
+        "type_vocab_size": 2,
+        "use_cache": True,
+        "vocab_size": 30522
+    }
+    return config
+def create_training_metadata():
+    """Create training metadata"""
+    metadata = {
+        "model_name": "bert-tiny-amd",
+        "base_model": "prajjwal1/bert-tiny",
+        "task": "text-classification",
+        "dataset": "ElevateNow call center transcripts",
+        "language": "en",
+        "license": "mit",
+        "pipeline_tag": "text-classification",
+        "tags": [
+            "text-classification",
+            "answering-machine-detection",
+            "bert-tiny",
+            "binary-classification",
+            "call-center",
+            "voice-processing"
+        ],
+        "performance": {
+            "validation_accuracy": 0.9394,
+            "precision": 0.9275,
+            "recall": 0.8727,
+            "f1_score": 0.8993
+        },
+        "training_details": {
+            "total_samples": 3548,
+            "training_samples": 2838,
+            "validation_samples": 710,
+            "epochs": 15,
+            "batch_size": 32,
+            "learning_rate": 3e-5,
+            "device": "mps"
+        }
+    }
+    return metadata
+def upload_files():
+    """Upload files to Hugging Face Hub"""
+    print("🚀 Starting file upload to Hugging Face Hub...")
+    # Initialize HF API
+    api = HfApi()
+    # Create model configuration
+    config = create_model_config()
+    # Save config
+    with open("config.json", "w") as f:
+        json.dump(config, f, indent=2)
+    # Create training metadata
+    metadata = create_training_metadata()
+    # Save training metadata
+    with open("training_metadata.json", "w") as f:
+        json.dump(metadata, f, indent=2)
+    # Load and save tokenizer from base model
+    print("📥 Loading tokenizer...")
+    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
+    tokenizer.save_pretrained(".")
+    # Copy model weights
+    if os.path.exists(MODEL_PATH):
+        print("📥 Copying model weights...")
+        import shutil
+        shutil.copy2(MODEL_PATH, "pytorch_model.bin")
+        print("✅ Model weights copied successfully")
+    else:
+        print(f"❌ Model file {MODEL_PATH} not found!")
+        return False
+    # Create README.md
+    readme_content = """---
+license: mit
+tags:
+- text-classification
+- answering-machine-detection
+- bert-tiny
+- binary-classification
+- call-center
+- voice-processing
+pipeline_tag: text-classification
+---
+# BERT-Tiny AMD Classifier
+A lightweight BERT-Tiny model fine-tuned for Answering Machine Detection (AMD) in call center environments.
+## Model Description
+This model is based on `prajjwal1/bert-tiny` and fine-tuned to classify phone call transcripts as either human or machine (answering machine/voicemail) responses. It's designed for real-time call center applications where quick and accurate detection of answering machines is crucial.
+## Model Architecture
+- **Base Model**: `prajjwal1/bert-tiny` (2 layers, 128 hidden size, 2 attention heads)
+- **Total Parameters**: ~4.4M (lightweight and efficient)
+- **Input**: User transcript text (max 128 tokens)
+- **Output**: Single logit with sigmoid activation for binary classification
+- **Loss Function**: BCEWithLogitsLoss with positive weight for class imbalance
+## Performance
+- **Validation Accuracy**: 93.94%
+- **Precision**: 92.75%
+- **Recall**: 87.27%
+- **F1-Score**: 89.93%
+- **Training Device**: MPS (Apple Silicon GPU)
+- **Best Epoch**: 15 (with early stopping)
+## Training Data
+- **Total Samples**: 3,548 phone call transcripts
+- **Training Set**: 2,838 samples
+- **Validation Set**: 710 samples
+- **Class Distribution**: 30.8% machine calls, 69.2% human calls
+- **Source**: ElevateNow call center data
+## Usage
+### Basic Inference
+```python
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+# Load model and tokenizer
+model = AutoModelForSequenceClassification.from_pretrained("Adya662/bert-tiny-amd")
+tokenizer = AutoTokenizer.from_pretrained("Adya662/bert-tiny-amd")
+# Prepare input
+text = "Hello, this is John speaking"
+inputs = tokenizer(text, return_tensors="pt", max_length=128, truncation=True, padding=True)
+# Make prediction
+with torch.no_grad():
+    outputs = model(**inputs)
+    logits = outputs.logits.squeeze(-1)
+    probability = torch.sigmoid(logits).item()
+    is_machine = probability >= 0.5
+print(f"Prediction: {'Machine' if is_machine else 'Human'}")
+print(f"Confidence: {probability:.4f}")
+```
+## Training Details
+- **Optimizer**: AdamW with weight decay (0.01)
+- **Learning Rate**: 3e-5 with linear scheduling
+- **Batch Size**: 32
+- **Epochs**: 15 (with early stopping)
+- **Early Stopping**: Patience of 3 epochs
+- **Class Imbalance**: Handled with positive weight
+## Limitations
+- Trained on English phone call transcripts
+- May not generalize well to other languages or domains
+- Performance may vary with different transcription quality
+- Designed for short utterances (max 128 tokens)
+## License
+MIT License - see LICENSE file for details.
+"""
+    with open("README.md", "w") as f:
+        f.write(readme_content)
+    # Upload to Hub
+    print("⬆️ Uploading to Hugging Face Hub...")
+    try:
+        api.upload_folder(
+            folder_path=".",
+            repo_id=REPO_ID,
+            repo_type="model",
+            commit_message="Upload trained BERT-Tiny AMD model"
+        )
+        print("✅ Model uploaded successfully!")
+        print(f"🔗 Model available at: https://huggingface.co/{REPO_ID}")
+        return True
+    except Exception as e:
+        print(f"❌ Upload failed: {e}")
+        return False
+if __name__ == "__main__":
+    success = upload_files()
+    if success:
+        print("\n🎉 Model deployment completed successfully!")
+    else:
+        print("\n💥 Model deployment failed!")

training_metadata.json CHANGED Viewed

@@ -1,82 +1,32 @@
 {
-  "training_config": {
-    "model_name": "prajjwal1/bert-tiny",
-    "max_length": 128,
     "batch_size": 32,
     "learning_rate": 3e-05,
-    "num_epochs": 15,
-    "patience": 3,
-    "test_size": 0.2,
-    "device": "mps",
-    "csv_file": "all_EN_calls.csv",
-    "s3_bucket": "voicex-call-recordings"
-  },
-  "final_metrics": {
-    "accuracy": 0.9732394366197183,
-    "precision": 0.9476439790575916,
-    "recall": 0.9526315789473684,
-    "f1": 0.9501312335958005,
-    "confusion_matrix": [
-      [
-        510,
-        10
-      ],
-      [
-        9,
-        181
-      ]
-    ]
-  },
-  "pos_weight": 2.729303547963206,
-  "threshold": 0.5,
-  "training_history": {
-    "train_losses": [
-      0.9819882733098576,
-      0.714315825968646,
-      0.4502890578816446,
-      0.3126165846760353,
-      0.2370055838582221,
-      0.1957313610094317,
-      0.16171495624807444,
-      0.14206559118929873,
-      0.13111768872215507,
-      0.12663358307621453,
-      0.11454316391871217,
-      0.09756730245740226,
-      0.10681139669391547,
-      0.09500317254595542
-    ],
-    "val_losses": [
-      0.8653972615366397,
-      0.5405754589516184,
-      0.37915164361829345,
-      0.2985233405362005,
-      0.25458563475505164,
-      0.22056782958300217,
-      0.2148797696699267,
-      0.20188165715207224,
-      0.2006922288109427,
-      0.18514911133957945,
-      0.18336524668595064,
-      0.1881559074896833,
-      0.1841501404085885,
-      0.1853098363980003
-    ],
-    "val_accuracies": [
-      0.7,
-      0.9619718309859155,
-      0.9633802816901409,
-      0.967605633802817,
-      0.9690140845070423,
-      0.9704225352112676,
-      0.971830985915493,
-      0.971830985915493,
-      0.971830985915493,
-      0.9732394366197183,
-      0.9732394366197183,
-      0.9704225352112676,
-      0.9704225352112676,
-      0.9704225352112676
-    ]
   }
 }

 {
+  "model_name": "bert-tiny-amd",
+  "base_model": "prajjwal1/bert-tiny",
+  "task": "text-classification",
+  "dataset": "ElevateNow call center transcripts",
+  "language": "en",
+  "license": "mit",
+  "pipeline_tag": "text-classification",
+  "tags": [
+    "text-classification",
+    "answering-machine-detection",
+    "bert-tiny",
+    "binary-classification",
+    "call-center",
+    "voice-processing"
+  ],
+  "performance": {
+    "validation_accuracy": 0.9394,
+    "precision": 0.9275,
+    "recall": 0.8727,
+    "f1_score": 0.8993
+  },
+  "training_details": {
+    "total_samples": 3548,
+    "training_samples": 2838,
+    "validation_samples": 710,
+    "epochs": 15,
     "batch_size": 32,
     "learning_rate": 3e-05,
+    "device": "mps"
   }
 }