Mariam-33333 commited on
Commit
7382c01
·
verified ·
1 Parent(s): ac647bb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +147 -394
app.py CHANGED
@@ -1,4 +1,5 @@
1
- # integrated_voice_chat.py
 
2
  import os
3
  import re
4
  import random
@@ -6,111 +7,132 @@ import tempfile
6
  import warnings
7
  warnings.filterwarnings("ignore")
8
 
9
- import gradio as gr
10
  import numpy as np
11
  import pandas as pd
12
  import soundfile as sf
13
  import librosa
14
  import joblib
15
- import requests
16
-
17
- # ML libs (optional)
18
- try:
19
- import tensorflow as tf
20
- from tensorflow.keras import layers, models
21
- TF_AVAILABLE = True
22
- except Exception:
23
- TF_AVAILABLE = False
24
-
25
  from sklearn.ensemble import RandomForestClassifier
26
  from sklearn.preprocessing import LabelEncoder
27
 
28
- # Whisper (optional) for transcription
29
- try:
30
- import whisper as openai_whisper
31
- whisper_model = openai_whisper.load_model("base", device="cpu")
32
- except Exception:
33
- whisper_model = None
34
-
35
- # Translation models (optional)
36
- try:
37
- from transformers import MarianMTModel, MarianTokenizer
38
- en_to_ar_model = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-en-ar")
39
- en_to_ar_tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-ar")
40
- ar_to_en_model = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-ar-en")
41
- ar_to_en_tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-ar-en")
42
- except Exception:
43
- en_to_ar_model = en_to_ar_tokenizer = ar_to_en_model = ar_to_en_tokenizer = None
44
-
45
- from gtts import gTTS
46
-
47
- # ---------------- Configuration ----------------
48
- CSV_PATH = "deepseek_csv_20251105_09a9e0.csv" # Use your actual CSV file
49
  AUDIO_FOLDER = "Dataset"
50
- MODEL_DIR = "models"
51
- CNN_MODEL_FILE = os.path.join(MODEL_DIR, "ravdess_cnn.h5")
52
- MODEL_DOWNLOAD_URL = "https://example.com/path/to/ravdess_cnn.h5" # replace if available
53
  SAMPLE_RATE = 16000
54
- N_MFCC = 40
55
- MAX_MFCC_FRAMES = 128
56
- EMOTIONS_ALLOWED = ["sad", "angry", "happy", "neutral"]
57
 
58
- os.makedirs(MODEL_DIR, exist_ok=True)
59
  os.makedirs(AUDIO_FOLDER, exist_ok=True)
60
 
61
- # Diagnostic check
62
- print("Current working directory:", os.getcwd())
63
- print("CSV path:", CSV_PATH)
64
- print("CSV exists:", os.path.exists(CSV_PATH))
65
-
66
- # ---------------- Original chatbot lists (kept) ----------------
67
- MENTAL_KEYWORDS = [
68
- "depression", "depressed", "anxiety", "anxious", "panic", "stress", "sad", "lonely",
69
- "trauma", "mental", "therapy", "therapist", "counselor", "mood", "overwhelmed", "anger",
70
- "fear", "worry", "self-esteem", "confidence", "motivation", "relationship", "cope", "coping",
71
- "relax", "calm", "sleep", "emotion", "feeling", "feel", "thoughts", "help", "life", "advice",
72
- "unmotivated", "lost", "hopeless", "tired", "burnout", "cry", "hurt", "love", "breakup",
73
- "friend", "family", "alone", "heartbroken", "scared", "fearful",
74
- "ana", "zahqan", "daye2", "ha2t", "mota3ab", "mota3eb", "za3lan", "malo", "khalni", "mash3or",
75
- "bakhaf", "w7ed", "msh 3aref", "mash fahem", "malish", "3ayez", "ayez", "7azeen", "mdaye2",
76
- "حزين", "تعبان", "قلق", "خايف", "وحدة", "ضيق", "توتر", "زعلان", "اكتئاب", "علاج",
77
- "مشاعر", "مضغوط", "قلقان", "وحدي", "مش مبسوط", "زهقان", "ضايق", "تعب", "مش مرتاح",
78
- ]
79
 
80
- OFF_TOPIC = [
81
- "recipe", "song", "music", "lyrics", "joke", "funny", "laugh", "code", "python", "program",
82
- "game", "food", "cook", "movie", "film", "series", "sport", "football", "instagram",
83
- "tiktok", "money", "business", "crypto", "ai", "computer",
84
- "نكتة", "ضحك", "اغنية", "اغاني", "طبخ", "اكل", "فيلم", "مسلسل", "كورة", "رياضة",
85
- "بيزنس", "فلوس", "العاب", "لعبة", "كود", "برمجة", "ذكاء اصطناعي"
86
- ]
 
 
87
 
88
- OFF_TOPIC_RESPONSES = [
89
- "I'm here to help with emotional and mental well-being. Let's focus on how you're feeling, coping, or managing your emotions today.",
90
- "I specialize in mental and emotional health conversations. Tell me what's been on your mind lately.",
91
- "Let's bring it back to how you've been feeling — I'm here to help you talk through emotions, stress, or challenges.",
92
- "My goal is to support your mental health. How have things been emotionally for you lately?",
93
- "I'm here for emotional and mental support only. What's been bothering you recently?",
94
- ]
 
 
 
 
 
 
95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  MENTAL_RESPONSES_EN = [
97
- "I hear that you're going through a difficult time. It takes courage to talk about these feelings.",
98
- "Thank you for sharing that with me. Your feelings are completely valid.",
99
- "I'm here to listen and support you. Would you like to talk more about what you're experiencing?",
100
- "It sounds like you're carrying a lot right now. Remember that you don't have to face everything alone.",
101
- "I appreciate you opening up. Let's work through this together, one step at a time.",
102
- "Your feelings matter, and I'm here to support you through this challenging time.",
103
- "It's okay to not be okay. I'm here to listen whenever you're ready to talk.",
104
  ]
105
 
106
- MENTAL_RESPONSES_AR = [
107
- "أسمع أنك تمر بوقت صعب. يتطلب الأمر شجاعة للتحدث عن هذه المشاعر.",
108
- "شكرًا لك على مشاركة ذلك معي. مشاعرك صحيحة تمامًا.",
109
- "أنا هنا للاستماع ودعمك. هل ترغب في التحدث أكثر عما تمر به؟",
110
- "يبدو أنك تحمل الكثير الآن. تذكر أنك لست مضطرًا لمواجهة كل شيء بمفردك.",
111
- "أقدر انفتاحك. دعنا نعمل على هذا معًا، خطوة بخطوة.",
112
- "مشاعرك مهمة، وأنا هنا لدعمك خلال هذا الوقت الصعب.",
113
- "لا بأس في أن لا تكون على ما يرام. أنا هنا للاستماع عندما تكون مستعدًا للتحدث.",
114
  ]
115
 
116
  def contains_arabic(text: str) -> bool:
@@ -118,12 +140,11 @@ def contains_arabic(text: str) -> bool:
118
 
119
  def is_mental_health_related(text: str) -> bool:
120
  text_lower = text.lower()
121
- has_arabic = contains_arabic(text_lower)
122
  if any(word in text_lower for word in OFF_TOPIC):
123
  return False
124
  if any(word in text_lower for word in MENTAL_KEYWORDS):
125
  return True
126
- if has_arabic:
127
  return True
128
  return False
129
 
@@ -133,244 +154,22 @@ def respond(message):
133
  if not is_mental_health_related(message):
134
  return random.choice(OFF_TOPIC_RESPONSES)
135
  if contains_arabic(message):
136
- return random.choice(MENTAL_RESPONSES_AR)
137
  else:
138
  return random.choice(MENTAL_RESPONSES_EN)
139
 
140
- # ---------------- Audio helpers ----------------
141
- def load_audio(path, sr=SAMPLE_RATE):
142
- if not os.path.isfile(path):
143
- raise FileNotFoundError(f"Audio file not found: {path}")
144
- data, orig_sr = sf.read(path, dtype='float32')
145
- if data.ndim > 1:
146
- data = np.mean(data, axis=1)
147
- if orig_sr != sr:
148
- data = librosa.resample(data, orig_sr, sr)
149
- return data
150
-
151
- def compute_mfcc_feature(y, sr=SAMPLE_RATE, n_mfcc=N_MFCC, max_len=MAX_MFCC_FRAMES):
152
- mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
153
- if mfcc.shape[1] < max_len:
154
- pad_width = max_len - mfcc.shape[1]
155
- mfcc = np.pad(mfcc, ((0,0),(0,pad_width)), mode='constant')
156
- else:
157
- mfcc = mfcc[:, :max_len]
158
- mfcc = (mfcc - np.mean(mfcc)) / (np.std(mfcc) + 1e-9)
159
- return mfcc.T # (time, n_mfcc)
160
-
161
- # ---------------- CNN model (RAVDESS-like) ----------------
162
- def build_cnn_model(input_shape=(MAX_MFCC_FRAMES, N_MFCC), n_classes=4):
163
- if not TF_AVAILABLE:
164
- raise RuntimeError("TensorFlow not installed. Install tensorflow to use the CNN model.")
165
- inp = layers.Input(shape=input_shape)
166
- x = layers.Conv1D(64, kernel_size=3, activation='relu', padding='same')(inp)
167
- x = layers.BatchNormalization()(x)
168
- x = layers.MaxPooling1D(2)(x)
169
- x = layers.Conv1D(128, kernel_size=3, activation='relu', padding='same')(x)
170
- x = layers.BatchNormalization()(x)
171
- x = layers.MaxPooling1D(2)(x)
172
- x = layers.Conv1D(256, kernel_size=3, activation='relu', padding='same')(x)
173
- x = layers.GlobalAveragePooling1D()(x)
174
- x = layers.Dense(128, activation='relu')(x)
175
- x = layers.Dropout(0.3)(x)
176
- out = layers.Dense(n_classes, activation='softmax')(x)
177
- model = models.Model(inputs=inp, outputs=out)
178
- model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
179
- return model
180
-
181
- def download_pretrained_model(url=MODEL_DOWNLOAD_URL, dest=CNN_MODEL_FILE):
182
- try:
183
- r = requests.get(url, stream=True, timeout=30)
184
- r.raise_for_status()
185
- with open(dest, 'wb') as f:
186
- for chunk in r.iter_content(chunk_size=8192):
187
- if chunk:
188
- f.write(chunk)
189
- return True
190
- except Exception as e:
191
- print("Model download failed:", e)
192
- return False
193
-
194
- # ---------------- RF fallback ----------------
195
- RF_MODEL_PATH = os.path.join(MODEL_DIR, "rf_emotion.pkl")
196
- RF_META_PATH = os.path.join(MODEL_DIR, "rf_meta.pkl")
197
-
198
- def create_fallback_rf_model():
199
- """Create a simple fallback RF model when no dataset is available"""
200
- print("Creating fallback RF model with synthetic data...")
201
-
202
- # Create synthetic MFCC-like features
203
- np.random.seed(42)
204
- n_samples = 200
205
- n_features = N_MFCC
206
-
207
- X = np.random.randn(n_samples, n_features)
208
- emotions = ["sad", "angry", "happy", "neutral"]
209
- y = np.random.choice(emotions, n_samples)
210
-
211
- # Add some pattern to make it somewhat meaningful
212
- for i, emotion in enumerate(y):
213
- if emotion == "sad":
214
- X[i, :5] -= 1.0 # Lower frequencies for sad
215
- elif emotion == "angry":
216
- X[i, 5:10] += 1.5 # Higher frequencies for angry
217
- elif emotion == "happy":
218
- X[i, :] += 0.5 # Generally higher for happy
219
-
220
- le = LabelEncoder()
221
- y_enc = le.fit_transform(y)
222
-
223
- rf = RandomForestClassifier(n_estimators=100, random_state=42)
224
- rf.fit(X, y_enc)
225
-
226
- joblib.dump(rf, RF_MODEL_PATH)
227
- joblib.dump({"label_encoder": le}, RF_META_PATH)
228
-
229
- return rf, {"label_encoder": le}
230
-
231
- def train_or_load_rf(csv_path=CSV_PATH, rebuild=False):
232
- if os.path.isfile(RF_MODEL_PATH) and not rebuild:
233
- try:
234
- rf = joblib.load(RF_MODEL_PATH)
235
- meta = joblib.load(RF_META_PATH)
236
- print("Loaded pre-trained RF model")
237
- return rf, meta
238
- except Exception as e:
239
- print("Error loading saved RF model, rebuilding...", e)
240
- rebuild = True
241
-
242
- if not os.path.isfile(csv_path):
243
- print(f"CSV not found at {csv_path}. Creating fallback RF model...")
244
- return create_fallback_rf_model()
245
-
246
- try:
247
- df = pd.read_csv(csv_path)
248
- if not set(["audio_path", "emotion"]).issubset(df.columns):
249
- print("CSV missing required columns, using fallback...")
250
- return create_fallback_rf_model()
251
-
252
- X = []
253
- y = []
254
- valid_count = 0
255
-
256
- print("Processing audio files for RF training...")
257
- for _, row in df.iterrows():
258
- if valid_count >= 100: # Limit for faster processing
259
- break
260
-
261
- ap = row["audio_path"]
262
- if not os.path.isabs(ap):
263
- # Try multiple possible locations
264
- possible_paths = [
265
- ap,
266
- os.path.join(os.path.dirname(csv_path), ap),
267
- os.path.join(AUDIO_FOLDER, ap),
268
- os.path.join("Dataset", ap)
269
- ]
270
- ap = None
271
- for path in possible_paths:
272
- if os.path.isfile(path):
273
- ap = path
274
- break
275
-
276
- if not ap or not os.path.isfile(ap):
277
- continue
278
-
279
- try:
280
- y_audio = load_audio(ap)
281
- feat = compute_mfcc_feature(y_audio).mean(axis=0) # simple fixed vector
282
- X.append(feat)
283
- y.append(row["emotion"].lower())
284
- valid_count += 1
285
- if valid_count % 20 == 0:
286
- print(f"Processed {valid_count} audio files...")
287
- except Exception as e:
288
- continue
289
-
290
- if len(X) == 0:
291
- print("No valid audio files found, using fallback...")
292
- return create_fallback_rf_model()
293
-
294
- X = np.vstack(X)
295
- le = LabelEncoder()
296
- y_enc = le.fit_transform(y)
297
-
298
- rf = RandomForestClassifier(n_estimators=200, random_state=42)
299
- rf.fit(X, y_enc)
300
-
301
- joblib.dump(rf, RF_MODEL_PATH)
302
- joblib.dump({"label_encoder": le}, RF_META_PATH)
303
-
304
- print(f"RF model trained successfully with {len(X)} samples")
305
- return rf, {"label_encoder": le}
306
-
307
- except Exception as e:
308
- print(f"Error training RF model: {e}, using fallback...")
309
- return create_fallback_rf_model()
310
-
311
- # ---------------- On-demand model loader ----------------
312
- _cnn_model = None
313
- _rf_model = None
314
- _rf_meta = None
315
- _label_map = {i: emo for i, emo in enumerate(EMOTIONS_ALLOWED)}
316
-
317
- def prepare_model_on_demand():
318
- global _cnn_model, _rf_model, _rf_meta
319
- # Try local CNN model if TF available
320
- if TF_AVAILABLE and os.path.isfile(CNN_MODEL_FILE):
321
- try:
322
- _cnn_model = tf.keras.models.load_model(CNN_MODEL_FILE)
323
- print("Loaded CNN model")
324
- return "cnn"
325
- except Exception as e:
326
- print("Failed to load local CNN model:", e)
327
- # Try to download
328
- if TF_AVAILABLE and not os.path.isfile(CNN_MODEL_FILE):
329
- try:
330
- ok = download_pretrained_model()
331
- if ok and os.path.isfile(CNN_MODEL_FILE):
332
- _cnn_model = tf.keras.models.load_model(CNN_MODEL_FILE)
333
- print("Downloaded and loaded CNN model")
334
- return "cnn"
335
- except Exception as e:
336
- print("Download/load of CNN failed:", e)
337
- # Fallback to RF
338
- _rf_model, _rf_meta = train_or_load_rf()
339
- print("Using RF model for emotion detection")
340
- return "rf"
341
-
342
- def predict_emotion_from_audiofile(audio_filepath):
343
- """
344
- Loads model on first use (on-demand), extracts features, and returns one of EMOTIONS_ALLOWED.
345
- """
346
- global _cnn_model, _rf_model, _rf_meta
347
- if _cnn_model is None and _rf_model is None:
348
- model_type = prepare_model_on_demand()
349
- else:
350
- model_type = "cnn" if _cnn_model is not None else "rf"
351
-
352
  try:
353
- y_audio = load_audio(audio_filepath)
354
-
355
- if model_type == "cnn" and _cnn_model is not None:
356
- mf = compute_mfcc_feature(y_audio) # (time, n_mfcc)
357
- inp = np.expand_dims(mf, axis=0)
358
- preds = _cnn_model.predict(inp, verbose=0)
359
- idx = int(np.argmax(preds, axis=1)[0])
360
- label = _label_map.get(idx, EMOTIONS_ALLOWED[idx % len(EMOTIONS_ALLOWED)])
361
- return label
362
- else:
363
- feat = compute_mfcc_feature(y_audio).mean(axis=0)
364
- pred_enc = _rf_model.predict([feat])[0]
365
- label = _rf_meta["label_encoder"].inverse_transform([pred_enc])[0]
366
- label = label.lower()
367
- mapping = {"sadness": "sad", "joy":"happy", "happiness":"happy", "neutral":"neutral", "anger":"angry"}
368
- return mapping.get(label, label)
369
  except Exception as e:
370
- print(f"Error in emotion prediction: {e}")
371
- return random.choice(EMOTIONS_ALLOWED)
372
 
373
- # ---------------- Supportive short messages (Style 3) ----------------
374
  SUPPORT_MESSAGES = {
375
  "sad": "I'm sorry you're feeling sad. I'm here for you.",
376
  "angry": "It's okay to feel angry. I'm here to listen.",
@@ -378,100 +177,53 @@ SUPPORT_MESSAGES = {
378
  "neutral": "Thanks for sharing. I'm here whenever you need to talk."
379
  }
380
 
381
- def make_tts_for_message(text, lang="en"):
382
- try:
383
- tts = gTTS(text, lang=lang)
384
- tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
385
- tts.save(tmp.name)
386
- return tmp.name
387
- except Exception as e:
388
- print(f"TTS error: {e}")
389
- return None
390
 
391
- # ---------------- Combined Voice Chat (now with emotion detection) ----------------
392
  def voice_chat_combined(audio_path, language):
393
- """
394
- Steps:
395
- 1) transcribe audio (whisper if available)
396
- 2) detect emotion from audio tone
397
- 3) combine meaning + emotion to craft a short supportive message
398
- 4) return text and TTS audio path
399
- """
400
  if not audio_path:
401
- return "No audio received. Please speak into the microphone.", None
402
 
403
- # 1) Transcribe
404
- user_text = ""
405
- try:
406
- if whisper_model is not None:
407
- res = whisper_model.transcribe(audio_path)
408
- user_text = res.get("text", "").strip()
409
- else:
410
- # fallback: attempt a quick librosa-based silence check and return empty transcription
411
- user_text = ""
412
- except Exception as e:
413
- user_text = ""
414
-
415
- # 2) Emotion detection from tone
416
- try:
417
- emotion = predict_emotion_from_audiofile(audio_path)
418
- print(f"Detected emotion: {emotion}")
419
- except Exception as e:
420
- print(f"Error detecting emotion: {e}")
421
- emotion = random.choice(EMOTIONS_ALLOWED)
422
-
423
- # 3) Craft combined response (short & simple style)
424
- emo_cap = emotion.capitalize()
425
- support = SUPPORT_MESSAGES.get(emotion, "I hear you. I'm here for you.")
426
 
427
- # include a brief echo of user text if available (first 60 chars)
428
- if user_text:
429
- echo = user_text.strip()
430
- if len(echo) > 60:
431
- echo = echo[:57].rsplit(" ", 1)[0] + "..."
432
- combined_text = f"Detected Emotion: {emo_cap}\n{support}\nYou said: \"{echo}\""
433
- else:
434
- combined_text = f"Detected Emotion: {emo_cap}\n{support}"
435
-
436
- # 4) TTS (language selection: use Arabic if language == Arabic and gTTS supports it)
437
- tts_lang = "ar" if (language and language.lower().startswith("arab")) else "en"
438
  tts_path = make_tts_for_message(support, lang=tts_lang)
 
 
439
 
440
- return combined_text, tts_path
441
-
442
- # ---------------- Gradio UI ----------------
443
  def clear_text():
444
  return "", ""
445
 
446
- with gr.Blocks(title="🧠 Mental Health Therapy Chatbot (Voice + Emotion)") as demo:
447
  gr.Markdown("# 🧠 Mental Health Therapy Chatbot")
448
- gr.Markdown("A supportive space for mental health conversations in English and Arabic")
449
 
450
  with gr.Tabs():
451
- # Text Chat (unchanged)
452
  with gr.Tab("💬 Text Chat"):
453
- gr.Markdown("### Chat with me about how you're feeling")
454
  with gr.Row():
455
  with gr.Column():
456
- text_input = gr.Textbox(
457
- label="Type your message here...",
458
- placeholder="How are you feeling today?",
459
- lines=3
460
- )
461
  text_submit = gr.Button("Send Message", variant="primary")
462
  with gr.Column():
463
- text_output = gr.Textbox(
464
- label="Response",
465
- interactive=False,
466
- lines=5
467
- )
468
  text_submit.click(fn=respond, inputs=[text_input], outputs=[text_output])
469
  clear_btn = gr.Button("Clear Conversation")
470
  clear_btn.click(fn=clear_text, outputs=[text_input, text_output])
471
 
472
- # Voice Chat (merged with emotion detection)
473
  with gr.Tab("🎙️ Voice Chat"):
474
- gr.Markdown("### Speak to me in English or Arabic — I'll listen to what you say AND how you say it.")
475
  with gr.Row():
476
  with gr.Column():
477
  audio_input_v = gr.Audio(sources=["microphone"], type="filepath", label="🎤 Speak Here")
@@ -480,8 +232,9 @@ with gr.Blocks(title="🧠 Mental Health Therapy Chatbot (Voice + Emotion)") as
480
  with gr.Column():
481
  voice_output_text = gr.Textbox(label="💬 Chatbot Response (text)", lines=6, interactive=False)
482
  voice_output_audio = gr.Audio(label="🔊 Voice Output (TTS)", interactive=False)
483
- voice_submit.click(fn=voice_chat_combined, inputs=[audio_input_v, language_input], outputs=[voice_output_text, voice_output_audio])
 
484
 
485
  if __name__ == "__main__":
486
  print("Starting Mental Health Therapy Chatbot...")
487
- demo.launch(share=True)
 
1
+ # ======================= app.py =======================
2
+
3
  import os
4
  import re
5
  import random
 
7
  import warnings
8
  warnings.filterwarnings("ignore")
9
 
 
10
  import numpy as np
11
  import pandas as pd
12
  import soundfile as sf
13
  import librosa
14
  import joblib
15
+ import gradio as gr
16
+ from gtts import gTTS
 
 
 
 
 
 
 
 
17
  from sklearn.ensemble import RandomForestClassifier
18
  from sklearn.preprocessing import LabelEncoder
19
 
20
# ======================= Configuration =======================
CSV_PATH = "deepseek_csv_20251105_09a9e0.csv"    # legacy audio-path/emotion CSV — appears unused in this version; TODO confirm
MULTIMODAL_CSV = "multimodal_audio_dataset.csv"  # synthetic prosody training set, (re)generated on first run
AUDIO_FOLDER = "Dataset"                         # local folder for stored audio clips
SAMPLE_RATE = 16000                              # target sample rate (Hz) for all audio loading
# Label set produced by the audio RF classifier; also the fallback pool
# when prediction fails.
EMOTIONS_ALLOWED = ["sad", "happy", "anxious", "neutral", "angry"]

os.makedirs(AUDIO_FOLDER, exist_ok=True)
28
 
29
+ # ======================= Audio Feature Extraction =======================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
def extract_audio_features(audio_file):
    """Extract prosodic features from an audio file for emotion classification.

    Returns a dict of plain float scalars:
      - pitch_mean / pitch_std : fundamental-frequency statistics (Hz)
      - energy_mean            : mean RMS energy
      - tempo                  : estimated speaking rate (BPM)
    """
    y, sr = librosa.load(audio_file, sr=SAMPLE_RATE)
    features = {}

    # Pitch: keep the strongest pitch candidate per frame, skipping silent frames.
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
    pitch_values = [pitches[magnitudes[:, t].argmax(), t]
                    for t in range(pitches.shape[1])
                    if magnitudes[:, t].max() > 0]

    # Coerce every feature to a plain float so downstream np.array([[...]])
    # builds a clean 2-D numeric row for the RF model.
    features['pitch_mean'] = float(np.mean(pitch_values)) if pitch_values else 0.0
    features['pitch_std'] = float(np.std(pitch_values)) if pitch_values else 0.0
    features['energy_mean'] = float(np.mean(librosa.feature.rms(y=y)[0]))
    # librosa >= 0.10 returns tempo as a 1-element ndarray; unwrap to a scalar.
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    features['tempo'] = float(np.atleast_1d(tempo)[0])

    return features
54
 
55
def generate_audio_training_data(n_samples=100):
    """Build a synthetic prosody dataset (pitch/energy/tempo) for 5 emotions.

    Writes the dataset to MULTIMODAL_CSV and returns it as a DataFrame.
    n_samples rows are generated per emotion.
    """
    np.random.seed(42)

    # Per-emotion prosody profile:
    # (pitch-mean normal, pitch-std normal, energy uniform range, tempo uniform range)
    profiles = {
        'sad':     ((130, 5),  (15, 5),  (0.015, 0.04), (70, 90)),
        'happy':   ((220, 10), (45, 10), (0.1, 0.15),   (100, 130)),
        'anxious': ((180, 10), (60, 10), (0.06, 0.09),  (120, 150)),
        'neutral': ((160, 10), (25, 5),  (0.05, 0.08),  (90, 110)),
        'angry':   ((210, 10), (50, 10), (0.12, 0.18),  (120, 160)),
    }

    rows = []
    for emo, (pm, ps, er, tr) in profiles.items():
        for _ in range(n_samples):
            # Draw order matches the feature column order below.
            rows.append([
                np.random.normal(*pm),
                np.random.normal(*ps),
                np.random.uniform(*er),
                np.random.uniform(*tr),
                emo,
            ])

    df = pd.DataFrame(rows, columns=['pitch_mean','pitch_std','energy_mean','tempo','true_emotion'])
    df.to_csv(MULTIMODAL_CSV, index=False)
    print("✓ Multimodal audio dataset saved as 'multimodal_audio_dataset.csv'")
    return df
96
+
97
# ======================= Train Audio RF Model =======================
# Module-level training: build the synthetic prosody dataset if absent,
# then fit a RandomForest on it at import time (runs once on startup).
if not os.path.exists(MULTIMODAL_CSV):
    generate_audio_training_data(n_samples=100)

df_audio = pd.read_csv(MULTIMODAL_CSV)
X_audio = df_audio[['pitch_mean','pitch_std','energy_mean','tempo']]  # feature columns
y_audio = df_audio['true_emotion']                                    # string emotion labels

# Encode string labels to ints for sklearn; kept module-global so
# predict_emotion_from_audiofile can invert predictions later.
_audio_rf_le = LabelEncoder()
y_enc = _audio_rf_le.fit_transform(y_audio)

_audio_rf_model = RandomForestClassifier(n_estimators=200, random_state=42)
_audio_rf_model.fit(X_audio, y_enc)
print("✓ Audio RF model trained from multimodal dataset")
111
+
112
+ # ======================= Emotion Prediction =======================
113
def predict_emotion_from_audiofile(audio_filepath):
    """Classify the emotion in an audio file via the prosody RF model.

    Falls back to a random label from EMOTIONS_ALLOWED when feature
    extraction or prediction fails for any reason.
    """
    try:
        feats = extract_audio_features(audio_filepath)
        feature_order = ('pitch_mean', 'pitch_std', 'energy_mean', 'tempo')
        row = np.array([[feats[name] for name in feature_order]])
        encoded = _audio_rf_model.predict(row)[0]
        return _audio_rf_le.inverse_transform([encoded])[0].lower()
    except Exception as e:
        print(f"Error predicting emotion: {e}")
        return random.choice(EMOTIONS_ALLOWED)
123
+
124
# ======================= Mental Health Text Chat =======================
# Keyword lists used by is_mental_health_related() for simple substring routing.
MENTAL_KEYWORDS = ["depression","anxiety","stress","sad","trauma","therapy","mental","emotion","feel","help"]
OFF_TOPIC = ["song","music","joke","game","food","movie","sport","money","business"]
# Canned supportive replies (English); respond() picks one at random.
MENTAL_RESPONSES_EN = [
    "I hear that you're going through a difficult time.",
    "Thank you for sharing that with me. Your feelings are valid.",
    "I'm here to listen and support you. Would you like to talk more?",
]
# Redirection replies used when the message looks off-topic.
OFF_TOPIC_RESPONSES = [
    "Let's focus on emotional well-being. How are you feeling today?",
    "I specialize in mental health conversations. Tell me how you're feeling.",
]
137
 
138
  def contains_arabic(text: str) -> bool:
 
140
 
141
def is_mental_health_related(text: str) -> bool:
    """Decide whether *text* should be treated as a mental-health message.

    Off-topic keywords veto first; otherwise a mental-health keyword or
    any Arabic content counts as relevant.
    """
    lowered = text.lower()
    if any(term in lowered for term in OFF_TOPIC):
        return False
    has_keyword = any(term in lowered for term in MENTAL_KEYWORDS)
    return has_keyword or contains_arabic(lowered)
150
 
 
154
  if not is_mental_health_related(message):
155
  return random.choice(OFF_TOPIC_RESPONSES)
156
  if contains_arabic(message):
157
+ return "أنا هنا لدعمك، كيف تشعر اليوم؟"
158
  else:
159
  return random.choice(MENTAL_RESPONSES_EN)
160
 
161
+ # ======================= TTS Helper =======================
162
def make_tts_for_message(text, lang="en"):
    """Synthesize *text* to speech and return the path of a temp .mp3 file.

    Returns None when synthesis fails (e.g. gTTS has no network access).
    The temp file is created with delete=False so Gradio can serve it.
    """
    try:
        speech = gTTS(text, lang=lang)
        out_file = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
        speech.save(out_file.name)
    except Exception as e:
        print(f"TTS error: {e}")
        return None
    return out_file.name
171
 
172
+ # ======================= Combined Voice Chat =======================
173
  SUPPORT_MESSAGES = {
174
  "sad": "I'm sorry you're feeling sad. I'm here for you.",
175
  "angry": "It's okay to feel angry. I'm here to listen.",
 
177
  "neutral": "Thanks for sharing. I'm here whenever you need to talk."
178
  }
179
 
180
def load_audio(path, sr=SAMPLE_RATE):
    """Load an audio file as a mono float32 signal resampled to *sr* Hz.

    Raises FileNotFoundError if *path* does not exist.
    """
    if not os.path.isfile(path):
        raise FileNotFoundError(f"Audio file not found: {path}")
    data, orig_sr = sf.read(path, dtype='float32')
    # Down-mix multi-channel audio to mono before resampling.
    if data.ndim > 1:
        data = np.mean(data, axis=1)
    if orig_sr != sr:
        # librosa >= 0.10 requires keyword arguments here; the positional
        # form resample(data, orig_sr, sr) raises TypeError.
        data = librosa.resample(data, orig_sr=orig_sr, target_sr=sr)
    return data
189
 
 
190
def voice_chat_combined(audio_path, language):
    """Voice-chat pipeline: detect emotion from recorded audio and reply.

    Parameters:
      audio_path: filepath of the recorded clip (falsy when nothing recorded)
      language:   UI language selection; may be None if the dropdown is unset
    Returns (response_text, tts_audio_path_or_None).
    """
    if not audio_path:
        return "No audio received. Please speak.", None

    # Emotion detection from vocal tone.
    emotion = predict_emotion_from_audiofile(audio_path)
    print(f"Detected emotion: {emotion}")

    support = SUPPORT_MESSAGES.get(emotion, "I hear you. I'm here for you.")
    # Guard against language being None/empty — calling .lower() on None
    # would crash the handler (the previous version had this guard).
    tts_lang = "ar" if (language and language.lower().startswith("arab")) else "en"
    tts_path = make_tts_for_message(support, lang=tts_lang)

    return f"Detected Emotion: {emotion.capitalize()}\n{support}", tts_path
203
 
204
+ # ======================= Gradio UI =======================
 
 
205
def clear_text():
    """Reset both chat textboxes to empty strings."""
    empty = ""
    return empty, empty
207
 
208
+ with gr.Blocks(title="🧠 Mental Health Therapy Chatbot") as demo:
209
  gr.Markdown("# 🧠 Mental Health Therapy Chatbot")
210
+ gr.Markdown("Supportive space for mental health conversations (English/Arabic)")
211
 
212
  with gr.Tabs():
 
213
  with gr.Tab("💬 Text Chat"):
214
+ gr.Markdown("### Chat about how you're feeling")
215
  with gr.Row():
216
  with gr.Column():
217
+ text_input = gr.Textbox(label="Type your message here...", lines=3)
 
 
 
 
218
  text_submit = gr.Button("Send Message", variant="primary")
219
  with gr.Column():
220
+ text_output = gr.Textbox(label="Response", interactive=False, lines=5)
 
 
 
 
221
  text_submit.click(fn=respond, inputs=[text_input], outputs=[text_output])
222
  clear_btn = gr.Button("Clear Conversation")
223
  clear_btn.click(fn=clear_text, outputs=[text_input, text_output])
224
 
 
225
  with gr.Tab("🎙️ Voice Chat"):
226
+ gr.Markdown("### Speak to me — I'll detect emotion and respond")
227
  with gr.Row():
228
  with gr.Column():
229
  audio_input_v = gr.Audio(sources=["microphone"], type="filepath", label="🎤 Speak Here")
 
232
  with gr.Column():
233
  voice_output_text = gr.Textbox(label="💬 Chatbot Response (text)", lines=6, interactive=False)
234
  voice_output_audio = gr.Audio(label="🔊 Voice Output (TTS)", interactive=False)
235
+ voice_submit.click(fn=voice_chat_combined, inputs=[audio_input_v, language_input],
236
+ outputs=[voice_output_text, voice_output_audio])
237
 
238
  if __name__ == "__main__":
239
  print("Starting Mental Health Therapy Chatbot...")
240
+ demo.launch(share=True)