ESPnet
audio
classification
Shikhar Bharadwaj committed on
Commit
90a6e66
·
1 Parent(s): 0bd6689

Update model

Browse files
Files changed (19) hide show
  1. README.md +505 -0
  2. meta.yaml +8 -0
  3. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/data/beans_cbi/token_list +266 -0
  4. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/229epoch.pth +3 -0
  5. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/RESULTS.md +16 -0
  6. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/config.yaml +453 -0
  7. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/images/acc.png +0 -0
  8. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/images/backward_time.png +0 -0
  9. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/images/clip.png +0 -0
  10. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/images/forward_time.png +0 -0
  11. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/images/gpu_max_cached_mem_GB.png +0 -0
  12. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/images/grad_norm.png +0 -0
  13. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/images/iter_time.png +0 -0
  14. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/images/loss.png +0 -0
  15. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/images/loss_scale.png +0 -0
  16. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/images/macro_precision.png +0 -0
  17. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/images/optim0_lr0.png +0 -0
  18. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/images/optim_step_time.png +0 -0
  19. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/images/train_time.png +0 -0
README.md ADDED
@@ -0,0 +1,505 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - classification
6
+ datasets:
7
+ - beans
8
+ license: cc-by-4.0
9
+ ---
10
+
11
+ ## ESPnet2 CLS model
12
+
13
+ ### `espnet/OpenBEATS-Large-i3-cbi`
14
+
15
+ This model was trained by Shikhar Bharadwaj using the beans recipe in [espnet](https://github.com/espnet/espnet/).
16
+
17
+ ## CLS config
18
+
19
+ <details><summary>expand</summary>
20
+
21
+ ```
22
+ config: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/earlarge3/conf/ear_large/beans_cbi.yaml
23
+ print_config: false
24
+ log_level: INFO
25
+ drop_last_iter: false
26
+ dry_run: false
27
+ iterator_type: sequence
28
+ valid_iterator_type: null
29
+ output_dir: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3
30
+ ngpu: 1
31
+ seed: 0
32
+ num_workers: 2
33
+ num_att_plot: 0
34
+ dist_backend: nccl
35
+ dist_init_method: env://
36
+ dist_world_size: null
37
+ dist_rank: null
38
+ local_rank: 0
39
+ dist_master_addr: null
40
+ dist_master_port: null
41
+ dist_launcher: null
42
+ multiprocessing_distributed: false
43
+ unused_parameters: true
44
+ sharded_ddp: false
45
+ use_deepspeed: false
46
+ deepspeed_config: null
47
+ gradient_as_bucket_view: true
48
+ ddp_comm_hook: null
49
+ cudnn_enabled: true
50
+ cudnn_benchmark: false
51
+ cudnn_deterministic: true
52
+ use_tf32: false
53
+ collect_stats: false
54
+ write_collected_feats: false
55
+ max_epoch: 250
56
+ patience: null
57
+ val_scheduler_criterion:
58
+ - valid
59
+ - loss
60
+ early_stopping_criterion:
61
+ - valid
62
+ - loss
63
+ - min
64
+ best_model_criterion:
65
+ - - valid
66
+ - acc
67
+ - max
68
+ keep_nbest_models: 1
69
+ nbest_averaging_interval: 0
70
+ grad_clip: 1
71
+ grad_clip_type: 2.0
72
+ grad_noise: false
73
+ accum_grad: 1
74
+ no_forward_run: false
75
+ resume: true
76
+ train_dtype: float32
77
+ use_amp: false
78
+ log_interval: null
79
+ use_matplotlib: true
80
+ use_tensorboard: true
81
+ create_graph_in_tensorboard: false
82
+ use_wandb: true
83
+ wandb_project: audioverse
84
+ wandb_id: null
85
+ wandb_entity: shikhar
86
+ wandb_name: beans_cbi.earlarge3
87
+ wandb_model_log_interval: -1
88
+ detect_anomaly: false
89
+ use_adapter: false
90
+ adapter: lora
91
+ save_strategy: all
92
+ adapter_conf: {}
93
+ pretrain_path: null
94
+ init_param: []
95
+ ignore_init_mismatch: false
96
+ freeze_param: []
97
+ num_iters_per_epoch: null
98
+ batch_size: 32
99
+ valid_batch_size: 32
100
+ batch_bins: 1000000
101
+ valid_batch_bins: null
102
+ category_sample_size: 10
103
+ train_shape_file:
104
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_stats_16k/train/speech_shape
105
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_stats_16k/train/label_shape
106
+ valid_shape_file:
107
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_stats_16k/valid/speech_shape
108
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_stats_16k/valid/label_shape
109
+ batch_type: folded
110
+ valid_batch_type: null
111
+ fold_length:
112
+ - 160000
113
+ - 5
114
+ sort_in_batch: descending
115
+ shuffle_within_batch: false
116
+ sort_batch: descending
117
+ multiple_iterator: false
118
+ utt2weight_file: null
119
+ chunk_length: 500
120
+ chunk_shift_ratio: 0.5
121
+ num_cache_chunks: 1024
122
+ chunk_excluded_key_prefixes: []
123
+ chunk_default_fs: null
124
+ chunk_max_abs_length: null
125
+ chunk_discard_short_samples: true
126
+ train_data_path_and_name_and_type:
127
+ - - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/beans_cbi/cbi.train/wav.scp
128
+ - speech
129
+ - sound
130
+ - - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/beans_cbi/cbi.train/text
131
+ - label
132
+ - text
133
+ valid_data_path_and_name_and_type:
134
+ - - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/beans_cbi/cbi.dev/wav.scp
135
+ - speech
136
+ - sound
137
+ - - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/beans_cbi/cbi.dev/text
138
+ - label
139
+ - text
140
+ multi_task_dataset: false
141
+ allow_variable_data_keys: false
142
+ max_cache_size: 0.0
143
+ max_cache_fd: 32
144
+ allow_multi_rates: false
145
+ valid_max_cache_size: null
146
+ exclude_weight_decay: false
147
+ exclude_weight_decay_conf: {}
148
+ optim: adamw
149
+ optim_conf:
150
+ lr: 3.0e-05
151
+ weight_decay: 0.01
152
+ betas:
153
+ - 0.9
154
+ - 0.98
155
+ scheduler: cosineannealingwarmuprestarts
156
+ scheduler_conf:
157
+ first_cycle_steps: 95000
158
+ warmup_steps: 8000
159
+ max_lr: 3.0e-05
160
+ min_lr: 5.0e-06
161
+ lightning_conf: {}
162
+ token_list:
163
+ - scoori
164
+ - bulori
165
+ - bushti
166
+ - blkpho
167
+ - brthum
168
+ - cacwre
169
+ - pasfly
170
+ - lesgol
171
+ - logshr
172
+ - macwar
173
+ - pinsis
174
+ - whbnut
175
+ - hamfly
176
+ - normoc
177
+ - grtgra
178
+ - houwre
179
+ - comyel
180
+ - grhowl
181
+ - houfin
182
+ - rocpig
183
+ - annhum
184
+ - astfly
185
+ - magwar
186
+ - wesmea
187
+ - wewpew
188
+ - spotow
189
+ - amerob
190
+ - daejun
191
+ - easmea
192
+ - greroa
193
+ - mouchi
194
+ - pilwoo
195
+ - comrav
196
+ - hoowar
197
+ - savspa
198
+ - warvir
199
+ - easblu
200
+ - gnttow
201
+ - ovenbi1
202
+ - rewbla
203
+ - robgro
204
+ - swathr
205
+ - tuftit
206
+ - westan
207
+ - winwre3
208
+ - btywar
209
+ - carwre
210
+ - herthr
211
+ - bewwre
212
+ - sora
213
+ - brdowl
214
+ - buggna
215
+ - casvir
216
+ - chispa
217
+ - fiespa
218
+ - aldfly
219
+ - killde
220
+ - moudov
221
+ - rebwoo
222
+ - bkpwar
223
+ - dowwoo
224
+ - greegr
225
+ - banswa
226
+ - orcwar
227
+ - plsvir
228
+ - y00475
229
+ - blugrb1
230
+ - gockin
231
+ - greyel
232
+ - larspa
233
+ - osprey
234
+ - sonspa
235
+ - yebfly
236
+ - blujay
237
+ - brnthr
238
+ - canwre
239
+ - clanut
240
+ - comred
241
+ - eastow
242
+ - haiwoo
243
+ - lesyel
244
+ - amepip
245
+ - easpho
246
+ - fiscro
247
+ - sposan
248
+ - wooscj2
249
+ - bkhgro
250
+ - labwoo
251
+ - lazbun
252
+ - marwre
253
+ - stejay
254
+ - weskin
255
+ - bkbwar
256
+ - buhvir
257
+ - cangoo
258
+ - canwar
259
+ - dusfly
260
+ - grcfly
261
+ - norcar
262
+ - wilsni1
263
+ - yerwar
264
+ - yetvir
265
+ - eucdov
266
+ - linspa
267
+ - norpar
268
+ - olsfly
269
+ - rebnut
270
+ - scatan
271
+ - bnhcow
272
+ - louwat
273
+ - norfli
274
+ - veery
275
+ - woothr
276
+ - btnwar
277
+ - cedwax
278
+ - chswar
279
+ - comgra
280
+ - indbun
281
+ - leabit
282
+ - leafly
283
+ - pinwar
284
+ - reevir1
285
+ - solsan
286
+ - bktspa
287
+ - foxspa
288
+ - houspa
289
+ - snobun
290
+ - vesspa
291
+ - yelwar
292
+ - brespa
293
+ - comgol
294
+ - coohaw
295
+ - gnwtea
296
+ - grbher3
297
+ - hergul
298
+ - mallar3
299
+ - swaspa
300
+ - brncre
301
+ - btbwar
302
+ - caster1
303
+ - eawpew
304
+ - rethaw
305
+ - rocwre
306
+ - ruckin
307
+ - semsan
308
+ - whtspa
309
+ - wlswar
310
+ - bkcchi
311
+ - bkchum
312
+ - amered
313
+ - norwat
314
+ - whcspa
315
+ - grycat
316
+ - balori
317
+ - purfin
318
+ - treswa
319
+ - wilfly
320
+ - comter
321
+ - belspa2
322
+ - juntit1
323
+ - comnig
324
+ - reshaw
325
+ - snogoo
326
+ - perfal
327
+ - gadwal
328
+ - grnher
329
+ - horlar
330
+ - lobdow
331
+ - bawwar
332
+ - amegfi
333
+ - commer
334
+ - ribgul
335
+ - casfin
336
+ - pibgre
337
+ - evegro
338
+ - pygnut
339
+ - brwhaw
340
+ - gryfly
341
+ - leasan
342
+ - barswa
343
+ - phaino
344
+ - amecro
345
+ - calqua
346
+ - amewoo
347
+ - pingro
348
+ - saypho
349
+ - semplo
350
+ - buwwar
351
+ - boboli
352
+ - amekes
353
+ - cowscj1
354
+ - amtspa
355
+ - lobcur
356
+ - belkin1
357
+ - pecsan
358
+ - prawar
359
+ - vigswa
360
+ - camwar
361
+ - easkin
362
+ - yebsap
363
+ - norsho
364
+ - gocspa
365
+ - rufhum
366
+ - baisan
367
+ - cliswa
368
+ - pinjay
369
+ - comloo
370
+ - baleag
371
+ - merlin
372
+ - yehbla
373
+ - calgul
374
+ - goleag
375
+ - nutwoo
376
+ - rusbla
377
+ - eursta
378
+ - ameavo
379
+ - lesnig
380
+ - palwar
381
+ - bkbmag1
382
+ - brebla
383
+ - sagthr
384
+ - bkbcuc
385
+ - wesgre
386
+ - redcro
387
+ - wiltur
388
+ - amebit
389
+ - sagspa1
390
+ - tunswa
391
+ - wooduc
392
+ - renpha
393
+ - whtswi
394
+ - bongul
395
+ - norhar2
396
+ - doccor
397
+ - lotduc
398
+ - chukar
399
+ - horgre
400
+ - nrwswa
401
+ - sheowl
402
+ - wesblu
403
+ - whfibi
404
+ - buwtea
405
+ - norpin
406
+ - eargre
407
+ - rebsap
408
+ - lewwoo
409
+ - rebmer
410
+ - wessan
411
+ - chiswi
412
+ - lecthr
413
+ - rthhum
414
+ - moublu
415
+ - amewig
416
+ - rinduc
417
+ - shshaw
418
+ - rufgro
419
+ - swahaw
420
+ - coshum
421
+ - truswa
422
+ - rudduc
423
+ - buffle
424
+ - hoomer
425
+ - gcrfin
426
+ - redhea
427
+ - <blank>
428
+ - <unk>
429
+ text_token_list: null
430
+ text_bpemodel: null
431
+ init: xavier_normal
432
+ input_size: 1
433
+ use_preprocessor: true
434
+ frontend: null
435
+ frontend_conf: {}
436
+ specaug: null
437
+ specaug_conf: {}
438
+ normalize: null
439
+ normalize_conf: {}
440
+ preencoder: null
441
+ preencoder_conf: {}
442
+ encoder: beats
443
+ encoder_conf:
444
+ beats_ckpt_path: /work/nvme/bbjs/sbharadwaj/7Msounds/exp/beats_iter2_large2.tune_lr1.0e-4_warmup40000_bins1600000_totalsteps400000/epoch_latest.pt
445
+ beats_config:
446
+ layer_wise_gradient_decay_ratio: 0.3
447
+ encoder_layerdrop: 0.1
448
+ dropout: 0.0
449
+ use_weighted_representation: false
450
+ specaug_config:
451
+ apply_time_warp: true
452
+ apply_freq_mask: false
453
+ apply_time_mask: true
454
+ time_mask_width_ratio_range:
455
+ - 0
456
+ - 0.06
457
+ num_time_mask: 1
458
+ roll_augment: true
459
+ roll_interval: 1
460
+ text_encoder: null
461
+ text_encoder_conf: {}
462
+ embedding_fusion: null
463
+ embedding_fusion_conf: {}
464
+ decoder: linear
465
+ decoder_conf: {}
466
+ model: espnet
467
+ model_conf:
468
+ classification_type: multi-class
469
+ lsm_weight: 0.1
470
+ required:
471
+ - output_dir
472
+ - token_list
473
+ version: '202412'
474
+ distributed: false
475
+ ```
476
+
477
+ </details>
478
+
479
+ ### Citations
480
+
481
+ ```BibTex
482
+
483
+ @article{bharadwaj2025openbeats,
484
+ title={OpenBEATs: A Fully Open-Source General-Purpose Audio Encoder},
485
+ author={Bharadwaj, Shikhar and Cornell, Samuele and Choi, Kwanghee and Fukayama, Satoru and Shim, Hye-jin and Deshmukh, Soham and Watanabe, Shinji},
486
+ journal={arXiv preprint arXiv:2507.14129},
487
+ year={2025}
488
+ }
489
+
490
+ @inproceedings{watanabe2018espnet,
491
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
492
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
493
+ year={2018},
494
+ booktitle={Proceedings of Interspeech},
495
+ pages={2207--2211},
496
+ doi={10.21437/Interspeech.2018-1456},
497
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
498
+ }
499
+
500
+
501
+
502
+
503
+
504
+
505
+ ```
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: '202503'
2
+ files:
3
+ classification_model_file: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/229epoch.pth
4
+ python: "3.9.18 | packaged by conda-forge | (main, Dec 23 2023, 17:20:25) \n[GCC 12.3.0]"
5
+ timestamp: 1763331400.539316
6
+ torch: 2.1.2
7
+ yaml_files:
8
+ classification_train_config: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/config.yaml
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/data/beans_cbi/token_list ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ scoori
2
+ bulori
3
+ bushti
4
+ blkpho
5
+ brthum
6
+ cacwre
7
+ pasfly
8
+ lesgol
9
+ logshr
10
+ macwar
11
+ pinsis
12
+ whbnut
13
+ hamfly
14
+ normoc
15
+ grtgra
16
+ houwre
17
+ comyel
18
+ grhowl
19
+ houfin
20
+ rocpig
21
+ annhum
22
+ astfly
23
+ magwar
24
+ wesmea
25
+ wewpew
26
+ spotow
27
+ amerob
28
+ daejun
29
+ easmea
30
+ greroa
31
+ mouchi
32
+ pilwoo
33
+ comrav
34
+ hoowar
35
+ savspa
36
+ warvir
37
+ easblu
38
+ gnttow
39
+ ovenbi1
40
+ rewbla
41
+ robgro
42
+ swathr
43
+ tuftit
44
+ westan
45
+ winwre3
46
+ btywar
47
+ carwre
48
+ herthr
49
+ bewwre
50
+ sora
51
+ brdowl
52
+ buggna
53
+ casvir
54
+ chispa
55
+ fiespa
56
+ aldfly
57
+ killde
58
+ moudov
59
+ rebwoo
60
+ bkpwar
61
+ dowwoo
62
+ greegr
63
+ banswa
64
+ orcwar
65
+ plsvir
66
+ y00475
67
+ blugrb1
68
+ gockin
69
+ greyel
70
+ larspa
71
+ osprey
72
+ sonspa
73
+ yebfly
74
+ blujay
75
+ brnthr
76
+ canwre
77
+ clanut
78
+ comred
79
+ eastow
80
+ haiwoo
81
+ lesyel
82
+ amepip
83
+ easpho
84
+ fiscro
85
+ sposan
86
+ wooscj2
87
+ bkhgro
88
+ labwoo
89
+ lazbun
90
+ marwre
91
+ stejay
92
+ weskin
93
+ bkbwar
94
+ buhvir
95
+ cangoo
96
+ canwar
97
+ dusfly
98
+ grcfly
99
+ norcar
100
+ wilsni1
101
+ yerwar
102
+ yetvir
103
+ eucdov
104
+ linspa
105
+ norpar
106
+ olsfly
107
+ rebnut
108
+ scatan
109
+ bnhcow
110
+ louwat
111
+ norfli
112
+ veery
113
+ woothr
114
+ btnwar
115
+ cedwax
116
+ chswar
117
+ comgra
118
+ indbun
119
+ leabit
120
+ leafly
121
+ pinwar
122
+ reevir1
123
+ solsan
124
+ bktspa
125
+ foxspa
126
+ houspa
127
+ snobun
128
+ vesspa
129
+ yelwar
130
+ brespa
131
+ comgol
132
+ coohaw
133
+ gnwtea
134
+ grbher3
135
+ hergul
136
+ mallar3
137
+ swaspa
138
+ brncre
139
+ btbwar
140
+ caster1
141
+ eawpew
142
+ rethaw
143
+ rocwre
144
+ ruckin
145
+ semsan
146
+ whtspa
147
+ wlswar
148
+ bkcchi
149
+ bkchum
150
+ amered
151
+ norwat
152
+ whcspa
153
+ grycat
154
+ balori
155
+ purfin
156
+ treswa
157
+ wilfly
158
+ comter
159
+ belspa2
160
+ juntit1
161
+ comnig
162
+ reshaw
163
+ snogoo
164
+ perfal
165
+ gadwal
166
+ grnher
167
+ horlar
168
+ lobdow
169
+ bawwar
170
+ amegfi
171
+ commer
172
+ ribgul
173
+ casfin
174
+ pibgre
175
+ evegro
176
+ pygnut
177
+ brwhaw
178
+ gryfly
179
+ leasan
180
+ barswa
181
+ phaino
182
+ amecro
183
+ calqua
184
+ amewoo
185
+ pingro
186
+ saypho
187
+ semplo
188
+ buwwar
189
+ boboli
190
+ amekes
191
+ cowscj1
192
+ amtspa
193
+ lobcur
194
+ belkin1
195
+ pecsan
196
+ prawar
197
+ vigswa
198
+ camwar
199
+ easkin
200
+ yebsap
201
+ norsho
202
+ gocspa
203
+ rufhum
204
+ baisan
205
+ cliswa
206
+ pinjay
207
+ comloo
208
+ baleag
209
+ merlin
210
+ yehbla
211
+ calgul
212
+ goleag
213
+ nutwoo
214
+ rusbla
215
+ eursta
216
+ ameavo
217
+ lesnig
218
+ palwar
219
+ bkbmag1
220
+ brebla
221
+ sagthr
222
+ bkbcuc
223
+ wesgre
224
+ redcro
225
+ wiltur
226
+ amebit
227
+ sagspa1
228
+ tunswa
229
+ wooduc
230
+ renpha
231
+ whtswi
232
+ bongul
233
+ norhar2
234
+ doccor
235
+ lotduc
236
+ chukar
237
+ horgre
238
+ nrwswa
239
+ sheowl
240
+ wesblu
241
+ whfibi
242
+ buwtea
243
+ norpin
244
+ eargre
245
+ rebsap
246
+ lewwoo
247
+ rebmer
248
+ wessan
249
+ chiswi
250
+ lecthr
251
+ rthhum
252
+ moublu
253
+ amewig
254
+ rinduc
255
+ shshaw
256
+ rufgro
257
+ swahaw
258
+ coshum
259
+ truswa
260
+ rudduc
261
+ buffle
262
+ hoomer
263
+ gcrfin
264
+ redhea
265
+ <blank>
266
+ <unk>
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/229epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd1b3139d11e69207f458451f76eeb834526a08883d52c0518b836984aeaf9e5
3
+ size 1246778335
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/RESULTS.md ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by scripts/utils/show_cls_result.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Sat Mar 22 12:41:57 CDT 2025`
5
+ - python version: `3.9.18 | packaged by conda-forge | (main, Dec 23 2023, 17:20:25) [GCC 12.3.0]`
6
+ - espnet version: `espnet 202412`
7
+ - pytorch version: `pytorch 2.6.0.dev20241210+cu124`
8
+ - Git hash: `ee8dd3d5da745a2c08c2bd6518bc0ba41ba5b224`
9
+ - Commit date: `Thu Mar 20 16:45:17 2025 -0500`
10
+
11
+ ## cls_earlarge3
12
+ |Split|mean_acc|mAP|mean_auc|n_labels|n_instances|
13
+ |---|---|---|---|---|---|
14
+ |cls_cbi.dev|71.98|71.88|94.33|264.00|3548.00|
15
+ |cls_cbi.test|69.42|71.66|96.41|264.00|3620.00|
16
+
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/config.yaml ADDED
@@ -0,0 +1,453 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/earlarge3/conf/ear_large/beans_cbi.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ drop_last_iter: false
5
+ dry_run: false
6
+ iterator_type: sequence
7
+ valid_iterator_type: null
8
+ output_dir: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3
9
+ ngpu: 1
10
+ seed: 0
11
+ num_workers: 2
12
+ num_att_plot: 0
13
+ dist_backend: nccl
14
+ dist_init_method: env://
15
+ dist_world_size: null
16
+ dist_rank: null
17
+ local_rank: 0
18
+ dist_master_addr: null
19
+ dist_master_port: null
20
+ dist_launcher: null
21
+ multiprocessing_distributed: false
22
+ unused_parameters: true
23
+ sharded_ddp: false
24
+ use_deepspeed: false
25
+ deepspeed_config: null
26
+ gradient_as_bucket_view: true
27
+ ddp_comm_hook: null
28
+ cudnn_enabled: true
29
+ cudnn_benchmark: false
30
+ cudnn_deterministic: true
31
+ use_tf32: false
32
+ collect_stats: false
33
+ write_collected_feats: false
34
+ max_epoch: 250
35
+ patience: null
36
+ val_scheduler_criterion:
37
+ - valid
38
+ - loss
39
+ early_stopping_criterion:
40
+ - valid
41
+ - loss
42
+ - min
43
+ best_model_criterion:
44
+ - - valid
45
+ - acc
46
+ - max
47
+ keep_nbest_models: 1
48
+ nbest_averaging_interval: 0
49
+ grad_clip: 1
50
+ grad_clip_type: 2.0
51
+ grad_noise: false
52
+ accum_grad: 1
53
+ no_forward_run: false
54
+ resume: true
55
+ train_dtype: float32
56
+ use_amp: false
57
+ log_interval: null
58
+ use_matplotlib: true
59
+ use_tensorboard: true
60
+ create_graph_in_tensorboard: false
61
+ use_wandb: true
62
+ wandb_project: audioverse
63
+ wandb_id: null
64
+ wandb_entity: shikhar
65
+ wandb_name: beans_cbi.earlarge3
66
+ wandb_model_log_interval: -1
67
+ detect_anomaly: false
68
+ use_adapter: false
69
+ adapter: lora
70
+ save_strategy: all
71
+ adapter_conf: {}
72
+ pretrain_path: null
73
+ init_param: []
74
+ ignore_init_mismatch: false
75
+ freeze_param: []
76
+ num_iters_per_epoch: null
77
+ batch_size: 32
78
+ valid_batch_size: 32
79
+ batch_bins: 1000000
80
+ valid_batch_bins: null
81
+ category_sample_size: 10
82
+ train_shape_file:
83
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_stats_16k/train/speech_shape
84
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_stats_16k/train/label_shape
85
+ valid_shape_file:
86
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_stats_16k/valid/speech_shape
87
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_stats_16k/valid/label_shape
88
+ batch_type: folded
89
+ valid_batch_type: null
90
+ fold_length:
91
+ - 160000
92
+ - 5
93
+ sort_in_batch: descending
94
+ shuffle_within_batch: false
95
+ sort_batch: descending
96
+ multiple_iterator: false
97
+ utt2weight_file: null
98
+ chunk_length: 500
99
+ chunk_shift_ratio: 0.5
100
+ num_cache_chunks: 1024
101
+ chunk_excluded_key_prefixes: []
102
+ chunk_default_fs: null
103
+ chunk_max_abs_length: null
104
+ chunk_discard_short_samples: true
105
+ train_data_path_and_name_and_type:
106
+ - - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/beans_cbi/cbi.train/wav.scp
107
+ - speech
108
+ - sound
109
+ - - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/beans_cbi/cbi.train/text
110
+ - label
111
+ - text
112
+ valid_data_path_and_name_and_type:
113
+ - - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/beans_cbi/cbi.dev/wav.scp
114
+ - speech
115
+ - sound
116
+ - - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/beans_cbi/cbi.dev/text
117
+ - label
118
+ - text
119
+ multi_task_dataset: false
120
+ allow_variable_data_keys: false
121
+ max_cache_size: 0.0
122
+ max_cache_fd: 32
123
+ allow_multi_rates: false
124
+ valid_max_cache_size: null
125
+ exclude_weight_decay: false
126
+ exclude_weight_decay_conf: {}
127
+ optim: adamw
128
+ optim_conf:
129
+ lr: 3.0e-05
130
+ weight_decay: 0.01
131
+ betas:
132
+ - 0.9
133
+ - 0.98
134
+ scheduler: cosineannealingwarmuprestarts
135
+ scheduler_conf:
136
+ first_cycle_steps: 95000
137
+ warmup_steps: 8000
138
+ max_lr: 3.0e-05
139
+ min_lr: 5.0e-06
140
+ lightning_conf: {}
141
+ token_list:
142
+ - scoori
143
+ - bulori
144
+ - bushti
145
+ - blkpho
146
+ - brthum
147
+ - cacwre
148
+ - pasfly
149
+ - lesgol
150
+ - logshr
151
+ - macwar
152
+ - pinsis
153
+ - whbnut
154
+ - hamfly
155
+ - normoc
156
+ - grtgra
157
+ - houwre
158
+ - comyel
159
+ - grhowl
160
+ - houfin
161
+ - rocpig
162
+ - annhum
163
+ - astfly
164
+ - magwar
165
+ - wesmea
166
+ - wewpew
167
+ - spotow
168
+ - amerob
169
+ - daejun
170
+ - easmea
171
+ - greroa
172
+ - mouchi
173
+ - pilwoo
174
+ - comrav
175
+ - hoowar
176
+ - savspa
177
+ - warvir
178
+ - easblu
179
+ - gnttow
180
+ - ovenbi1
181
+ - rewbla
182
+ - robgro
183
+ - swathr
184
+ - tuftit
185
+ - westan
186
+ - winwre3
187
+ - btywar
188
+ - carwre
189
+ - herthr
190
+ - bewwre
191
+ - sora
192
+ - brdowl
193
+ - buggna
194
+ - casvir
195
+ - chispa
196
+ - fiespa
197
+ - aldfly
198
+ - killde
199
+ - moudov
200
+ - rebwoo
201
+ - bkpwar
202
+ - dowwoo
203
+ - greegr
204
+ - banswa
205
+ - orcwar
206
+ - plsvir
207
+ - y00475
208
+ - blugrb1
209
+ - gockin
210
+ - greyel
211
+ - larspa
212
+ - osprey
213
+ - sonspa
214
+ - yebfly
215
+ - blujay
216
+ - brnthr
217
+ - canwre
218
+ - clanut
219
+ - comred
220
+ - eastow
221
+ - haiwoo
222
+ - lesyel
223
+ - amepip
224
+ - easpho
225
+ - fiscro
226
+ - sposan
227
+ - wooscj2
228
+ - bkhgro
229
+ - labwoo
230
+ - lazbun
231
+ - marwre
232
+ - stejay
233
+ - weskin
234
+ - bkbwar
235
+ - buhvir
236
+ - cangoo
237
+ - canwar
238
+ - dusfly
239
+ - grcfly
240
+ - norcar
241
+ - wilsni1
242
+ - yerwar
243
+ - yetvir
244
+ - eucdov
245
+ - linspa
246
+ - norpar
247
+ - olsfly
248
+ - rebnut
249
+ - scatan
250
+ - bnhcow
251
+ - louwat
252
+ - norfli
253
+ - veery
254
+ - woothr
255
+ - btnwar
256
+ - cedwax
257
+ - chswar
258
+ - comgra
259
+ - indbun
260
+ - leabit
261
+ - leafly
262
+ - pinwar
263
+ - reevir1
264
+ - solsan
265
+ - bktspa
266
+ - foxspa
267
+ - houspa
268
+ - snobun
269
+ - vesspa
270
+ - yelwar
271
+ - brespa
272
+ - comgol
273
+ - coohaw
274
+ - gnwtea
275
+ - grbher3
276
+ - hergul
277
+ - mallar3
278
+ - swaspa
279
+ - brncre
280
+ - btbwar
281
+ - caster1
282
+ - eawpew
283
+ - rethaw
284
+ - rocwre
285
+ - ruckin
286
+ - semsan
287
+ - whtspa
288
+ - wlswar
289
+ - bkcchi
290
+ - bkchum
291
+ - amered
292
+ - norwat
293
+ - whcspa
294
+ - grycat
295
+ - balori
296
+ - purfin
297
+ - treswa
298
+ - wilfly
299
+ - comter
300
+ - belspa2
301
+ - juntit1
302
+ - comnig
303
+ - reshaw
304
+ - snogoo
305
+ - perfal
306
+ - gadwal
307
+ - grnher
308
+ - horlar
309
+ - lobdow
310
+ - bawwar
311
+ - amegfi
312
+ - commer
313
+ - ribgul
314
+ - casfin
315
+ - pibgre
316
+ - evegro
317
+ - pygnut
318
+ - brwhaw
319
+ - gryfly
320
+ - leasan
321
+ - barswa
322
+ - phaino
323
+ - amecro
324
+ - calqua
325
+ - amewoo
326
+ - pingro
327
+ - saypho
328
+ - semplo
329
+ - buwwar
330
+ - boboli
331
+ - amekes
332
+ - cowscj1
333
+ - amtspa
334
+ - lobcur
335
+ - belkin1
336
+ - pecsan
337
+ - prawar
338
+ - vigswa
339
+ - camwar
340
+ - easkin
341
+ - yebsap
342
+ - norsho
343
+ - gocspa
344
+ - rufhum
345
+ - baisan
346
+ - cliswa
347
+ - pinjay
348
+ - comloo
349
+ - baleag
350
+ - merlin
351
+ - yehbla
352
+ - calgul
353
+ - goleag
354
+ - nutwoo
355
+ - rusbla
356
+ - eursta
357
+ - ameavo
358
+ - lesnig
359
+ - palwar
360
+ - bkbmag1
361
+ - brebla
362
+ - sagthr
363
+ - bkbcuc
364
+ - wesgre
365
+ - redcro
366
+ - wiltur
367
+ - amebit
368
+ - sagspa1
369
+ - tunswa
370
+ - wooduc
371
+ - renpha
372
+ - whtswi
373
+ - bongul
374
+ - norhar2
375
+ - doccor
376
+ - lotduc
377
+ - chukar
378
+ - horgre
379
+ - nrwswa
380
+ - sheowl
381
+ - wesblu
382
+ - whfibi
383
+ - buwtea
384
+ - norpin
385
+ - eargre
386
+ - rebsap
387
+ - lewwoo
388
+ - rebmer
389
+ - wessan
390
+ - chiswi
391
+ - lecthr
392
+ - rthhum
393
+ - moublu
394
+ - amewig
395
+ - rinduc
396
+ - shshaw
397
+ - rufgro
398
+ - swahaw
399
+ - coshum
400
+ - truswa
401
+ - rudduc
402
+ - buffle
403
+ - hoomer
404
+ - gcrfin
405
+ - redhea
406
+ - <blank>
407
+ - <unk>
408
+ text_token_list: null
409
+ text_bpemodel: null
410
+ init: xavier_normal
411
+ input_size: 1
412
+ use_preprocessor: true
413
+ frontend: null
414
+ frontend_conf: {}
415
+ specaug: null
416
+ specaug_conf: {}
417
+ normalize: null
418
+ normalize_conf: {}
419
+ preencoder: null
420
+ preencoder_conf: {}
421
+ encoder: beats
422
+ encoder_conf:
423
+ beats_ckpt_path: /work/nvme/bbjs/sbharadwaj/7Msounds/exp/beats_iter2_large2.tune_lr1.0e-4_warmup40000_bins1600000_totalsteps400000/epoch_latest.pt
424
+ beats_config:
425
+ layer_wise_gradient_decay_ratio: 0.3
426
+ encoder_layerdrop: 0.1
427
+ dropout: 0.0
428
+ use_weighted_representation: false
429
+ specaug_config:
430
+ apply_time_warp: true
431
+ apply_freq_mask: false
432
+ apply_time_mask: true
433
+ time_mask_width_ratio_range:
434
+ - 0
435
+ - 0.06
436
+ num_time_mask: 1
437
+ roll_augment: true
438
+ roll_interval: 1
439
+ text_encoder: null
440
+ text_encoder_conf: {}
441
+ embedding_fusion: null
442
+ embedding_fusion_conf: {}
443
+ decoder: linear
444
+ decoder_conf: {}
445
+ model: espnet
446
+ model_conf:
447
+ classification_type: multi-class
448
+ lsm_weight: 0.1
449
+ required:
450
+ - output_dir
451
+ - token_list
452
+ version: '202412'
453
+ distributed: false
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/images/acc.png ADDED
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/images/backward_time.png ADDED
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/images/clip.png ADDED
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/images/forward_time.png ADDED
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/images/gpu_max_cached_mem_GB.png ADDED
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/images/grad_norm.png ADDED
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/images/iter_time.png ADDED
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/images/loss.png ADDED
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/images/loss_scale.png ADDED
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/images/macro_precision.png ADDED
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/images/optim0_lr0.png ADDED
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/images/optim_step_time.png ADDED
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge3/images/train_time.png ADDED