Upload folder using huggingface_hub

Browse files

Files changed (9) hide show

added_tokens.json +21 -0
merges.txt +0 -0
s3gen.safetensors +3 -0
special_tokens_map.json +24 -0
t3_turbo_v1.safetensors +3 -0
t3_turbo_v1.yaml +387 -0
tokenizer_config.json +175 -0
ve.safetensors +3 -0
vocab.json +0 -0

added_tokens.json ADDED Viewed

	@@ -0,0 +1,21 @@

+{
+  "[advertisement]": 50261,
+  "[angry]": 50257,
+  "[chuckle]": 50274,
+  "[clear throat]": 50267,
+  "[cough]": 50270,
+  "[crying]": 50264,
+  "[dramatic]": 50262,
+  "[fear]": 50258,
+  "[gasp]": 50273,
+  "[groan]": 50271,
+  "[happy]": 50265,
+  "[laugh]": 50275,
+  "[narration]": 50263,
+  "[sarcastic]": 50266,
+  "[shush]": 50269,
+  "[sigh]": 50268,
+  "[sniff]": 50272,
+  "[surprised]": 50259,
+  "[whispering]": 50260
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

s3gen.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2b78103c654207393955e4900aac14a12de8ef25f4b09424f1ef91941f161d4e
+size 1056484620

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "bos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<|endoftext|>",
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

t3_turbo_v1.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fcf1f8c1d651bb7e3acd69ee5be269b4ac10c02980b7708213d598bc9f7cdf87
+size 1915480052

t3_turbo_v1.yaml ADDED Viewed

	@@ -0,0 +1,387 @@

+align_f0: false
+align_loss_weight: 1.0
+asc_loss_weight: 0.02
+attention_mechanism: graves
+augment_sr: false
+base_model: null
+bit_depth: 9
+causal_convs: false
+causal_decoder: false
+clap_dims: 512
+compat_dcnar_f0_std_cond: false
+conv_stack_dilation: !!python/tuple
+- 1
+- 3
+- 9
+- 27
+convbn_bias: false
+cudnn_deterministic: false
+dcnar_1d_discrim: false
+dcnar_aligner_kernel: 5
+dcnar_aligner_type: null
+dcnar_allow_trivial_speaker_table: true
+dcnar_batch_size: 24
+dcnar_conformer: false
+dcnar_conformer_attn_chunk_size: null
+dcnar_conformer_attn_dim_head: 64
+dcnar_conformer_attn_ff_mult: 4
+dcnar_conformer_attn_win_size: null
+dcnar_conv_weight_grouping: 1
+dcnar_df0_loss_weight: 0.5
+dcnar_dim_lrg: 512
+dcnar_dim_sml: 256
+dcnar_dim_style: 32
+dcnar_discrim_tanh: false
+dcnar_dtw_loss_weight: 1
+dcnar_dur_loss_weight: 0.1
+dcnar_dur_pred_scale: linear
+dcnar_f0_cond_mel_decoding: false
+dcnar_f0_cond_mel_decoding_teacher_forcing: true
+dcnar_f0_loss_weight: 0.5
+dcnar_gan_dims: 64
+dcnar_global_style: true
+dcnar_hard_gumbel_tones: false
+dcnar_hubert_downsample: 1
+dcnar_inpaint_vae: false
+dcnar_inpaint_vae_kld_loss_weight: 0
+dcnar_inpaint_vae_latent_dim: 32
+dcnar_inpaint_vae_warmup_steps: 5000
+dcnar_inpaint_vae_weight_step_size: 0.0002
+dcnar_local_f0: false
+dcnar_local_intensity: false
+dcnar_local_style: false
+dcnar_lr: 0.0001
+dcnar_mel_adv: false
+dcnar_mel_loss_weight: 10.0
+dcnar_mixed_sr_loss: false
+dcnar_n_terminal_tones: 0
+dcnar_ph_f0_loss_weight: 1.0
+dcnar_ph_hubert_loss_weight: 1.0
+dcnar_ph_intensity_loss_weight: 1.0
+dcnar_pitch_adv: false
+dcnar_prosody_adv: false
+dcnar_prosody_stats_cond: false
+dcnar_pstat_weight_f0_mean: 10
+dcnar_pstat_weight_f0_std: 100
+dcnar_pstat_weight_intensity_mean: 10
+dcnar_pstat_weight_intensity_std: 0
+dcnar_pstat_weight_phdur_mean: 1
+dcnar_pstat_weight_phdur_std: 1
+dcnar_reverb_label: false
+dcnar_sampler: default
+dcnar_sr_label: false
+dcnar_terminal_tone_usl_weight: 0
+dcnar_terminal_tone_weight: 0
+dcnar_upsampling: gaussian
+dcnar_use_log_f0_frames: false
+dcnar_use_toucan_utt_embs: false
+dcnar_usl_mfcc: false
+dcnar_usl_mfcc_deltas: false
+dcnar_usl_mfcc_dim: 12
+dcnar_usl_mfcc_var_dec: false
+dcnar_usl_slim: false
+dcnar_usl_slim_dim: 16
+dcnar_usl_with_f0: false
+dcnar_utt_dur_loss_weight: 0
+dcnar_vc_local_hubert: false
+dcnar_vc_mode: nn
+dcnar_vc_text_predict: false
+dcnar_vuv_loss_weight: 0.5
+dcvoc_causal: false
+dcvoc_causal_lookahead: 3
+dcvoc_channel_downsample_mode: interleave
+dcvoc_convs_per_scale: 8
+dcvoc_disc_duplicates: 1
+dcvoc_disc_mpwd: true
+dcvoc_disc_mrsd: false
+dcvoc_disc_pdd: true
+dcvoc_disc_phase_aug: false
+dcvoc_discriminator_bound: 1.01
+dcvoc_groups_init: 8
+dcvoc_halfres_conv: true
+dcvoc_hidden_init: 1024
+dcvoc_hop: 8
+dcvoc_kernel: 7
+dcvoc_mel_bneck: 256
+dcvoc_smpwd_hidden_max: 1024
+dcvoc_smpwd_periods:
+- 2
+- 3
+- 5
+- 7
+- 9
+- 11
+- 13
+dcvoc_upsample_method: linear
+denoise: false
+dfd_clip_stft: 1.0e-09
+dfd_ramdisk_path: /mnt/ramdisk
+ema_coeff: 0.99995
+emo_embedded_speaker_id: false
+emotion_adv: false
+enable_eos_bos_chars: true
+encoder_type: voice_encoder
+eval_crosslang: false
+eval_langs: dataset
+eval_max_ref_samples: 192
+eval_max_repeats: 1
+eval_max_runs: 10
+eval_max_sentences: 192
+eval_mbnet_name: null
+eval_models_dir: saved_models
+eval_n_plots: 2
+eval_n_wavs: 4
+eval_reference: train
+eval_syn_batch_size: 64
+eval_text_source: default
+eval_ve_name: universal/ve_v2
+eval_voc_max_frames: 2000
+eval_voc_name: null
+f0_mode: praat
+flatten_lstm_params: true
+fmax: 16000
+fmin: 0
+frames_per_framegroup: 10
+freeze_mel_head: false
+gmvae_ema_lr: 0.0001
+gmvae_latent_dim: 16
+gmvae_num_components: 0
+gpt_masked_loss: false
+gpt_prod_max_text: 200
+gpt_speaker_ref_type: same_speaker
+gpt_transformer_type: gpt2-medium
+hifigan_channels: 256
+hooli_enc_dims: 256
+hooli_filter_size: 257
+hooli_inv_no_uv: false
+hooli_inv_pitch_diff_reg_weight: 0
+hooli_inv_pitch_shift_reg_weight: 0
+hooli_nfft: 16
+hooli_osc_freq_cutoff: 0.15
+hooli_safe_step: true
+hooli_tv_fir: false
+hooli_wn_dims: 64
+hooligan_discriminators: univnet
+hooligan_istft: true
+hop_size: 320
+input_pos_emb: handled_internally_by_backbone
+is_lora: false
+language_embed_size: 16
+legacy_gpt_hidden_size: 1024
+lfcc_nfilts: 128
+llama_config_name: Llama_520M
+lora_alpha: 64
+lora_dropout: 0.05
+lora_r: 32
+lossynet_bsize: 25
+lossynet_clip_stft: 1.0e-09
+lossynet_lr: 0.001
+lossynet_n_out_classes: 2
+lowest_sr: 8000
+max_LR: 0.001
+max_conditioning_inputs: 2
+max_decoder_frames: 2000
+max_f0_freq: 600
+max_speech_tokens: 604
+max_text_tokens: 402
+max_total_tokens: 8196
+mel_pad_difference: 1
+mel_power: 1.0
+mel_type: db
+min_LR: 1.0e-06
+min_f0_freq: 75
+mpbert_n_freeze: 0
+mpbert_tokenizer: null
+mpbert_type: transformer
+mu_law: true
+n_cqcc_bins: 96
+n_cqt_bins: 84
+n_fft: 2048
+n_gpt_channels: 1024
+n_reverbs: 256
+n_spk_cond_samples: 2
+n_state_per_symbol: 1
+n_transformer_heads: 16
+n_transformer_layers: 30
+normalize_loudness: false
+normalized_mels: true
+num_ceps: 29
+num_diacritcs: 512
+num_freq: 1025
+num_heads: 4
+num_mels: 256
+num_style_tokens: 0
+num_tones: 16
+onehot_language: false
+onehot_speaker: false
+pf_word_boundaries: false
+phonemizer_backend: espeak
+preemphasis: 0.97
+preemphasize_voc_target: false
+prenet_type: original
+project_conditioning: false
+prosody_embed_size: 0
+r_schedule:
+- - 1
+  - -1
+rvc_emb_channels: 768
+rvc_enc_spk_input: false
+rvc_f0_up: 0
+rvc_f0_voc: true
+rvc_filter_channels: 768
+rvc_gin_channels: 256
+rvc_hidden_channels: 192
+rvc_inter_channels: 192
+rvc_kernel_size: 3
+rvc_mel_bins: 80
+rvc_n_heads: 2
+rvc_n_layers: 6
+rvc_p_dropout: 0
+rvc_resblock: '1'
+rvc_resblock_dilation_sizes:
+- - 1
+  - 3
+  - 5
+- - 1
+  - 3
+  - 5
+- - 1
+  - 3
+  - 5
+rvc_resblock_kernel_sizes:
+- 3
+- 7
+- 11
+rvc_seg_enc_size_frames: 370
+rvc_seg_enc_size_samples: 118400
+rvc_seg_voc_size_frames: 40
+rvc_seg_voc_size_samples: 12800
+rvc_speaker_enc: table
+rvc_speaker_enc_type: V1
+rvc_speaker_pitch: null
+rvc_spec_channels: 513
+rvc_spk_embed_dim: 109
+rvc_stft_filter_len: 1024
+rvc_stft_win_len: 1024
+rvc_train_kl_weight: 1.0
+rvc_train_mel_weight: 45
+rvc_upsample_initial_channel: 512
+rvc_upsample_kernel_sizes:
+- 20
+- 16
+- 4
+- 4
+rvc_upsample_rates:
+- 10
+- 8
+- 2
+- 2
+rvc_use_f0: true
+sample_rate: 32000
+scheduler_max_total_steps: 200000
+seed: 0
+self_conditioning: false
+separate_stopnet: false
+singing_dim: 4
+speaker_embed_size: 256
+speech_cond_prompt_len: 250
+speech_token_type: tortoise
+speech_tokens_dict_size: 6563
+speed_scale: 0.1
+start_speech_token: 6561
+start_text_token: 255
+stepwise_sigmoid_noise: 2.0
+stft_magnitude_min: 0.0001
+stop_speech_token: 6562
+stop_text_token: 0
+stop_threshold: 0.25
+style_embed_size: 256
+supports_cfg: false
+symbol_type: tortoise/data/gpt2_medium.json
+syn_ar_f0_predict: true
+syn_batch_frames: 16000
+syn_batch_size: 32
+syn_mel_scale: 1
+syn_predict_f0: true
+syn_sampler: binnedlength
+syn_symmetric_mel: false
+syn_train_max_frames: 700
+syn_train_min_duration: 1
+taco1_postnet: true
+taco_decoder_att_rnn_dim: 1024
+taco_decoder_prenet_dim: 256
+taco_decoder_rnn_dim: 1024
+taco_disjoint_conditioning: true
+taco_encoder_dim: 512
+taco_grad_clip: 1
+taco_loss_masking: true
+taco_lr: 0.0001
+taco_weight_decay: 1.0e-06
+target_loudness: -18
+text_loss_weight: 0.1
+text_preproc: none
+text_tokens_dict_size: 50276
+ti_vocoder: false
+toucan_utt_emb_dim: 704
+trim_silence: true
+upsample_factors: !!python/tuple
+- 5
+- 8
+- 8
+upsample_rate: null
+upsamplenet_dropout: false
+upsamplenet_lr: 1.0e-05
+use_adv_speaker_classifier: false
+use_clap_embeds: false
+use_diacritic: false
+use_emotion_table: false
+use_lamb_optimizer: false
+use_language_table: false
+use_monotonic_alignment: false
+use_mpbert: false
+use_one_cycle_lr: false
+use_perceiver_resampler: false
+use_pf: false
+use_ph_durations: false
+use_singing_labels: false
+use_snr_labels: false
+use_speaker_table: false
+use_speech_codes_as_input: true
+use_sv2tts: false
+use_tb: false
+use_tone: false
+use_tpgst: false
+use_wandb: false
+vad_algo: webrtc
+vad_margin: 0.1
+validate_sr: true
+validate_wav_len: true
+vc_mel2f0: false
+vc_soft_gt_pitch: false
+vc_soft_units: true
+ve_final_relu: false
+ve_hidden_size: 768
+ve_lr: 0.0001
+ve_min_samples: 20
+ve_partial_frames: 128
+ve_spk_batch_size: 128
+ve_utt_batch_size: 10
+voc_future_horizon: 11
+voc_lvc: false
+voc_lvc_dims: 8
+voc_noise_fir: true
+voc_subscale: 0
+voc_train_max_duration: 30
+voc_train_min_duration: 1.5
+voc_voiced_logits_scale: 0
+vocoder_bsize: 16
+vocoder_fc_dims: 512
+vocoder_hidden_size: 512
+vocoder_input_length: 16000
+vocoder_input_pad: 0
+vocoder_lr: 0.0001
+vocoder_mode: MOL
+wandb_watch_model: false
+webrtc_mode: 2
+weight_init: false
+win_size: 2048

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,175 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "50256": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50257": {
+      "content": "[angry]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50258": {
+      "content": "[fear]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50259": {
+      "content": "[surprised]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50260": {
+      "content": "[whispering]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50261": {
+      "content": "[advertisement]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50262": {
+      "content": "[dramatic]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50263": {
+      "content": "[narration]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50264": {
+      "content": "[crying]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50265": {
+      "content": "[happy]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50266": {
+      "content": "[sarcastic]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50267": {
+      "content": "[clear throat]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50268": {
+      "content": "[sigh]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50269": {
+      "content": "[shush]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50270": {
+      "content": "[cough]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50271": {
+      "content": "[groan]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50272": {
+      "content": "[sniff]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50273": {
+      "content": "[gasp]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50274": {
+      "content": "[chuckle]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50275": {
+      "content": "[laugh]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "model_max_length": 1024,
+  "pad_token": "<|endoftext|>",
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
+}

ve.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f0921cab452fa278bc25cd23ffd59d36f816d7dc5181dd1bef9751a7fb61f63c
+size 5695784

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff