s2t_train_ctc.py

Reading time: about 2 min
s2t_train_ctc.py

Base parser
usage: s2t_train_ctc.py [-h] [--config CONFIG] [--print_config] [--log_level {ERROR,WARNING,INFO,DEBUG,NOTSET}] [--drop_last_iter DROP_LAST_ITER] [--dry_run DRY_RUN]
                        [--iterator_type {sequence,category,chunk,task,none}] [--valid_iterator_type {sequence,category,chunk,task,none}] [--output_dir OUTPUT_DIR] [--ngpu NGPU] [--seed SEED]
                        [--num_workers NUM_WORKERS] [--num_att_plot NUM_ATT_PLOT] [--dist_backend DIST_BACKEND] [--dist_init_method DIST_INIT_METHOD] [--dist_world_size DIST_WORLD_SIZE] [--dist_rank DIST_RANK]
                        [--local_rank LOCAL_RANK] [--dist_master_addr DIST_MASTER_ADDR] [--dist_master_port DIST_MASTER_PORT] [--dist_launcher {slurm,mpi,None}]
                        [--multiprocessing_distributed MULTIPROCESSING_DISTRIBUTED] [--unused_parameters UNUSED_PARAMETERS] [--sharded_ddp SHARDED_DDP] [--use_deepspeed USE_DEEPSPEED]
                        [--deepspeed_config DEEPSPEED_CONFIG] [--gradient_as_bucket_view GRADIENT_AS_BUCKET_VIEW] [--ddp_comm_hook {none,fp16_compress_hook,bf16_compress_hook}] [--cudnn_enabled CUDNN_ENABLED]
                        [--cudnn_benchmark CUDNN_BENCHMARK] [--cudnn_deterministic CUDNN_DETERMINISTIC] [--use_tf32 USE_TF32] [--collect_stats COLLECT_STATS] [--write_collected_feats WRITE_COLLECTED_FEATS]
                        [--max_epoch MAX_EPOCH] [--patience PATIENCE] [--val_scheduler_criterion VAL_SCHEDULER_CRITERION VAL_SCHEDULER_CRITERION]
                        [--early_stopping_criterion EARLY_STOPPING_CRITERION EARLY_STOPPING_CRITERION EARLY_STOPPING_CRITERION] [--best_model_criterion BEST_MODEL_CRITERION [BEST_MODEL_CRITERION ...]]
                        [--keep_nbest_models KEEP_NBEST_MODELS [KEEP_NBEST_MODELS ...]] [--nbest_averaging_interval NBEST_AVERAGING_INTERVAL] [--grad_clip GRAD_CLIP] [--grad_clip_type GRAD_CLIP_TYPE]
                        [--grad_noise GRAD_NOISE] [--accum_grad ACCUM_GRAD] [--no_forward_run NO_FORWARD_RUN] [--resume RESUME] [--train_dtype {float16,float32,float64}] [--use_amp USE_AMP]
                        [--log_interval LOG_INTERVAL] [--use_matplotlib USE_MATPLOTLIB] [--use_tensorboard USE_TENSORBOARD] [--create_graph_in_tensorboard CREATE_GRAPH_IN_TENSORBOARD] [--use_wandb USE_WANDB]
                        [--wandb_project WANDB_PROJECT] [--wandb_id WANDB_ID] [--wandb_entity WANDB_ENTITY] [--wandb_name WANDB_NAME] [--wandb_model_log_interval WANDB_MODEL_LOG_INTERVAL]
                        [--wandb_allow_val_change WANDB_ALLOW_VAL_CHANGE] [--detect_anomaly DETECT_ANOMALY] [--use_adapter USE_ADAPTER] [--adapter {lora,houlsby}]
                        [--save_strategy {all,adapter_only,required_grad_only}] [--adapter_conf ADAPTER_CONF] [--pretrain_path PRETRAIN_PATH] [--init_param [INIT_PARAM ...]]
                        [--ignore_init_mismatch IGNORE_INIT_MISMATCH] [--freeze_param [FREEZE_PARAM ...]] [--num_iters_per_epoch NUM_ITERS_PER_EPOCH] [--batch_size BATCH_SIZE] [--valid_batch_size VALID_BATCH_SIZE]
                        [--batch_bins BATCH_BINS] [--valid_batch_bins VALID_BATCH_BINS] [--category_sample_size CATEGORY_SAMPLE_SIZE] [--upsampling_factor UPSAMPLING_FACTOR]
                        [--category_upsampling_factor CATEGORY_UPSAMPLING_FACTOR] [--dataset_upsampling_factor DATASET_UPSAMPLING_FACTOR] [--dataset_scaling_factor DATASET_SCALING_FACTOR]
                        [--max_batch_size MAX_BATCH_SIZE] [--min_batch_size MIN_BATCH_SIZE] [--train_shape_file TRAIN_SHAPE_FILE] [--valid_shape_file VALID_SHAPE_FILE]
                        [--batch_type {unsorted,sorted,folded,length,numel,catbel,catpow,catpow_balance_dataset}] [--valid_batch_type {unsorted,sorted,folded,length,numel,catbel,catpow,catpow_balance_dataset,None}]
                        [--fold_length FOLD_LENGTH] [--sort_in_batch {descending,ascending}] [--shuffle_within_batch SHUFFLE_WITHIN_BATCH] [--sort_batch {descending,ascending}] [--multiple_iterator MULTIPLE_ITERATOR]
                        [--chunk_length CHUNK_LENGTH] [--chunk_shift_ratio CHUNK_SHIFT_RATIO] [--num_cache_chunks NUM_CACHE_CHUNKS]
                        [--chunk_excluded_key_prefixes CHUNK_EXCLUDED_KEY_PREFIXES [CHUNK_EXCLUDED_KEY_PREFIXES ...]] [--chunk_default_fs CHUNK_DEFAULT_FS] [--chunk_max_abs_length CHUNK_MAX_ABS_LENGTH]
                        [--chunk_discard_short_samples CHUNK_DISCARD_SHORT_SAMPLES] [--train_data_path_and_name_and_type TRAIN_DATA_PATH_AND_NAME_AND_TYPE]
                        [--valid_data_path_and_name_and_type VALID_DATA_PATH_AND_NAME_AND_TYPE] [--multi_task_dataset MULTI_TASK_DATASET] [--allow_variable_data_keys ALLOW_VARIABLE_DATA_KEYS]
                        [--max_cache_size MAX_CACHE_SIZE] [--max_cache_fd MAX_CACHE_FD] [--allow_multi_rates ALLOW_MULTI_RATES] [--valid_max_cache_size VALID_MAX_CACHE_SIZE]
                        [--exclude_weight_decay EXCLUDE_WEIGHT_DECAY] [--exclude_weight_decay_conf EXCLUDE_WEIGHT_DECAY_CONF]
                        [--optim {adam,adamw,sgd,adadelta,adagrad,adamax,asgd,lbfgs,rmsprop,rprop,radam,accagd,adabound,adamod,diffgrad,lamb,novograd,pid,qhm,sgdw,yogi}] [--optim_conf OPTIM_CONF]
                        [--scheduler {reducelronplateau,lambdalr,steplr,multisteplr,exponentiallr,cosineannealinglr,noamlr,warmuplr,piecewiselinearwarmuplr,warmupsteplr,warmupreducelronplateau,cycliclr,onecyclelr,cosineannealingwarmrestarts,cosineannealingwarmuprestarts,tristagelr,exponentialdecaywarmup,None}]
                        [--scheduler_conf SCHEDULER_CONF] [--token_list TOKEN_LIST] [--init {xavier_uniform,xavier_normal,kaiming_uniform,kaiming_normal,None}] [--input_size INPUT_SIZE] [--ctc_conf CTC_CONF]
                        [--use_preprocessor USE_PREPROCESSOR] [--token_type {bpe,char,word,phn,hugging_face,whisper_en,whisper_multilingual}] [--bpemodel BPEMODEL] [--non_linguistic_symbols NON_LINGUISTIC_SYMBOLS]
                        [--cleaner {None,tacotron,jaconv,vietnamese,whisper_en,whisper_basic}]
                        [--g2p {None,g2p_en,g2p_en_no_space,pyopenjtalk,pyopenjtalk_kana,pyopenjtalk_accent,pyopenjtalk_accent_with_pause,pyopenjtalk_prosody,pypinyin_g2p,pypinyin_g2p_phone,pypinyin_g2p_phone_without_prosody,espeak_ng_arabic,espeak_ng_german,espeak_ng_french,espeak_ng_spanish,espeak_ng_russian,espeak_ng_greek,espeak_ng_finnish,espeak_ng_hungarian,espeak_ng_dutch,espeak_ng_english_us_vits,espeak_ng_hindi,espeak_ng_italian,espeak_ng_ukrainian,espeak_ng_polish,g2pk,g2pk_no_space,g2pk_explicit_space,korean_jaso,korean_jaso_no_space,g2p_is}]
                        [--speech_volume_normalize SPEECH_VOLUME_NORMALIZE] [--rir_scp RIR_SCP] [--rir_apply_prob RIR_APPLY_PROB] [--noise_scp NOISE_SCP] [--noise_apply_prob NOISE_APPLY_PROB]
                        [--noise_db_range NOISE_DB_RANGE] [--short_noise_thres SHORT_NOISE_THRES] [--frontend {default,sliding_window,s3prl,fused,whisper}] [--frontend_conf FRONTEND_CONF] [--specaug {specaug,None}]
                        [--specaug_conf SPECAUG_CONF] [--normalize {global_mvn,utterance_mvn,None}] [--normalize_conf NORMALIZE_CONF] [--model {espnet,espnet_ctc}] [--model_conf MODEL_CONF]
                        [--promptencoder {conformer,transformer,branchformer,e_branchformer}] [--promptencoder_conf PROMPTENCODER_CONF]
                        [--encoder {conformer,transformer,transformer_multispkr,contextual_block_transformer,contextual_block_conformer,vgg_rnn,rnn,wav2vec2,hubert,hubert_pretrain,torchaudiohubert,longformer,branchformer,whisper,e_branchformer,e_branchformer_ctc}]
                        [--encoder_conf ENCODER_CONF] [--preprocessor {s2t,s2t_ctc}] [--preprocessor_conf PREPROCESSOR_CONF]
s2t_train_ctc.py

s2t_train_ctc.py

Named Arguments

Common configuration