s2t_train_ctc.py
Reading time: about 2 min
s2t_train_ctc.py
Base parser usage
usage: s2t_train_ctc.py [-h] [--config CONFIG] [--print_config] [--log_level {ERROR,WARNING,INFO,DEBUG,NOTSET}] [--drop_last_iter DROP_LAST_ITER] [--dry_run DRY_RUN]
[--iterator_type {sequence,category,chunk,task,none}] [--valid_iterator_type {sequence,category,chunk,task,none}] [--output_dir OUTPUT_DIR] [--ngpu NGPU]
[--seed SEED] [--num_workers NUM_WORKERS] [--num_att_plot NUM_ATT_PLOT] [--dist_backend DIST_BACKEND] [--dist_init_method DIST_INIT_METHOD]
[--dist_world_size DIST_WORLD_SIZE] [--dist_rank DIST_RANK] [--local_rank LOCAL_RANK] [--dist_master_addr DIST_MASTER_ADDR] [--dist_master_port DIST_MASTER_PORT]
[--dist_launcher {slurm,mpi,None}] [--multiprocessing_distributed MULTIPROCESSING_DISTRIBUTED] [--unused_parameters UNUSED_PARAMETERS] [--sharded_ddp SHARDED_DDP]
[--use_deepspeed USE_DEEPSPEED] [--deepspeed_config DEEPSPEED_CONFIG] [--gradient_as_bucket_view GRADIENT_AS_BUCKET_VIEW]
[--ddp_comm_hook {none,fp16_compress_hook,bf16_compress_hook}] [--cudnn_enabled CUDNN_ENABLED] [--cudnn_benchmark CUDNN_BENCHMARK]
[--cudnn_deterministic CUDNN_DETERMINISTIC] [--use_tf32 USE_TF32] [--collect_stats COLLECT_STATS] [--write_collected_feats WRITE_COLLECTED_FEATS]
[--max_epoch MAX_EPOCH] [--patience PATIENCE] [--val_scheduler_criterion VAL_SCHEDULER_CRITERION VAL_SCHEDULER_CRITERION]
[--early_stopping_criterion EARLY_STOPPING_CRITERION EARLY_STOPPING_CRITERION EARLY_STOPPING_CRITERION]
[--best_model_criterion BEST_MODEL_CRITERION [BEST_MODEL_CRITERION ...]] [--keep_nbest_models KEEP_NBEST_MODELS [KEEP_NBEST_MODELS ...]]
[--nbest_averaging_interval NBEST_AVERAGING_INTERVAL] [--grad_clip GRAD_CLIP] [--grad_clip_type GRAD_CLIP_TYPE] [--grad_noise GRAD_NOISE] [--accum_grad ACCUM_GRAD]
[--no_forward_run NO_FORWARD_RUN] [--resume RESUME] [--train_dtype {float16,float32,float64}] [--use_amp USE_AMP] [--log_interval LOG_INTERVAL]
[--use_matplotlib USE_MATPLOTLIB] [--use_tensorboard USE_TENSORBOARD] [--create_graph_in_tensorboard CREATE_GRAPH_IN_TENSORBOARD] [--use_wandb USE_WANDB]
[--wandb_project WANDB_PROJECT] [--wandb_id WANDB_ID] [--wandb_entity WANDB_ENTITY] [--wandb_name WANDB_NAME] [--wandb_model_log_interval WANDB_MODEL_LOG_INTERVAL]
[--wandb_allow_val_change WANDB_ALLOW_VAL_CHANGE] [--detect_anomaly DETECT_ANOMALY] [--use_adapter USE_ADAPTER] [--adapter {lora,houlsby}]
[--save_strategy {all,adapter_only,required_grad_only}] [--adapter_conf ADAPTER_CONF] [--pretrain_path PRETRAIN_PATH] [--init_param [INIT_PARAM ...]]
[--ignore_init_mismatch IGNORE_INIT_MISMATCH] [--freeze_param [FREEZE_PARAM ...]] [--num_iters_per_epoch NUM_ITERS_PER_EPOCH] [--batch_size BATCH_SIZE]
[--valid_batch_size VALID_BATCH_SIZE] [--batch_bins BATCH_BINS] [--valid_batch_bins VALID_BATCH_BINS] [--category_sample_size CATEGORY_SAMPLE_SIZE]
[--upsampling_factor UPSAMPLING_FACTOR] [--category_upsampling_factor CATEGORY_UPSAMPLING_FACTOR] [--dataset_upsampling_factor DATASET_UPSAMPLING_FACTOR]
[--dataset_scaling_factor DATASET_SCALING_FACTOR] [--max_batch_size MAX_BATCH_SIZE] [--min_batch_size MIN_BATCH_SIZE] [--train_shape_file TRAIN_SHAPE_FILE]
[--valid_shape_file VALID_SHAPE_FILE] [--batch_type {unsorted,sorted,folded,length,numel,catbel,catpow,catpow_balance_dataset}]
[--valid_batch_type {unsorted,sorted,folded,length,numel,catbel,catpow,catpow_balance_dataset,None}] [--fold_length FOLD_LENGTH]
[--sort_in_batch {descending,ascending}] [--shuffle_within_batch SHUFFLE_WITHIN_BATCH] [--sort_batch {descending,ascending}] [--multiple_iterator MULTIPLE_ITERATOR]
[--chunk_length CHUNK_LENGTH] [--chunk_shift_ratio CHUNK_SHIFT_RATIO] [--num_cache_chunks NUM_CACHE_CHUNKS]
[--chunk_excluded_key_prefixes CHUNK_EXCLUDED_KEY_PREFIXES [CHUNK_EXCLUDED_KEY_PREFIXES ...]] [--chunk_default_fs CHUNK_DEFAULT_FS]
[--chunk_max_abs_length CHUNK_MAX_ABS_LENGTH] [--chunk_discard_short_samples CHUNK_DISCARD_SHORT_SAMPLES]
[--train_data_path_and_name_and_type TRAIN_DATA_PATH_AND_NAME_AND_TYPE] [--valid_data_path_and_name_and_type VALID_DATA_PATH_AND_NAME_AND_TYPE]
[--multi_task_dataset MULTI_TASK_DATASET] [--allow_variable_data_keys ALLOW_VARIABLE_DATA_KEYS] [--max_cache_size MAX_CACHE_SIZE] [--max_cache_fd MAX_CACHE_FD]
[--allow_multi_rates ALLOW_MULTI_RATES] [--valid_max_cache_size VALID_MAX_CACHE_SIZE] [--exclude_weight_decay EXCLUDE_WEIGHT_DECAY]
[--exclude_weight_decay_conf EXCLUDE_WEIGHT_DECAY_CONF]
[--optim {adam,adamw,sgd,adadelta,adagrad,adamax,asgd,lbfgs,rmsprop,rprop,radam,accagd,adabound,adamod,diffgrad,lamb,novograd,pid,qhm,sgdw,yogi}]
[--optim_conf OPTIM_CONF]
[--scheduler {reducelronplateau,lambdalr,steplr,multisteplr,exponentiallr,cosineannealinglr,noamlr,warmuplr,piecewiselinearwarmuplr,warmupsteplr,warmupreducelronplateau,cycliclr,onecyclelr,cosineannealingwarmrestarts,cosineannealingwarmuprestarts,tristagelr,exponentialdecaywarmup,None}]
[--scheduler_conf SCHEDULER_CONF] [--token_list TOKEN_LIST] [--init {xavier_uniform,xavier_normal,kaiming_uniform,kaiming_normal,None}] [--input_size INPUT_SIZE]
[--ctc_conf CTC_CONF] [--use_preprocessor USE_PREPROCESSOR] [--token_type {bpe,char,word,phn,hugging_face,whisper_en,whisper_multilingual}] [--bpemodel BPEMODEL]
[--non_linguistic_symbols NON_LINGUISTIC_SYMBOLS] [--cleaner {None,tacotron,jaconv,vietnamese,whisper_en,whisper_basic}]
[--g2p {None,g2p_en,g2p_en_no_space,pyopenjtalk,pyopenjtalk_kana,pyopenjtalk_accent,pyopenjtalk_accent_with_pause,pyopenjtalk_prosody,pypinyin_g2p,pypinyin_g2p_phone,pypinyin_g2p_phone_without_prosody,espeak_ng_arabic,espeak_ng_german,espeak_ng_french,espeak_ng_spanish,espeak_ng_russian,espeak_ng_greek,espeak_ng_finnish,espeak_ng_hungarian,espeak_ng_dutch,espeak_ng_english_us_vits,espeak_ng_hindi,espeak_ng_italian,espeak_ng_ukrainian,espeak_ng_polish,g2pk,g2pk_no_space,g2pk_explicit_space,korean_jaso,korean_jaso_no_space,g2p_is}]
[--speech_volume_normalize SPEECH_VOLUME_NORMALIZE] [--rir_scp RIR_SCP] [--rir_apply_prob RIR_APPLY_PROB] [--noise_scp NOISE_SCP]
[--noise_apply_prob NOISE_APPLY_PROB] [--noise_db_range NOISE_DB_RANGE] [--short_noise_thres SHORT_NOISE_THRES]
[--frontend {default,sliding_window,s3prl,fused,whisper}] [--frontend_conf FRONTEND_CONF] [--specaug {specaug,None}] [--specaug_conf SPECAUG_CONF]
[--normalize {global_mvn,utterance_mvn,None}] [--normalize_conf NORMALIZE_CONF] [--model {espnet,espnet_ctc}] [--model_conf MODEL_CONF]
[--promptencoder {conformer,transformer,branchformer,e_branchformer}] [--promptencoder_conf PROMPTENCODER_CONF]
[--encoder {conformer,transformer,transformer_multispkr,contextual_block_transformer,contextual_block_conformer,vgg_rnn,rnn,wav2vec2,hubert,hubert_pretrain,torchaudiohubert,longformer,branchformer,whisper,e_branchformer,e_branchformer_ctc}]
[--encoder_conf ENCODER_CONF] [--preprocessor {s2t,s2t_ctc}] [--preprocessor_conf PREPROCESSOR_CONF]