Files
DS-LLM-TEMPLATE-FINETUNING/outputs/checkpoint-60/training_args.bin
T

37 lines
6.0 KiB
Plaintext
Raw Normal View History

2025-08-13 23:50:20 +00:00
PK training_args/data.pklFBZZZZZZZZ€ctrl.trainer.sft_config
SFTConfig
q)q}q(X
output_dirqX ./outputsqXoverwrite_output_dirq‰Xdo_trainq‰Xdo_evalq‰X
do_predictq‰X
eval_strategyq ctransformers.trainer_utils
IntervalStrategy
q
Xnoq …q Rq
Xprediction_loss_onlyq‰Xper_device_train_batch_sizeqKXper_device_eval_batch_sizeqKXper_gpu_train_batch_sizeqNXper_gpu_eval_batch_sizeqNXgradient_accumulation_stepsqKXeval_accumulation_stepsqNX
eval_delayqKXtorch_empty_cache_stepsqNX
learning_rateqG?*6âëC-X weight_decayqG?„záG®{X
adam_beta1qG?ìÌÌÌÌÌÍX
adam_beta2qG?ï÷ÎÙ‡+X adam_epsilonqG>EyŽâ0Œ:X
max_grad_normqG?ðXnum_train_epochsqKX max_stepsqK<Xlr_scheduler_typeqctransformers.trainer_utils
SchedulerType
q Xlinearq!…q"Rq#Xlr_scheduler_kwargsq$}q%X warmup_ratioq&GX warmup_stepsq'KX log_levelq(Xpassiveq)Xlog_level_replicaq*Xwarningq+Xlog_on_each_nodeq,ˆX logging_dirq-X*./outputs/runs/Aug13_23-30-02_df7db15e9335q.Xlogging_strategyq/h
Xstepsq0…q1Rq2Xlogging_first_stepq3‰X
logging_stepsq4KXlogging_nan_inf_filterq5ˆX
save_strategyq6ctransformers.trainer_utils
SaveStrategy
q7h0…q8Rq9X
save_stepsq:MôXsave_total_limitq;NXsave_safetensorsq<ˆXsave_on_each_nodeq=‰Xsave_only_modelq>‰X'restore_callback_states_from_checkpointq?‰Xno_cudaq@‰Xuse_cpuqA‰Xuse_mps_deviceqB‰XseedqCMO
X data_seedqDNX
jit_mode_evalqE‰Xuse_ipexqF‰Xbf16qGˆXfp16qH‰Xfp16_opt_levelqIXO1qJXhalf_precision_backendqKXautoqLXbf16_full_evalqMˆXfp16_full_evalqN‰Xtf32qONX
local_rankqPKX ddp_backendqQNX
tpu_num_coresqRNXtpu_metrics_debugqS‰XdebugqT]qUXdataloader_drop_lastqV‰X
eval_stepsqWNXdataloader_num_workersqXKXdataloader_prefetch_factorqYNX
past_indexqZJÿÿÿÿXrun_nameq[NX disable_tqdmq\‰Xremove_unused_columnsq]ˆX label_namesq^NXload_best_model_at_endq_‰Xmetric_for_best_modelq`NXgreater_is_betterqaNXignore_data_skipqb‰Xfsdpqc]qdXfsdp_min_num_paramsqeKX fsdp_configqf}qg(Xmin_num_paramsqhKXxlaqi‰X xla_fsdp_v2qj‰Xxla_fsdp_grad_ckptqk‰uX"fsdp_transformer_layer_cls_to_wrapqlNXaccelerator_configqmctransformers.trainer_pt_utils
AcceleratorConfig
qn)qo}qp(X
split_batchesqq‰Xdispatch_batchesqrNX even_batchesqsˆXuse_seedable_samplerqtˆX non_blockingqu‰Xgradient_accumulation_kwargsqvNubX deepspeedqwNXlabel_smoothing_factorqxGXoptimqyctransformers.training_args
OptimizerNames
qzX
adamw_8bitq{…q|Rq}X
optim_argsq~NX adafactorq‰Xgroup_by_lengthq€‰Xlength_column_nameqXlengthqX report_toqƒ]q„Xddp_find_unused_parametersq…NXddp_bucket_cap_mbq†NXddp_broadcast_buffersq‡NXdataloader_pin_memoryqˆˆXdataloader_persistent_workersq‰‰Xskip_memory_metricsqŠˆXuse_legacy_prediction_loopq‹‰X push_to_hubqŒ‰Xresume_from_checkpointqNX hub_model_idqŽNX hub_strategyqctransformers.trainer_utils
HubStrategy