instruct model setup
This commit is contained in:
@@ -34,7 +34,7 @@ data:
|
||||
|
||||
# Model Configuration
|
||||
model:
|
||||
name: "unsloth/Qwen2.5-Coder-7B" # Model name from HuggingFace Hub (optimized for instruction following)
|
||||
name: "unsloth/Qwen2.5-14B-Instruct" # Model name from HuggingFace Hub (optimized for instruction following)
|
||||
max_length: 2048 # Maximum sequence length for tokenization
|
||||
max_seq_length: 2048 # Maximum sequence length for training (RoPE scaling supported)
|
||||
dtype: null # Data type: null for auto detection, float16 for Tesla T4/V100, bfloat16 for Ampere+
|
||||
@@ -42,7 +42,7 @@ model:
|
||||
token: null # HuggingFace token for gated models (e.g., "hf_...")
|
||||
|
||||
# Training Model Parameters
|
||||
training_model: "unsloth/Qwen2.5-Coder-7B" # Model to use for training
|
||||
training_model: "unsloth/Qwen2.5-14B-Instruct" # Model to use for training
|
||||
training_max_seq_length: 2048 # Max sequence length for training
|
||||
training_dtype: null # Data type for training
|
||||
training_load_in_4bit: true # 4bit quantization for training
|
||||
@@ -67,6 +67,7 @@ training:
|
||||
|
||||
# Output Configuration
|
||||
output_dir: "./outputs" # Directory for training checkpoints
|
||||
save_name: "qwen_2.5_test"
|
||||
model_output_dir: "./models/instruct" # Directory to save the trained model
|
||||
|
||||
# Inference Configuration
|
||||
|
||||
@@ -34,7 +34,7 @@ data:
|
||||
|
||||
# Model Configuration
|
||||
model:
|
||||
name: "unsloth/Qwen2.5-Coder-7B" # Model name from HuggingFace Hub (optimized for instruction following)
|
||||
name: "unsloth/Qwen2.5-14B-Instruct" # Model name from HuggingFace Hub (optimized for instruction following)
|
||||
max_length: 2048 # Maximum sequence length for tokenization
|
||||
max_seq_length: 2048 # Maximum sequence length for training (RoPE scaling supported)
|
||||
dtype: null # Data type: null for auto detection, float16 for Tesla T4/V100, bfloat16 for Ampere+
|
||||
@@ -42,7 +42,7 @@ model:
|
||||
token: null # HuggingFace token for gated models (e.g., "hf_...")
|
||||
|
||||
# Training Model Parameters
|
||||
training_model: "unsloth/Qwen2.5-Coder-7B" # Model to use for training
|
||||
training_model: "unsloth/Qwen2.5-14B-Instruct" # Model to use for training
|
||||
training_max_seq_length: 2048 # Max sequence length for training
|
||||
training_dtype: null # Data type for training
|
||||
training_load_in_4bit: true # 4bit quantization for training
|
||||
@@ -67,6 +67,7 @@ training:
|
||||
|
||||
# Output Configuration
|
||||
output_dir: "./outputs" # Directory for training checkpoints
|
||||
save_name: "qwen_2.5_test"
|
||||
model_output_dir: "./models/instruct" # Directory to save the trained model
|
||||
|
||||
# Inference Configuration
|
||||
|
||||
Reference in New Issue
Block a user