instruct model setup

2025-08-28 17:57:59 +00:00
parent 77c563f358
commit d49b4ff2d5
55 changed files with 27760 additions and 326 deletions
@@ -34,7 +34,7 @@ data:

 # Model Configuration
 model:
-  name: "unsloth/Qwen2.5-Coder-7B"     # Model name from HuggingFace Hub (optimized for instruction following)
+  name: "unsloth/Qwen2.5-14B-Instruct"   # Model name from HuggingFace Hub (optimized for instruction following)
  max_length: 2048                          # Maximum sequence length for tokenization
  max_seq_length: 2048                      # Maximum sequence length for training (RoPE scaling supported)
  dtype: null                               # Data type: null for auto-detection, float16 for Tesla T4/V100, bfloat16 for Ampere+
@@ -42,7 +42,7 @@ model:
  token: null                               # HuggingFace token for gated models (e.g., "hf_...")
  
  # Training Model Parameters
-  training_model: "unsloth/Qwen2.5-Coder-7B"  # Model to use for training
+  training_model: "unsloth/Qwen2.5-14B-Instruct"  # Model to use for training
  training_max_seq_length: 2048             # Max sequence length for training
  training_dtype: null                      # Data type for training
  training_load_in_4bit: true               # 4-bit quantization for training
@@ -67,6 +67,7 @@ training:
  
  # Output Configuration
  output_dir: "./outputs"                   # Directory for training checkpoints
+  save_name: "qwen_2.5_test"
  model_output_dir: "./models/instruct"     # Directory to save the trained model

 # Inference Configuration
@@ -34,7 +34,7 @@ data:

 # Model Configuration
 model:
-  name: "unsloth/Qwen2.5-Coder-7B"     # Model name from HuggingFace Hub (optimized for instruction following)
+  name: "unsloth/Qwen2.5-14B-Instruct"   # Model name from HuggingFace Hub (optimized for instruction following)
  max_length: 2048                          # Maximum sequence length for tokenization
  max_seq_length: 2048                      # Maximum sequence length for training (RoPE scaling supported)
  dtype: null                               # Data type: null for auto-detection, float16 for Tesla T4/V100, bfloat16 for Ampere+
@@ -42,7 +42,7 @@ model:
  token: null                               # HuggingFace token for gated models (e.g., "hf_...")
  
  # Training Model Parameters
-  training_model: "unsloth/Qwen2.5-Coder-7B"  # Model to use for training
+  training_model: "unsloth/Qwen2.5-14B-Instruct"  # Model to use for training
  training_max_seq_length: 2048             # Max sequence length for training
  training_dtype: null                      # Data type for training
  training_load_in_4bit: true               # 4-bit quantization for training
@@ -67,6 +67,7 @@ training:
  
  # Output Configuration
  output_dir: "./outputs"                   # Directory for training checkpoints
+  save_name: "qwen_2.5_test"
  model_output_dir: "./models/instruct"     # Directory to save the trained model

 # Inference Configuration