instruct fine tuning setup

2025-08-28 22:41:56 +00:00
parent d49b4ff2d5
commit 78d7063ce5
58 changed files with 2510234 additions and 98 deletions
@@ -17,7 +17,7 @@ data:
  conversation_field: "conversation"       # Field name containing conversation array
  
  # Data Format & Processing
-  max_length: 2048                          # Maximum text length (truncate longer texts)
+  max_length: 128000                          # Maximum text length; longer texts are truncated (NOTE(review): unit not stated here, confirm chars vs. tokens)
  min_length: 10                            # Minimum text length (filter out shorter texts)
  
  # Text Preprocessing
@@ -34,16 +34,16 @@ data:

 # Model Configuration
 model:
-  name: "unsloth/Qwen2.5-14B-Instruct"   # Model name from HuggingFace Hub (optimized for instruction following)
-  max_length: 2048                          # Maximum sequence length for tokenization
-  max_seq_length: 2048                      # Maximum sequence length for training (RoPE scaling supported)
+  name: "unsloth/llama-3.3-70b-instruct-bnb-4bit"   # HuggingFace Hub model ID (instruct variant; repo name suggests a pre-quantized bnb 4-bit checkpoint)
+  max_length: 128000                          # Maximum sequence length for tokenization (same value as max_seq_length below)
+  max_seq_length: 128000                      # Maximum sequence length for training (RoPE scaling supported); NOTE(review): confirm memory budget at this length
  dtype: null                               # Data type: null for auto detection, float16 for Tesla T4/V100, bfloat16 for Ampere+
  load_in_4bit: true                        # Use 4bit quantization to reduce memory usage
  token: null                               # HuggingFace token for gated models (e.g., "hf_...")
  
  # Training Model Parameters
-  training_model: "unsloth/Qwen2.5-14B-Instruct"  # Model to use for training
-  training_max_seq_length: 2048             # Max sequence length for training
+  training_model: "unsloth/llama-3.3-70b-instruct-bnb-4bit"  # Model to use for training (same ID as name above)
+  training_max_seq_length: 128000             # Max sequence length for training (same value as max_seq_length above)
  training_dtype: null                      # Data type for training
  training_load_in_4bit: true               # 4bit quantization for training

@@ -73,7 +73,7 @@ training:
 # Inference Configuration
 inference:
  batch_size: 1                             # Batch size for inference
-  max_new_tokens: 128                       # Maximum new tokens to generate during inference
+  max_new_tokens: 1024                       # Maximum number of new tokens to generate during inference
  temperature: 1.5                          # Sampling temperature (higher = more creative)
  min_p: 0.1                                # Min-p sampling parameter
  use_cache: true                           # Use key-value cache for faster generation
@@ -17,7 +17,7 @@ data:
  conversation_field: "conversation"       # Field name containing conversation array
  
  # Data Format & Processing
-  max_length: 2048                          # Maximum text length (truncate longer texts)
+  max_length: 128000                          # Maximum text length; longer texts are truncated (NOTE(review): unit not stated here, confirm chars vs. tokens)
  min_length: 10                            # Minimum text length (filter out shorter texts)
  
  # Text Preprocessing
@@ -34,16 +34,16 @@ data:

 # Model Configuration
 model:
-  name: "unsloth/Qwen2.5-14B-Instruct"   # Model name from HuggingFace Hub (optimized for instruction following)
-  max_length: 2048                          # Maximum sequence length for tokenization
-  max_seq_length: 2048                      # Maximum sequence length for training (RoPE scaling supported)
+  name: "unsloth/llama-3.3-70b-instruct-bnb-4bit"   # HuggingFace Hub model ID (instruct variant; repo name suggests a pre-quantized bnb 4-bit checkpoint)
+  max_length: 128000                          # Maximum sequence length for tokenization (same value as max_seq_length below)
+  max_seq_length: 128000                      # Maximum sequence length for training (RoPE scaling supported); NOTE(review): confirm memory budget at this length
  dtype: null                               # Data type: null for auto detection, float16 for Tesla T4/V100, bfloat16 for Ampere+
  load_in_4bit: true                        # Use 4bit quantization to reduce memory usage
  token: null                               # HuggingFace token for gated models (e.g., "hf_...")
  
  # Training Model Parameters
-  training_model: "unsloth/Qwen2.5-14B-Instruct"  # Model to use for training
-  training_max_seq_length: 2048             # Max sequence length for training
+  training_model: "unsloth/llama-3.3-70b-instruct-bnb-4bit"  # Model to use for training (same ID as name above)
+  training_max_seq_length: 128000             # Max sequence length for training (same value as max_seq_length above)
  training_dtype: null                      # Data type for training
  training_load_in_4bit: true               # 4bit quantization for training

@@ -70,10 +70,11 @@ training:
  save_name: "qwen_2.5_test"
  model_output_dir: "./models/instruct"     # Directory to save the trained model

+
 # Inference Configuration
 inference:
  batch_size: 1                             # Batch size for inference
-  max_new_tokens: 128                       # Maximum new tokens to generate during inference
+  max_new_tokens: 1024                       # Maximum number of new tokens to generate during inference
  temperature: 1.5                          # Sampling temperature (higher = more creative)
  min_p: 0.1                                # Min-p sampling parameter
  use_cache: true                           # Use key-value cache for faster generation