Instruct fine-tuning setup

This commit is contained in:
Your Name
2025-08-28 22:41:56 +00:00
parent d49b4ff2d5
commit 78d7063ce5
58 changed files with 2510234 additions and 98 deletions
@@ -17,7 +17,7 @@ data:
conversation_field: "conversation" # Field name containing conversation array
# Data Format & Processing
max_length: 2048 # Maximum text length (truncate longer texts)
max_length: 128000 # Maximum text length (truncate longer texts)
min_length: 10 # Minimum text length (filter out shorter texts)
# Text Preprocessing
@@ -34,16 +34,16 @@ data:
# Model Configuration
model:
name: "unsloth/Qwen2.5-14B-Instruct" # Model name from HuggingFace Hub (optimized for instruction following)
max_length: 2048 # Maximum sequence length for tokenization
max_seq_length: 2048 # Maximum sequence length for training (RoPE scaling supported)
name: "unsloth/llama-3.3-70b-instruct-bnb-4bit" # Model name from HuggingFace Hub (optimized for instruction following)
max_length: 128000 # Maximum sequence length for tokenization
max_seq_length: 128000 # Maximum sequence length for training (RoPE scaling supported)
dtype: null # Data type: null for auto-detection, float16 for Tesla T4/V100, bfloat16 for Ampere+
load_in_4bit: true # Use 4-bit quantization to reduce memory usage
token: null # HuggingFace token for gated models (e.g., "hf_...")
# Training Model Parameters
training_model: "unsloth/Qwen2.5-14B-Instruct" # Model to use for training
training_max_seq_length: 2048 # Max sequence length for training
training_model: "unsloth/llama-3.3-70b-instruct-bnb-4bit" # Model to use for training
training_max_seq_length: 128000 # Max sequence length for training
training_dtype: null # Data type for training
training_load_in_4bit: true # 4-bit quantization for training
@@ -73,7 +73,7 @@ training:
# Inference Configuration
inference:
batch_size: 1 # Batch size for inference
max_new_tokens: 128 # Maximum new tokens to generate during inference
max_new_tokens: 1024 # Maximum new tokens to generate during inference
temperature: 1.5 # Sampling temperature (higher = more creative)
min_p: 0.1 # Min-p sampling parameter
use_cache: true # Use key-value cache for faster generation
+8 -7
View File
@@ -17,7 +17,7 @@ data:
conversation_field: "conversation" # Field name containing conversation array
# Data Format & Processing
max_length: 2048 # Maximum text length (truncate longer texts)
max_length: 128000 # Maximum text length (truncate longer texts)
min_length: 10 # Minimum text length (filter out shorter texts)
# Text Preprocessing
@@ -34,16 +34,16 @@ data:
# Model Configuration
model:
name: "unsloth/Qwen2.5-14B-Instruct" # Model name from HuggingFace Hub (optimized for instruction following)
max_length: 2048 # Maximum sequence length for tokenization
max_seq_length: 2048 # Maximum sequence length for training (RoPE scaling supported)
name: "unsloth/llama-3.3-70b-instruct-bnb-4bit" # Model name from HuggingFace Hub (optimized for instruction following)
max_length: 128000 # Maximum sequence length for tokenization
max_seq_length: 128000 # Maximum sequence length for training (RoPE scaling supported)
dtype: null # Data type: null for auto-detection, float16 for Tesla T4/V100, bfloat16 for Ampere+
load_in_4bit: true # Use 4-bit quantization to reduce memory usage
token: null # HuggingFace token for gated models (e.g., "hf_...")
# Training Model Parameters
training_model: "unsloth/Qwen2.5-14B-Instruct" # Model to use for training
training_max_seq_length: 2048 # Max sequence length for training
training_model: "unsloth/llama-3.3-70b-instruct-bnb-4bit" # Model to use for training
training_max_seq_length: 128000 # Max sequence length for training
training_dtype: null # Data type for training
training_load_in_4bit: true # 4-bit quantization for training
@@ -70,10 +70,11 @@ training:
save_name: "qwen_2.5_test"
model_output_dir: "./models/instruct" # Directory to save the trained model
# Inference Configuration
inference:
batch_size: 1 # Batch size for inference
max_new_tokens: 128 # Maximum new tokens to generate during inference
max_new_tokens: 1024 # Maximum new tokens to generate during inference
temperature: 1.5 # Sampling temperature (higher = more creative)
min_p: 0.1 # Min-p sampling parameter
use_cache: true # Use key-value cache for faster generation