Instruct fine-tuning setup
This commit is contained in:
@@ -69,7 +69,7 @@ class InstructTrainer:
 self.model_output_dir = config.get('model_output_dir', './models/instruct')
 
 # Chat template
-self.chat_template = config.get('chat_template', 'qwen-2.5')
+self.chat_template = config.get('chat_template', 'llama-3.1')
 
 def load_model_and_tokenizer(self):
 """Load the pre-trained model and tokenizer"""
@@ -240,13 +240,13 @@ class InstructTrainer:
 try:
 # For Qwen models, we need to use the correct chat template tokens
 # Qwen uses different tokens than Llama
-if "qwen" in self.model_name.lower():
-instruction_part = "<|im_start|>user\n"
-response_part = "<|im_start|>assistant\n"
-else:
-# Default for other models
-instruction_part = "<|start_header_id|>user<|end_header_id|>\n\n"
-response_part = "<|start_header_id|>assistant<|end_header_id|>\n\n"
+# if "qwen" in self.model_name.lower():
+instruction_part = "<|im_start|>user\n"
+response_part = "<|im_start|>assistant\n"
+# else:
+# # Default for other models
+# instruction_part = "<|start_header_id|>user<|end_header_id|>\n\n"
+# response_part = "<|start_header_id|>assistant<|end_header_id|>\n\n"
 
 # Configure trainer to only train on responses
 self.trainer = train_on_responses_only(
@@ -291,7 +291,7 @@ class InstructTrainer:
 print("Step 6: Setting up trainer...")
 self.setup_trainer(formatted_dataset)
 
-# Setup response-only training (optional but recommended for chat models)
+#Setup response-only training (optional but recommended for chat models)
 print("Step 7: Setting up response-only training...")
 try:
 self.setup_response_only_training()
Reference in New Issue
Block a user