# Comprehensive Instruct Configuration
# This file defines all parameters for instruction fine-tuning using conversational data
# Organized by level: task, data processing, model, training, and inference

# Task Configuration
task:
  # Task name. Options: instruct, code_reasoning, general_chat.
  name: "code_reasoning"
  # Model type. Options: instruction_following, conversational.
  type: "instruction_following"

# Data Processing Configuration
data:
  # Where the data comes from: "huggingface" or "custom".
  source: "custom"
  # Path to the conversation data file.
  data_path: "data/raw/swe_reasoning_dataset (3).jsonl"
  # Format of the data file: "jsonl" or "json".
  data_format: "jsonl"

  # Field mapping for conversation data.
  # Name of the field that contains the conversation array.
  conversation_field: "conversation"

  # Length limits.
  # Maximum text length; longer texts are truncated.
  max_length: 128000
  # Minimum text length; shorter texts are filtered out.
  min_length: 10

  # Text preprocessing.
  # Whether to clean and normalize text.
  clean_text: true

  # Data splitting. Each ratio lies in 0.0 to 1.0; the three values below
  # add up to 1.0.
  # Training split ratio.
  train_split: 0.8
  # Validation split ratio.
  validation_split: 0.1
  # Test split ratio.
  test_split: 0.1

  # Output.
  # Output format: "conversation" (chat format).
  output_format: "conversation"
  # Directory that receives the processed data.
  output_dir: "./data/processed/instruct/code_reasoning"

# Model Configuration
model:
  # Model name from the HuggingFace Hub (optimized for instruction following).
  name: "unsloth/llama-3.3-70b-instruct-bnb-4bit"
  # Maximum sequence length for tokenization.
  max_length: 128000
  # Maximum sequence length for training (RoPE scaling supported).
  max_seq_length: 128000
  # Data type: null for auto detection, float16 for Tesla T4/V100,
  # bfloat16 for Ampere+.
  dtype: null
  # Use 4bit quantization to reduce memory usage.
  load_in_4bit: true
  # HuggingFace token for gated models (e.g., "hf_...").
  token: null

  # Training model parameters.
  # Model to use for training.
  training_model: "unsloth/llama-3.3-70b-instruct-bnb-4bit"
  # Max sequence length for training.
  training_max_seq_length: 128000
  # Data type for training.
  training_dtype: null
  # 4bit quantization for training.
  training_load_in_4bit: true

# Training Configuration
training:
  # Optimizer and schedule settings.
  # NOTE(review): if these values are forwarded to Hugging Face
  # TrainingArguments, a positive max_steps takes precedence over num_epochs;
  # confirm which of the two limits is intended.
  num_epochs: 1                             # Number of training epochs (1 epoch is often sufficient for instruction tuning)
  batch_size: 1                             # Training batch size (small for large models)
  # Written with a decimal point on purpose: YAML 1.1 loaders such as PyYAML
  # only recognize exponent notation as a float when the mantissa contains
  # a ".", so a bare 2e-4 would be loaded as the string "2e-4".
  learning_rate: 2.0e-4                     # Learning rate (typical for instruction tuning)
  weight_decay: 0.01                        # Weight decay for optimizer (prevents overfitting)
  warmup_steps: 5                           # Warmup steps (fixed value)
  max_steps: 30                             # Maximum training steps (adjust based on dataset size)
  gradient_accumulation_steps: 4            # Gradient accumulation steps
  lr_scheduler_type: "linear"               # Scheduler type: "linear", "cosine", "polynomial"
  seed: 3407                                # Random seed for reproducibility

  # LoRA Configuration
  lora_r: 32                                # LoRA rank (higher = more parameters)
  lora_alpha: 16                            # LoRA alpha (scaling factor)
  lora_dropout: 0                           # LoRA dropout (0 is optimized)
  # Names of the modules that receive LoRA adapters.
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"

  # Output Configuration
  output_dir: "./outputs"                   # Directory for training checkpoints
  # NOTE(review): this name mentions qwen_2.5, while the model section of this
  # file configures a llama-3.3-70b model; confirm the intended save name.
  save_name: "qwen_2.5_test"
  model_output_dir: "./models/instruct"     # Directory to save the trained model

# Inference Configuration
inference:
  # Batch size for inference.
  batch_size: 1
  # Maximum number of new tokens to generate during inference.
  max_new_tokens: 1024
  # Sampling temperature (higher = more creative).
  temperature: 1.5
  # Min-p sampling parameter.
  min_p: 0.1
  # Use the key-value cache for faster generation.
  use_cache: true