From 710d074b47f0ab65e74ef9304f64622167610c95 Mon Sep 17 00:00:00 2001
From: OwusuBlessing <owususammy509@gmail.com>
Date: Wed, 13 Aug 2025 21:17:01 +0100
Subject: [PATCH] added style mimicking pipelines

---
 configs/QUICK_REFERENCE.md                    |  191 +++
 configs/README.md                             |  207 +++
 configs/classification/emotion.yaml           |   52 +-
 configs/styling/formal.yaml                   |   80 +-
 data/alpaca/test.jsonl                        |    1 +
 data/alpaca/train.jsonl                       |    1 +
 data/alpaca/validation.jsonl                  |    1 +
 data/hf_dataset/data-00000-of-00001.arrow     |  Bin 0 -> 2208 bytes
 data/hf_dataset/dataset_info.json             |   24 +
 data/hf_dataset/state.json                    |   13 +
 .../styling/formal/alpaca/test.jsonl          |    1 +
 .../styling/formal/alpaca/train.jsonl         |    3 +
 .../styling/formal/alpaca/validation.jsonl    |    1 +
 data/processed/styling/formal/test.jsonl      |    1 +
 data/processed/styling/formal/train.jsonl     |    3 +
 .../processed/styling/formal/validation.jsonl |    1 +
 data/raw/styling/sample_formal.jsonl          |    5 +
 data/raw/styling/test_formal.jsonl            |    3 +
 data/raw/styling/test_missing_fields.jsonl    |    5 +
 .../__pycache__/__init__.cpython-311.pyc      |  Bin 0 -> 184 bytes
 .../__pycache__/__init__.cpython-311.pyc      |  Bin 0 -> 192 bytes
 .../data_processor.cpython-311.pyc            |  Bin 0 -> 75235 bytes
 pipelines/styling/data_processor.py           | 1488 +++++++++++++++++
 pipelines/styling/inference.py                |  346 ++++
 pipelines/styling/train.py                    |  446 +++++
 scripts/styling/__init__.py                   |   45 +
 scripts/styling/data_processor.py             |  302 ++++
 scripts/styling/inference.py                  |  223 +++
 scripts/styling/train.py                      |  168 ++
 test.py                                       |  251 +++
 test.readme                                   |    0
 31 files changed, 3816 insertions(+), 46 deletions(-)
 create mode 100644 configs/QUICK_REFERENCE.md
 create mode 100644 configs/README.md
 create mode 100644 data/alpaca/test.jsonl
 create mode 100644 data/alpaca/train.jsonl
 create mode 100644 data/alpaca/validation.jsonl
 create mode 100644 data/hf_dataset/data-00000-of-00001.arrow
 create mode 100644 data/hf_dataset/dataset_info.json
 create mode 100644 data/hf_dataset/state.json
 create mode 100644 data/processed/styling/formal/alpaca/test.jsonl
 create mode 100644 data/processed/styling/formal/alpaca/train.jsonl
 create mode 100644 data/processed/styling/formal/alpaca/validation.jsonl
 create mode 100644 data/processed/styling/formal/test.jsonl
 create mode 100644 data/processed/styling/formal/train.jsonl
 create mode 100644 data/processed/styling/formal/validation.jsonl
 create mode 100644 data/raw/styling/sample_formal.jsonl
 create mode 100644 data/raw/styling/test_formal.jsonl
 create mode 100644 data/raw/styling/test_missing_fields.jsonl
 create mode 100644 pipelines/__pycache__/__init__.cpython-311.pyc
 create mode 100644 pipelines/styling/__pycache__/__init__.cpython-311.pyc
 create mode 100644 pipelines/styling/__pycache__/data_processor.cpython-311.pyc
 create mode 100644 pipelines/styling/data_processor.py
 create mode 100644 pipelines/styling/inference.py
 create mode 100644 pipelines/styling/train.py
 create mode 100644 scripts/styling/__init__.py
 create mode 100644 scripts/styling/data_processor.py
 create mode 100644 scripts/styling/inference.py
 create mode 100644 scripts/styling/train.py
 create mode 100644 test.py
 create mode 100644 test.readme

diff --git a/configs/QUICK_REFERENCE.md b/configs/QUICK_REFERENCE.md
new file mode 100644
index 0000000..f198c42
--- /dev/null
+++ b/configs/QUICK_REFERENCE.md
@@ -0,0 +1,191 @@
+# Quick Reference Card
+
+## Essential Parameters (Most Common)
+
+### Data Source & Location
+```yaml
+data:
+  source: "huggingface|custom"             # REQUIRED: Data source type
+  dataset_name: "dataset/name"             # REQUIRED for huggingface
+  data_path: "./path/to/file"              # REQUIRED for custom
+  data_format: "jsonl|csv|json"            # REQUIRED for custom
+```
+
+### Field Mapping
+```yaml
+data:
+  input_field: "text"                      # REQUIRED: Input text field
+  label_field: "label"                     # REQUIRED for classification
+  output_field: "styled_text"              # REQUIRED for styling
+  instruction: "Style instruction"          # REQUIRED for styling
+```
+
+### Basic Processing
+```yaml
+data:
+  max_samples: 1000                        # Limit total samples
+  train_split: 0.8                         # Training ratio (0.0-1.0)
+  validation_split: 0.1                    # Validation ratio (0.0-1.0)
+  test_split: 0.1                          # Test ratio (0.0-1.0)
+  output_dir: "./output/path"              # Output directory
+```
+
+### Text Preprocessing
+```yaml
+data:
+  clean_text: true                         # Clean/normalize text
+  lowercase: true                          # Convert to lowercase
+  min_length: 10                           # Minimum text length
+  max_length: 512                          # Maximum text length
+```
+
+### Model & Training
+```yaml
+model:
+  name: "bert-base-uncased"                # Model name
+  max_length: 512                          # Max sequence length
+
+training:
+  num_epochs: 3                            # Training epochs
+  batch_size: 16                           # Batch size
+  learning_rate: 2e-5                      # Learning rate
+```
+
+## Common Configurations by Task
+
+### Classification
+```yaml
+task:
+  name: "classification"
+  type: "sequence_classification"
+
+data:
+  source: "huggingface"
+  dataset_name: "dair-ai/emotion"
+  input_field: "text"
+  label_field: "label"
+  output_format: "classification"
+```
+
+### Styling
+```yaml
+task:
+  name: "styling"
+  type: "style_transfer"
+
+data:
+  source: "custom"
+  data_path: "./data.jsonl"
+  input_field: "text"
+  output_field: "styled_text"
+  instruction: "Rewrite in formal style"
+  output_format: "alpaca"
+```
+
+### Text Generation
+```yaml
+task:
+  name: "completion"
+  type: "text_generation"
+
+data:
+  source: "custom"
+  data_path: "./prompts.jsonl"
+  input_field: "prompt"
+  output_field: "completion"
+  output_format: "instruction"
+```
+
+## Quick Start Templates
+
+### 1. HuggingFace Dataset
+```yaml
+task:
+  name: "classification"
+  type: "sequence_classification"
+
+data:
+  source: "huggingface"
+  dataset_name: "your/dataset"
+  input_field: "text"
+  label_field: "label"
+  max_samples: 1000
+  output_dir: "./output"
+```
+
+### 2. Custom JSONL File
+```yaml
+task:
+  name: "styling"
+  type: "style_transfer"
+
+data:
+  source: "custom"
+  data_path: "./your_data.jsonl"
+  data_format: "jsonl"
+  input_field: "source"
+  output_field: "target"
+  instruction: "Your style instruction"
+  output_dir: "./output"
+```
+
+### 3. CSV File
+```yaml
+task:
+  name: "classification"
+  type: "sequence_classification"
+
+data:
+  source: "custom"
+  data_path: "./your_data.csv"
+  data_format: "csv"
+  input_field: "text"
+  label_field: "label"
+  delimiter: ","
+  output_dir: "./output"
+```
+
+## Parameter Ranges & Recommendations
+
+### Split Ratios
+- **Train + validation + test ratios must sum to ≤ 1.0**
+- **Common**: train=0.8, val=0.1, test=0.1
+- **Small datasets**: train=0.7, val=0.15, test=0.15
+
+### Learning Rates
+- **Fine-tuning**: 1e-5 to 5e-5
+- **Training from scratch**: 1e-4 to 1e-3
+- **Start with**: 2e-5
+
+### Batch Sizes
+- **GPU**: 8, 16, 32, 64 (depending on available memory)
+- **CPU**: 4, 8, 16
+- **Start with**: 16
+
+### Text Lengths
+- **BERT**: 512 (max)
+- **GPT-2**: 1024 (max)
+- **T5**: 512 (max)
+- **Start with**: 256
+
+## Common Issues & Fixes
+
+| Issue | Cause | Fix |
+|-------|-------|-----|
+| "File not found" | Wrong path | Check `data_path` and `output_dir` |
+| "Memory error" | Batch too large | Reduce `batch_size` |
+| "Split error" | Ratios > 1.0 | Ensure splits sum to ≤ 1.0 |
+| "Poor performance" | Wrong learning rate | Try 1e-5 to 5e-5 range |
+| "Slow processing" | Text too long | Reduce `max_length` |
+
+## Environment Variables
+```bash
+# Set cache directory
+export HF_HOME="./cache"
+
+# Set output directory
+export OUTPUT_DIR="./results"
+
+# Set log level
+export LOG_LEVEL="INFO"
+```
diff --git a/configs/README.md b/configs/README.md
new file mode 100644
index 0000000..6bf0d43
--- /dev/null
+++ b/configs/README.md
@@ -0,0 +1,207 @@
+# Configuration Files Documentation
+
+This directory contains YAML configuration files for different machine learning tasks. Each configuration file is organized into logical sections and includes comprehensive documentation for all parameters.
+
+## Configuration Structure
+
+All configuration files follow a consistent structure organized into these main sections:
+
+### 1. Task Configuration
+```yaml
+task:
+  name: "task_type"                        # Task type: classification, completion, styling, matching
+  type: "specific_type"                    # Specific model/task type
+```
+
+**Available Task Types:**
+- **classification**: Text classification tasks (emotion, sentiment, topic, etc.)
+- **completion**: Text generation and completion tasks
+- **styling**: Style transfer and text transformation tasks
+- **matching**: Semantic matching and similarity tasks
+
+### 2. Data Processing Configuration
+```yaml
+data:
+  # Data Source
+  source: "huggingface|custom"             # Where to get data from
+  
+  # Data Location
+  dataset_name: "dataset/name"             # HuggingFace dataset name (for huggingface source)
+  data_path: "./path/to/file"              # Path to custom data file (for custom source)
+  data_format: "jsonl|csv|json"            # File format for custom data
+  
+  # Field Mapping
+  input_field: "text"                      # Field containing input text
+  output_field: "styled_text"              # Field containing output (for styling)
+  label_field: "label"                     # Field containing labels (for classification)
+  id_field: "id"                           # Optional ID field for tracking
+  
+  # Processing Parameters
+  max_samples: 1000                        # Maximum samples to process
+  train_split: 0.8                         # Training split ratio
+  validation_split: 0.1                    # Validation split ratio
+  test_split: 0.1                          # Test split ratio
+  
+  # Text Preprocessing
+  clean_text: true                         # Clean and normalize text
+  remove_special_chars: false              # Remove special characters
+  lowercase: true                          # Convert to lowercase
+  min_length: 10                           # Minimum text length
+  max_length: 1000                         # Maximum text length
+  
+  # Output Configuration
+  output_format: "format_type"             # Output format
+  output_dir: "./output/path"              # Output directory
+```
+
+**Data Source Types:**
+- **huggingface**: Use datasets from HuggingFace Hub
+- **custom**: Use local files (JSONL, CSV, JSON)
+
+**Output Formats:**
+- **classification**: Raw classification format
+- **instruction**: Instruction-following format
+- **conversation**: Conversational format
+- **qa**: Question-answer format
+- **styling**: Raw styling format
+- **alpaca**: Alpaca instruction format
+
+### 3. Model Configuration
+```yaml
+model:
+  name: "model_name"                       # Model from HuggingFace Hub
+  max_length: 512                          # Maximum sequence length
+  num_labels: 6                            # Number of labels (for classification)
+```
+
+**Recommended Models by Task:**
+- **Classification**: `bert-base-uncased`, `distilbert-base-uncased`
+- **Styling**: `t5-base`, `gpt2-medium`
+- **Completion**: `gpt2-medium`, `gpt2-large`
+- **Matching**: `sentence-transformers/all-MiniLM-L6-v2`
+
+### 4. Training Configuration
+```yaml
+training:
+  num_epochs: 3                            # Number of training epochs
+  batch_size: 16                           # Training batch size
+  learning_rate: 2e-5                      # Learning rate
+  weight_decay: 0.01                       # Weight decay
+  lr_scheduler_type: "linear"              # Learning rate scheduler
+  warmup_ratio: 0.1                        # Warmup ratio
+  data_dir: "./data/path"                  # Training data directory
+  output_dir: "./model/output"             # Model output directory
+```
+
+**Learning Rate Guidelines:**
+- **Fine-tuning**: 1e-5 to 5e-5
+- **Training from scratch**: 1e-4 to 1e-3
+
+**Scheduler Types:**
+- **linear**: Linear decay
+- **cosine**: Cosine annealing
+- **polynomial**: Polynomial decay
+
+### 5. Inference Configuration
+```yaml
+inference:
+  model_path: "./model/path"               # Path to saved model
+  device: "auto"                           # Device to use
+  batch_size: 32                           # Inference batch size
+  return_probabilities: true                # Return probabilities
+  return_top_k: 3                          # Return top K predictions
+  max_new_tokens: 128                      # Max tokens to generate
+  temperature: 0.8                         # Sampling temperature
+```
+
+**Device Options:**
+- **auto**: Automatically detect best device
+- **cuda**: Use GPU if available
+- **cpu**: Force CPU usage
+
+**Temperature Guidelines:**
+- **0.0**: Deterministic (always the same output)
+- **0.7-0.9**: Balanced creativity
+- **1.0+**: More random/creative
+
+## Task-Specific Parameters
+
+### Classification Tasks
+```yaml
+data:
+  label_encoding: "auto|numeric|string"    # How to encode labels
+  multilabel: false                        # Multi-label vs single-label
+  label_separator: ","                     # Separator for multi-label
+```
+
+### Styling Tasks
+```yaml
+data:
+  instruction: "Style instruction text"    # The style instruction
+```
+
+### Completion Tasks
+```yaml
+data:
+  prompt_template: "template"               # Prompt template
+  completion_length: 100                   # Target completion length
+```
+
+## Advanced Configuration
+
+### HuggingFace Specific
+```yaml
+data:
+  hf_split: "train"                        # Dataset split to use
+  hf_cache_dir: "./cache"                  # Cache directory
+  test_split_from: "train"                 # Source for test split
+  val_split_from: "train"                  # Source for validation split
+```
+
+### Custom Data Specific
+```yaml
+data:
+  encoding: "utf-8"                        # File encoding
+  delimiter: ","                           # CSV delimiter
+```
+
+## Usage Examples
+
+### Basic Usage
+```bash
+# Use YAML configuration
+python scripts/task_type/data_processor.py --config configs/task_type/config.yaml
+
+# Override specific parameters
+python scripts/task_type/data_processor.py \
+  --config configs/task_type/config.yaml \
+  --max-samples 1000 \
+  --learning-rate 3e-5
+```
+
+### Creating Custom Configurations
+1. Copy an existing config file
+2. Modify parameters for your specific use case
+3. Update paths and model names
+4. Test with a small dataset first
+
+## Best Practices
+
+1. **Start with Defaults**: Use default values and adjust based on results
+2. **Validate Paths**: Ensure all file paths are correct and accessible
+3. **Monitor Resources**: Adjust batch sizes based on available GPU memory
+4. **Test Incrementally**: Test with small datasets before full processing
+5. **Version Control**: Keep configurations in version control for reproducibility
+
+## Troubleshooting
+
+### Common Issues:
+- **File Not Found**: Check `data_path` and `output_dir` paths
+- **Memory Errors**: Reduce `batch_size` or `max_length`
+- **Poor Performance**: Adjust `learning_rate` or `num_epochs`
+- **Split Errors**: Ensure split ratios sum to ≤ 1.0
+
+### Getting Help:
+- Check the script help: `python script.py --help`
+- Review the pipeline logs for detailed error messages
+- Verify YAML syntax and parameter values
diff --git a/configs/classification/emotion.yaml b/configs/classification/emotion.yaml
index dd6958e..2827292 100644
--- a/configs/classification/emotion.yaml
+++ b/configs/classification/emotion.yaml
@@ -1,6 +1,6 @@
 # Comprehensive Classification Configuration
 # This file defines all parameters for emotion classification using the dair-ai/emotion dataset
-# Organized by level: data processing, model, training, and inference
+# Organized by level: task, data processing, model, training, and inference
 
 # Task Configuration
 task:
@@ -15,9 +15,9 @@ data:
   data_format: "jsonl"                     # Data format: "jsonl", "csv", "json" (for custom data)
   
   # Field Mapping
-  input_field: "text"                      # Field name containing input text
-  label_field: "label"                     # Field name containing labels
-  id_field: null                           # Optional ID field name
+  input_field: "text"                      # Field name containing input text to be classified
+  label_field: "label"                     # Field name containing classification labels
+  id_field: null                           # Optional ID field name for tracking individual samples
   
   # Processing Parameters
   max_samples: 1000                        # Maximum samples to process (null for all samples)
@@ -26,54 +26,54 @@ data:
   test_split: 0.1                          # Test split ratio (0.0 to 1.0)
   
   # Text Preprocessing
-  clean_text: true                         # Clean and normalize text
-  remove_special_chars: false              # Remove special characters from text
-  lowercase: true                          # Convert text to lowercase
+  clean_text: true                         # Clean and normalize text (remove extra spaces, normalize quotes, etc.)
+  remove_special_chars: false              # Remove special characters from text (keep for emotion analysis)
+  lowercase: true                          # Convert text to lowercase (standard for BERT models)
   min_length: 10                           # Minimum text length (filter out shorter texts)
   max_length: 1000                         # Maximum text length (truncate longer texts)
   
   # Label Processing
   label_encoding: "auto"                   # Label encoding: "auto", "numeric", "string"
-  multilabel: false                        # Enable multilabel classification
-  label_separator: ","                     # Separator for multilabel datasets
+  multilabel: false                        # Enable multilabel classification (false for single emotion per text)
+  label_separator: ","                     # Separator for multilabel datasets (comma-separated labels)
   
   # Output Configuration
   output_format: "classification"          # Output format: "classification", "instruction", "conversation", "qa"
-  output_dir: "./data/processed/classification/emotion"  # Specific output directory for this dataset
+  output_dir: "./data/processed/classification/emotion"  # Output directory for processed data and splits
   
   # HuggingFace Specific
-  hf_split: "train"                        # HuggingFace dataset split to use
-  hf_cache_dir: null                       # HuggingFace cache directory (null for default)
+  hf_split: "train"                        # HuggingFace dataset split to use as base
+  hf_cache_dir: null                       # HuggingFace cache directory (null for default ~/.cache/huggingface)
   
   # Split Configuration (Advanced)
   test_split_from: "train"                 # Source for test split: "train", "use_test_if_available", "use_val_if_available"
   val_split_from: "train"                  # Source for validation split: "train", "use_val_if_available"
   
   # Custom Data Specific
-  encoding: "utf-8"                        # File encoding for custom data
-  delimiter: ","                           # Delimiter for CSV files
+  encoding: "utf-8"                        # File encoding for custom data files
+  delimiter: ","                           # Delimiter for CSV files (comma for standard CSV)
 
 # Model Configuration
 model:
-  name: "bert-base-uncased"                # Model name from HuggingFace Hub
-  max_length: 512                          # Maximum sequence length for tokenization
-  num_labels: 6                            # Number of classification labels
+  name: "bert-base-uncased"                # Model name from HuggingFace Hub (good for text classification)
+  max_length: 512                          # Maximum sequence length for tokenization (BERT limit)
+  num_labels: 6                            # Number of classification labels (emotion categories)
 
 # Training Configuration
 training:
-  num_epochs: 3                            # Number of training epochs
-  batch_size: 16                           # Training batch size
-  learning_rate: 2e-5                      # Learning rate (typical range: 1e-5 to 5e-5)
-  weight_decay: 0.01                       # Weight decay for optimizer (typical range: 0.01 to 0.1)
+  num_epochs: 3                            # Number of training epochs (adjust based on dataset size)
+  batch_size: 16                           # Training batch size (adjust based on GPU memory)
+  learning_rate: 2e-5                      # Learning rate (typical range: 1e-5 to 5e-5 for fine-tuning)
+  weight_decay: 0.01                       # Weight decay for optimizer (prevents overfitting)
   lr_scheduler_type: "linear"              # Scheduler type: "linear", "cosine", "polynomial"
   warmup_ratio: 0.1                        # Warmup ratio for scheduler (0.0 to 1.0)
   data_dir: "./data/processed/classification/emotion"  # Directory containing train/validation/test JSONL files
-  output_dir: "./results/classification/emotion_model"  # Output directory for saved model
+  output_dir: "./results/classification/emotion_model"  # Output directory for saved model and checkpoints
 
 # Inference Configuration
 inference:
   model_path: "./results/classification/emotion_model"  # Path to saved model directory
-  device: "auto"                           # Device: "auto", "cuda", "cpu"
-  batch_size: 32                           # Batch size for inference
-  return_probabilities: true                # Return all class probabilities
-  return_top_k: 3                          # Return top K predictions
+  device: "auto"                           # Device: "auto", "cuda", "cpu" (auto detects best available)
+  batch_size: 32                           # Batch size for inference (can be larger than training)
+  return_probabilities: true                # Return all class probabilities (not just top prediction)
+  return_top_k: 3                          # Return top K predictions (useful for confidence analysis)
diff --git a/configs/styling/formal.yaml b/configs/styling/formal.yaml
index fb79712..d13d2be 100644
--- a/configs/styling/formal.yaml
+++ b/configs/styling/formal.yaml
@@ -1,29 +1,69 @@
+# Comprehensive Styling Configuration
+# This file defines all parameters for formal style transfer tasks
+# Organized by level: task, data processing, model, training, and inference
+
+# Task Configuration
 task:
-  name: "styling"
-  type: "style_transfer"
+  name: "styling"                          # Task type: classification, completion, styling, matching
+  type: "style_transfer"                   # Model type: style_transfer, text_generation, etc.
 
+# Data Processing Configuration
 data:
-  source: "custom"
-  input_field: "text"
-  style_field: "style"
-  max_length: 256
-  train_split: 0.8
-  validation_split: 0.1
-  test_split: 0.1
+  source: "custom"                          # Data source: "huggingface" or "custom"
+  data_path: "./data/raw/styling/sample_formal.jsonl"  # Path to custom data file (required for custom source)
+  dataset_name: null                        # HuggingFace dataset name (required for huggingface source)
+  
+  # Field Mapping
+  input_field: "text"                       # Field name containing source text to be styled
+  output_field: "styled_text"               # Field name containing the styled/transformed text
+  
+  # Style Instruction
+  instruction: "Rewrite the following text in a formal style"  # The style instruction that guides the transformation
+  
+  # Data Format & Processing
+  data_format: "jsonl"                      # Data format: "jsonl", "csv", "json" (for custom data)
+  max_length: 256                           # Maximum text length (truncate longer texts)
+  min_length: 10                            # Minimum text length (filter out shorter texts)
+  
+  # Text Preprocessing
+  clean_text: true                          # Clean and normalize text (remove extra spaces, normalize quotes, etc.)
+  lowercase: false                          # Convert text to lowercase (false for formal style to preserve case)
+  
+  # Data Splitting
+  train_split: 0.8                          # Training split ratio (0.0 to 1.0)
+  validation_split: 0.1                     # Validation split ratio (0.0 to 1.0)
+  test_split: 0.1                           # Test split ratio (0.0 to 1.0)
+  
+  # Output Configuration
+  output_format: "alpaca"                   # Output format: "styling" (raw), "alpaca" (instruction format)
+  output_dir: "./data/processed/styling/formal"  # Output directory for processed data and HuggingFace datasets
 
+# Model Configuration
 model:
-  name: "t5-base"
-  max_length: 256
+  name: "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"  # Model name from HuggingFace Hub
+  max_length: 2048                          # Maximum sequence length for tokenization
+  max_seq_length: 2048                      # Maximum sequence length for training (RoPE scaling supported)
+  dtype: null                               # Data type: null for auto detection, float16 for Tesla T4/V100, bfloat16 for Ampere+
+  load_in_4bit: true                        # Use 4bit quantization to reduce memory usage
+  token: null                               # HuggingFace token for gated models (e.g., "hf_...")
+  
+  # Training Model Parameters
+  training_model: "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"  # Model to use for training
+  training_max_seq_length: 2048             # Max sequence length for training
+  training_dtype: null                      # Data type for training
+  training_load_in_4bit: true               # 4bit quantization for training
 
+# Training Configuration
 training:
-  num_epochs: 3
-  batch_size: 16
-  learning_rate: 3e-5
-  weight_decay: 0.01
-  warmup_ratio: 0.1
-  lr_scheduler_type: "linear"
+  num_epochs: 3                             # Number of training epochs
+  batch_size: 16                            # Training batch size (adjust based on GPU memory)
+  learning_rate: 3e-5                       # Learning rate (typical range: 1e-5 to 5e-5 for fine-tuning)
+  weight_decay: 0.01                        # Weight decay for optimizer (prevents overfitting)
+  warmup_ratio: 0.1                         # Warmup ratio for scheduler (0.0 to 1.0)
+  lr_scheduler_type: "linear"               # Scheduler type: "linear", "cosine", "polynomial"
 
+# Inference Configuration
 inference:
-  batch_size: 32
-  max_new_tokens: 128
-  temperature: 0.8
+  batch_size: 32                            # Batch size for inference (can be larger than training)
+  max_new_tokens: 128                       # Maximum new tokens to generate during inference
+  temperature: 0.8                          # Sampling temperature (0.0 = deterministic, higher = more random)
diff --git a/data/alpaca/test.jsonl b/data/alpaca/test.jsonl
new file mode 100644
index 0000000..659cab5
--- /dev/null
+++ b/data/alpaca/test.jsonl
@@ -0,0 +1 @@
+{"instruction": "Rewrite the following text in a formal style", "input": "This is really cool stuff!", "output": "This is quite impressive material."}
diff --git a/data/alpaca/train.jsonl b/data/alpaca/train.jsonl
new file mode 100644
index 0000000..2af6ff3
--- /dev/null
+++ b/data/alpaca/train.jsonl
@@ -0,0 +1 @@
+{"instruction": "Rewrite the following text in a formal style", "input": "I'm gonna go to the store later.", "output": "I will go to the store later."}
diff --git a/data/alpaca/validation.jsonl b/data/alpaca/validation.jsonl
new file mode 100644
index 0000000..4be4e50
--- /dev/null
+++ b/data/alpaca/validation.jsonl
@@ -0,0 +1 @@
+{"instruction": "Rewrite the following text in a formal style", "input": "Hey, what's up? How are you doing today?", "output": "Hello, how are you doing today?"}
diff --git a/data/hf_dataset/data-00000-of-00001.arrow b/data/hf_dataset/data-00000-of-00001.arrow
new file mode 100644
index 0000000000000000000000000000000000000000..52b04b72bebabeab4bba7fd0154605e1b2d258e4
GIT binary patch
literal 2208
zcmds2v2GJV5Zw^lSjIvwB9sPcI2UP*p|&C-QQ2*TAO!+#>^*zM`S!Toi?Je06cm&x
zDfj?NK7x;+KzsrJVBYNA1<R6?xS+}S=5BZ9?e5H*x$|<lT(<5A(Gn~TabMI#Rn#$p
z>{Kr8O-w6=eSe3?a6{Q%$DMt3jQ>E{_Zxa1=r}Y%kIsWoNtdO{`iYLMOS7TVCie4@
zn<omR0A?MJgAN66OVdjkWhz*}q~j!WTi{LRwp-xTha2y|SS*CNha4x_c#N1sIaEIH
zFR;CaK7}$5pTRGfsxrU(g$)ny;{$o@fcd@wSA###M2N4z{M>-60_PdR^9L|3v4?up
z#9d#ZR#C@14g!W_W8oZD>H$0dguoWd6BKxlL6D|+P;^Gq?<jUFJ|E(Py)-b-`+$4r
z7nF0}^}hVZy|PL2{ww=Xh))HO`COH9UgEzM&eP)A;-*}Ya$e%8@V_ZAAJ_2Zyu@D=
z{)?4Fn4guY-)Y-9FX})~pq%Tmnx)z)auY?Ni6S#YR}psz>6j#J=~PB!-8@qN3yUzk
z5bk{_b2>Pf(%8hY#GM>2#AlsJ6-CmiwA)wn4$UUg9azeeBkG$ONqFZbqml7Bn30?x
zy_jfAxKbsfXih_8IGHRAAK{Qf9^!cj<=PBrrlV-%p&+P3lWWd;pE09)n&87@^_ik6
zhpc7P%@2Hx+K%w;<Jvw~pqvAgF$ll>J|SP}Xr$~g)u#$xa?-x*P$G4zMu^c(@)h{h
z<k%!NXBrDA%u<k*BGs77*riv#z!DVL#Ke|sjlJ_?TTkrD!Hq_{-KN36NP3NJ%3dx<
zSiQz|HO#@sMOeUJ#r@$E`|LK8b<>7kC#|op?8)0s+vfLsov0GI`R!z``2UCB(!Y48
BL1q8|

literal 0
HcmV?d00001

diff --git a/data/hf_dataset/dataset_info.json b/data/hf_dataset/dataset_info.json
new file mode 100644
index 0000000..6bff0b3
--- /dev/null
+++ b/data/hf_dataset/dataset_info.json
@@ -0,0 +1,24 @@
+{
+  "citation": "",
+  "description": "",
+  "features": {
+    "instruction": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "input": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "output": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "text": {
+      "dtype": "string",
+      "_type": "Value"
+    }
+  },
+  "homepage": "",
+  "license": ""
+}
\ No newline at end of file
diff --git a/data/hf_dataset/state.json b/data/hf_dataset/state.json
new file mode 100644
index 0000000..711aac0
--- /dev/null
+++ b/data/hf_dataset/state.json
@@ -0,0 +1,13 @@
+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "4e028847697e7b16",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": null
+}
\ No newline at end of file
diff --git a/data/processed/styling/formal/alpaca/test.jsonl b/data/processed/styling/formal/alpaca/test.jsonl
new file mode 100644
index 0000000..ee6ece1
--- /dev/null
+++ b/data/processed/styling/formal/alpaca/test.jsonl
@@ -0,0 +1 @@
+{"instruction": "Rewrite the following text in a formal style", "input": "That's totally awesome!", "output": "That is quite remarkable!"}
diff --git a/data/processed/styling/formal/alpaca/train.jsonl b/data/processed/styling/formal/alpaca/train.jsonl
new file mode 100644
index 0000000..93f0ddf
--- /dev/null
+++ b/data/processed/styling/formal/alpaca/train.jsonl
@@ -0,0 +1,3 @@
+{"instruction": "Rewrite the following text in a formal style", "input": "I'm gonna go to the store later.", "output": "I will go to the store later."}
+{"instruction": "Rewrite the following text in a formal style", "input": "Hey, what's up? How are you doing today?", "output": "Hello, how are you doing today?"}
+{"instruction": "Rewrite the following text in a formal style", "input": "What's the deal with this?", "output": "What is the situation regarding this matter?"}
diff --git a/data/processed/styling/formal/alpaca/validation.jsonl b/data/processed/styling/formal/alpaca/validation.jsonl
new file mode 100644
index 0000000..659cab5
--- /dev/null
+++ b/data/processed/styling/formal/alpaca/validation.jsonl
@@ -0,0 +1 @@
+{"instruction": "Rewrite the following text in a formal style", "input": "This is really cool stuff!", "output": "This is quite impressive material."}
diff --git a/data/processed/styling/formal/test.jsonl b/data/processed/styling/formal/test.jsonl
new file mode 100644
index 0000000..ee6ece1
--- /dev/null
+++ b/data/processed/styling/formal/test.jsonl
@@ -0,0 +1 @@
+{"instruction": "Rewrite the following text in a formal style", "input": "That's totally awesome!", "output": "That is quite remarkable!"}
diff --git a/data/processed/styling/formal/train.jsonl b/data/processed/styling/formal/train.jsonl
new file mode 100644
index 0000000..93f0ddf
--- /dev/null
+++ b/data/processed/styling/formal/train.jsonl
@@ -0,0 +1,3 @@
+{"instruction": "Rewrite the following text in a formal style", "input": "I'm gonna go to the store later.", "output": "I will go to the store later."}
+{"instruction": "Rewrite the following text in a formal style", "input": "Hey, what's up? How are you doing today?", "output": "Hello, how are you doing today?"}
+{"instruction": "Rewrite the following text in a formal style", "input": "What's the deal with this?", "output": "What is the situation regarding this matter?"}
diff --git a/data/processed/styling/formal/validation.jsonl b/data/processed/styling/formal/validation.jsonl
new file mode 100644
index 0000000..659cab5
--- /dev/null
+++ b/data/processed/styling/formal/validation.jsonl
@@ -0,0 +1 @@
+{"instruction": "Rewrite the following text in a formal style", "input": "This is really cool stuff!", "output": "This is quite impressive material."}
diff --git a/data/raw/styling/sample_formal.jsonl b/data/raw/styling/sample_formal.jsonl
new file mode 100644
index 0000000..0a2d5a2
--- /dev/null
+++ b/data/raw/styling/sample_formal.jsonl
@@ -0,0 +1,5 @@
+{"text": "Hey, what's up? How are you doing today?", "styled_text": "Hello, how are you doing today?"}
+{"text": "This is really cool stuff!", "styled_text": "This is quite impressive material."}
+{"text": "I'm gonna go to the store later.", "styled_text": "I will go to the store later."}
+{"text": "What's the deal with this?", "styled_text": "What is the situation regarding this matter?"}
+{"text": "That's totally awesome!", "styled_text": "That is quite remarkable!"}
diff --git a/data/raw/styling/test_formal.jsonl b/data/raw/styling/test_formal.jsonl
new file mode 100644
index 0000000..7d6d9fb
--- /dev/null
+++ b/data/raw/styling/test_formal.jsonl
@@ -0,0 +1,3 @@
+{"input": "Hey, what's up? How are you doing today?", "output": "Hello, how are you doing today?"}
+{"input": "This is really cool stuff!", "output": "This is quite impressive material."}
+{"input": "I'm gonna go to the store later.", "output": "I will go to the store later."}
diff --git a/data/raw/styling/test_missing_fields.jsonl b/data/raw/styling/test_missing_fields.jsonl
new file mode 100644
index 0000000..2005649
--- /dev/null
+++ b/data/raw/styling/test_missing_fields.jsonl
@@ -0,0 +1,5 @@
+{"text": "Hello world", "styled_text": "Greetings, world."}
+{"styled_text": "This is a formal greeting."}
+{"text": "How are you?", "styled_text": "How are you doing?"}
+{"text": null, "styled_text": "Empty input example."}
+{"styled_text": "Another example with no input."}
diff --git a/pipelines/__pycache__/__init__.cpython-311.pyc b/pipelines/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..11c84889e3fe7ca847d5d00ec9fa53b86d27b1be
GIT binary patch
literal 184
zcmZ3^%ge<81fTa#&H&MmK?DpiLK&agfQ;!3DGb33nv8xc8H$*I{LdiCU*7tm#i>Qb
z`nie8N%{HN`Yx%(*(Lb}`bjyd#l@L<>G6q~`nlOD`e~VYsk$YlAR@6iTfZQ)AT<Xl
zRIDE#pP83g5+AQuQ2C3)CO1E&G$+-rh!toy$o67>An}2jk&*EO1B@tQ28sayB&9Di

literal 0
HcmV?d00001

diff --git a/pipelines/styling/__pycache__/__init__.cpython-311.pyc b/pipelines/styling/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..cd21bb234da2fa28b3f421778cc872160c878f0a
GIT binary patch
literal 192
zcmZ3^%ge<81fTa#&H&MmK?DpiLK&agfQ;!3DGb33nv8xc8H$*I{LdiCU%~pJ#i>Qb
z`nie8N%{HN`Yx%(*(Lb}`bjyd#l@L<>G6q~`nlOD`e~VYsk$YlAR@6iTfZQ)AT<Xl
zRIFcIQVC?F>&M4u=4F<|$LkeT{^GF7%}*)KNwq6t1=;~}MKM2+_`uA_$oPQ)Miemv
F#Q^?{GIRg{

literal 0
HcmV?d00001

diff --git a/pipelines/styling/__pycache__/data_processor.cpython-311.pyc b/pipelines/styling/__pycache__/data_processor.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5e118b1ffbf3eefe0e2008e32b063710c803d75b
GIT binary patch
literal 75235
zcmeFa33OX`dM}2JBtQTJNP;A|0^A8oBDL?7#6{FXky@ygwnRa^phStAE<jm=LED-7
zy`fz-;rKZ<$Is4S=GZfI#?x?KW)jYHrqN92HA;GNdM|TR;h@!v+Qfa&O?wU<XIe++
zw5PxCe}Rj80Z^hRGdZuP1^@i+f8W>d|9#*8J9c}%4!{5W&)&Py@DDoO-;g2svnMp4
zUoq%(@8~!kryti1$zT1Dftig%CVUO!ITN`<xq5Olj^|C7hs+a}A<IPmQ2vB<$U0#g
zvQ5~B?9AOXUNGSpa!fdfoXo5rFB~dFxSa8#iQ=JRW|uqenkX46nJ679P53DrDubWA
zarZ>|P<g_xVyFUk`ti!4O7^W9s$$>jp=yIJP<FE>S&)F|+j^buyZD!XL*9TlTAMVf
zzX(;AG;71?b%A1o?ud>vzpmpf-$spm7yt6_+xUDpY0wSTGk^K;XVv-}YG5ulxYz?F
z-!>u?{^j3LBXcW&n<LPe=GMgAoNz15aBF66MQ|%lORa@#<;ns+&W-<a{8!+=GEhI7
z&sDviGt|m$;5@ML;y>MojjM%^4O|@yv2oPI)xT~U+Qc;ke4_@g5&AZ+iMg2rzJP7i
zP@XJ?4t|^A-p;kWt{d7M@Nm9cte$uq+GMk~rqGW1LH6(4_$kFO;M2!Vr~IKCzTCK}
zXKW-CH=P&@hT`Uv)1k4cN&k2}=iDSTabxG?y?D;qnd$L>&k)b+@rV4uKqzh-pYn6V
zoNPi|7SfK4`-4IFGIn-##|!+|f+5~N5}F8vZcK6T<AiZ+ayS$ShK7UF<71%_r7X}S
z+voEo^qzNg0o{<E(+wHUA=g7j&M;&O<OFg#<9>MMOhb7Ab08;G7jVf*xL7h=a+yos
zP(F*n%uJRcD>LOYlXb|(Og3i1_+h33W-<*GAV!BKEeCVKxC#^^<w9mE8Y*I@;)JP~
znOw|NGUN)Bz-K8ll?|0LlRIH5W2SOusu*$y9PnAmOjSeW%v8-xHA59FUmj-i4plN!
zEi=^(RWVaNGns~}GisqB;jf1IYfP9t%+$n8%^0mz$1Tj{8>(fdR%Y5TRL4vknQ7Bd
zJu|g2lW9=rYmeIpL-)qVCa-r-O^%LT4}0YA4DYA@<Q<*jy+PT77d_}!FmAmubNxE}
zkNQUfaVvG7;o#H^KN1*-+Y<e1c+x)+kh|9KH2P{h=Vowfay;Q1QBL^LZ$p8*p}2Ky
za(X5-JUSK_=i*jMDZrtp!NoQ;6H=U>Zafosi609EyrCNbWNLhT>LujCOFq40lU_e%
zbizN*@``MYO`;Faj8GBc)(QXJ;h=v4BO&<j-~BrMK0O$>%AK0^=Y;K#4#u5#{NrON
z1;WW)J)WQH_GjbyBjW)-jfA_Q-nfeoOibMgz+Yfw%s)Ona>LID;}+yBz>i?CeM;*_
zJbwa1XFM=@J#^#K|A4Uq3uH&Oi037$Hf~QAmetME+_rYA%6NXlnj7QeIV{6*^NrC&
z&TP<)_(yI8hRNBH%IEMXKQ$3AKv86$%=|QGCN#QfcibG99GT)!cTe>j(L{mqu?bWw
zAO3H;0?4^{Ea>%5qDGbKLxuZ8UM>(E;m588;L5teM(?zLj1O?$m&QUj<Pc~p+3z$z
zb!Q9#yrVNbT8sCNOifaawRtbF7W8}hKyZ3$GDwYyxaHOivX)gPoS$D`U++DZ%E{jR
zmuHifNGxSMlZcLN;&%V|w131uj9g4ihr;>$BY~;V@E8|45Vr)Tg2SPyTY*VmL)<*f
zdg<_RJb!q2Vv3s?4?u4l9=<)}AE*8cQ{M0}H#IUm%okHj8c`>4$MCR!a&jufx=0XC
zE;#YI1WE{$5uiDTuOv`SfL7VKacnZgdr6lwia9vM(<;i>6KEh?)3vFoalVnvO$1s9
zv=Z1zppC$00$T`dC9sXa4g$Lf>?W`W;3M7VfV!ub+Rp_8e6W4OKZ3|_wf6*qw?b3X
z?bk36g6ILm{;~FnTU`6-*koW+Xodhrc>DC&G<wBkAlRPhovbfXg^nQH6yG*|k1wRO
z2vG5Z|1)O(*HnK`Og_Qnds5c$THk!JU}}I8O%1WSrq@o)w=8UXdy`;lf)Y(lF;D$#
z$L8)X81K&trg|vRR39s=k;ByprW(;yvzn`OCv2((Q?+QSj=38V6TCM<iKfO_*<m@v
zVZn4*G#y3=q}4a?UZ{T?L6W}8t6*0mn5v*eQ&p^{Rxr8d1A+<PTG3P+b6380d_G4o
zRYHlTN=mp2HYUX;R*QVQ7f_&N{!1&7|4_<9sT%=|@B}5A+_BOM*<2x*DnwHSqO7QY
zt$%)-V5)->O?9!-8resxv?W!q9i1CV#hInv5KmS8YbWOiQ}I;PzBVx5trP}6(&H&{
zBd<XvZ>zbMid@Res?L&=%fM$$SE?(mlYJ&CMvj#4NjCZjG|Ere1OGX!QtLmh{s9x0
z&lzdO3FN6)ef4gLGX>0CPQb$D2J!<I_{&YL*lK^HW-jmb9PRE1dx0;P0_8-z7c*nG
znjWj2vtVVk#_c_rgU`zg!W19&CA65?k}D!j(q5V{#zOZn3v%)*gtfuMrte(Ld{|TC
zwy|Jx_yo9kE*}WZ@RK8043r;LBAGw;nsvIbI=@=@j_$f{)_5J8sBh_KO%YR%?sE5R
zP9!IyyQAkTBF3l^RIT9*k(_^x1IGKt^sV`QZzP8^B{rJX5nV*J2@9628DCCDJUtO3
zZ{l(z#!<tdE~F|0nnxGWMU^eTI$zpm>(Ob=+B9Z!L%FH2QBz8v(sAZUF3M@iEN8^P
z8@T+*+=%{WPSXFIPG)Omwo2S@79zKKNk8hZHg`R`dE@0RFQQztd9&t-Ip|qO{%nyv
z<j<a&zx&PW2yJGe3qmx(CVmmKww4gP<Nb`*;heJ;{O5;g!CE6EdeR^(jdj-chBIPA
zf2v=z{R`i4BBdghpW=x1U+dn_)#gL5i&!{U82xwF@<!ovVk>#0@VT+&Ct}OLfA;w`
z#qw1eu^?X=J${{`%Od&NLZcS#&#A@tleHM4l|NbAwx~6!SAXAda&GP6GvrRWYh8kC
zw?{c^kBVouhdoh)_I2f}@Yj;BO4hRL%-8+jTW8x;MeNzyMm_7yIY;$e_3Idy&zV&U
zG@%L+s%9Ob9FYRfd5g_KeASwx#1kn%zxJ|Lv1g9Th+|!GI1oqeT5+ti#_A|1zw%h9
zk01}3p(AGg51yYs*TC}Lm>E7qTj9hnL_6Zdk866?45u9v+F1v4*yyY^Vvbl-n}u21
zI`Y^|a~RhWS@$UAd|d0lP$6#+dc&HbGsaSu&>P>+)0RcAyMKM&cz>IwF4AT*^dGja
z%B!w#Q`nQ-i)gkYK=63kfj0P&k#9#W$MbOrOq^%wz#R;QE&XF`^Y3jA=QYc|nb&aT
zS;F$jHr^3-$njnacmp^^-Rm7uFoTGdh69~FKHm<|r_*EC7tviVp4Ep`z9M>UG0*GX
z(W9s?PB*CgNI&qAK5oPz7)U2W+va#a%e9P_qJnz8{9$~aTJ~QLOa|^w^9RB^HJh=v
zM7ePKmXQ-IY}@|vsS*Ep@Iadq&W~t<be8%L_<Kz!RGxrZ%74E*n*a5a@12y*YRGoE
zVQZp(3C9qw_l9ub18%|3a0p)i2tO6XNq8JqgboX~wY9}_7$PH{E1%fIHG{Xtrl%=*
zss(6wDA%VqoGS-?YD-i&J4ZjYCeuKSJXJ)vB-vandN0G1py_g&3F{HI%b_=VafF}n
zeoECJE=`7zbCL}5w8p#1dko%!-ibsHP(``d`_z-+u8v@@H?DhXKSs!!WQ<`~vv*=9
z7@}T+qcN2gd3suKlHqL>Urxk(_TDsl63<WZ!M$EhU!qMK>r3O%{H|W<O>MLKh&~Ft
z%-<Wpt7|8RsH)$k{A;&yORbzf(FfZ(Y+W%zuD~z<0>HpWz<pi^b~J~|G3_(*XJH#R
zQLVDWe316JUhiwHSMuj!_*MJkeS{5m1ALL_kTrgx#veVq9Qu>HKfe3WDxA9@9=s?W
zy!hzgm6d~6#DiC*gID2W;Wj{;VqNt=j@sm}p-B7)K%(;aQL><k|JPS{{~5)mt~<|&
z;2DW>4@`e*{<-;Km2lybcxXsEH1z1u)s;h6#Y5MmL)XZM9w1H0!#uZlY6hpau$K*>
zpp4C8GakA!Lue)Ni{aW#ABo|SL_vlN(a{gnY$=CDPY)L<R<dWZs~Tj}h&O>x0S=A2
z8L(wQSb3l7_kK!!Eu80NgVh_h?3JVIXdd{Kus-1e%ni*Oy>f&d%}-5a_BrCY{^{w!
zBnLFr%tSmFv(8K)7|$J_x_&*tqbpBh@=@?on5`&Heh?7?EjBqb5dgk75YHb2UMLvy
zPmTn54sQGb@{q&(C$9(MMqo01Mf_2+KStm<Ip)g4I_|(U9T*7#dn?b07)vxa#_eqG
zOyXaYGd&^L_;VD#I58UyvkD&`L4$<iuB4@0%7m?cjP)v@ZZVw<C(^aC=8{7P<96AU
zsO?mNgGETooAx_&h>hoXN!G<AT)&PL=M<t19>yg0#TT#XfO08v&E?`#xmT#z12yM}
zl{BpCOqK%-%pHz7Tnjyd!wVI2mcC<u%P!RHUTg%AmFV0fIrj)m#R^LonpX;Ig~Hm!
zhQ*nWH$bBV+AlggBxi@<?Dzud*@{+mdP{Ar*uBs+-yzt%pSUaKMQxe;lUqN&^`nWO
zOu#Hwo{#|DCnWcYIs0RC!S7h#v<j}X(M<rd63yo%^EttM4zbp?38jbUug_m!+9_>2
zJZFv7w+m%Q=kLznT{<uAI67y8pXLqUx%sV|-yHv7d{wv2(r5ULLD5z>cYGl~R$RTR
zGuX=?qo~%mtU}ev<skrBiOy4!^OWE`6?3{ju@(JJ-<y4cdnkGn0P3eB4_bdT^^+;t
z1Sr~GkZdmqwil3SeS={0#0YvnsceW2iItnB%FU~~TzeY>U(1`<&u@<v*F-s?xD_hq
zs(7dWt$v~I#_|RLsE5bpuMilOHKJ=wa*YYDv6!oQaf|5MD7czq#hz$LDBb|2#Bg(Y
zzfgA*>LCGA*AX9SMAt3JbxUyFin&@AJ4F{NwdIRM$rkn|3Uc_OtWk>dSS~twc#%{u
zN>3Wmc3HAr7HpSM-pZ=EzQ@{vHc~+w<$^vgtBp2%r{!BMi~E)hV%<KeZr@J_#j;MR
ztP|B=*r?=vY6+#EA~kq9p&6f$HKJ=;a!m`a>6ptCMe!Tv`%=;FUoH~s_Dgm9e>x(T
zbxCDi+GuYtp-OI(IyaoqOd=Js5na6G;sqC<idJ1EJ%YPu>16;Y(RNs}9Tsede+k^C
zN4?Z74#6bW_$9zfzf|d;JMkDkRCD^`O_)T_hy++QB2|sd^*?b`F8Eg*9>L*J2F>+o
zM5wt=3K^9(HP=Pw4as>!aNdYHsupgqXvgMQbXKStBXt3*FVobFiO!pn^QPduscPI_
z!QH!T1Ar23M<m-3!FEK|wnqi`(dB9YDA9IIvK<p_$I!IS3d{`lK7%qujxO&Ns*cJ^
zbRLtO#{}oG$K^He2cug)99eAr=F|sMqGzY%*(sLqlFD~cQ}h{RINuqoY>fKf>zMC(
zj2_?jR^O^lS9{#BSSx>JxWxVOjem1v`Q?XQ;?9%O&XdBaLE-cuptSR>+MmGPc*Kpb
zaO1+nJ>lh9k$XkrUV(>0hHj*%>y$ra`UjmZpzk3|B&LR+>2xK1<O~RV1^(kFj-vUU
zZ@hf}W!6T~Qo+$In49Gd4XuC4AF!b^Ui6Q!8~&>89o-l(qWCwwXW)$Q8D@<Pvu9$M
zy<`|2@TG(W%we|5))3g!bz&#y^_S}zZYOs(FOnCmrxcN9?t4bU|3!>kUh32tkuRzB
zur;S_IZH|}`?$X$q{1~u($0XX_&MNuK|laZ@2FwafOGB$%ZcSp+nfzJi+tcL^3pLK
zv-y#HejsE~DnJ*>*B-t?Dx6e?TM~hf;5U^#KGPSq3j3w|;;iqPB4*B}D!qlSn(uQF
z6O{<%X?fG8#;dVX*|U{OTLPT+ao*<&QZc{5F8%NOvbRWDdpPbI?y64l&uR<8WsVp}
z43zI@`AbV7qs8a-_ZuTdTqHQdTRK6QaW5S3(yag;(&?%g6czy<q#I)+I7+6UDB0o9
zXW;K46Sx+qoz!q}EF2&zjoicrD-V~?JPx7!1$g2w0$}gW0u<7oG;zm_1F}C9j29$#
zRPx4<f1Seo3j+TMfaVZEtxNdh&*_}=yL!5b!12B@{dk}K(3N>O)3)Q+4ELK*Cwg}H
zLIG(9%C?#~kI|t^-r?o2bFSv(@HBt_IBIJIx4H+yTeMp<zg)dZvKAE@Knd~-#2qA*
z2=-}$=aj6L&wgl(_I&5)w~j8JTj~-Uc1aDpMDK3NyIb+XrX-(<pFvRUfc<ylrU{S$
z#B(PC+?aoo|Cg}W)Sd|z>_FQKjp{&K`K$x2wvLuS$dx#%RuTEnM+W(nidtiv@;4B(
zrf%dD>E`uTN)o5?-%&htrbwPe>(2`HXBQhD^!(}3pC5gA?q?@|aS|5MPJlGUs)he5
z!m=Lc%TH~!`P*d6+LbDerxS17FgeZB@s0m&0uul}d-_Sj%$hMc7F5L*ChzQ=P#_P2
zq@Q>`>mo9xiA~ktaRx|sb#|8E-$WEan&Z^F|HRJUk=Ap<)z7TMC`Dz!N<^D|v#zZC
zNo`%sQ@fgDbQi4Z2;}6t^06~><<FT{i@x**y1~+~|4c_vy@9sa%dt^tYL59f#F|=G
z^GtQv?E}mgtmf;s9@5X77u=GwF1lBAw#7QTU@VuM_0fLGxpfJ42e7}!ma48XI`mD?
z2cB4CbF8ibvD6^T0CNMY1-h2@C9BZ1fB7yB(Fe{8jTcsRW_u|EqO*2Bf8lT}Ic6|2
z9t;79o>P+N)VvLQWE8lt^b=2Abie4?EO|CRR%yL;ALK3CMDGsCyW??P!w0^_6Jp&S
zscz4zCFh79Nf4YL5bAeIu3gLdqO1FhRj0192|Km&?by}WkLqPOpYzyTukM9QtuTr8
zXC*-IS;>2L-X7a~WWGS~v`fy-OC_Rnm*Cv>WYfO6qw^OfTg^gPw6(@;uK7!n%@cJ9
zw#`tIZSxPim-D}W@&_kHTeoZ{+BQG#IP{?6N5elEMp)+EF4@`^5y4)lCyuf?2O9t&
z#_RPJ@O#lM_<aOu&EPu#;x?Lg$3|otI-Vv4c9!M|-z8H|bdiqMZ2q?h(5%b9L!g^L
z4}rr3jwC@pG(qg8bgwCYF%t+0wmr!yTs8FTb6Qq)1fxBR#f#k1_K$BadZdOuvgLyg
z`72!I5}Mn0gqL2%rtB4c7wyfu4ZXBcdqsbQOn32tiPs;6DHXM>|IDb%EqeXMM4BLd
z%`I9r?8tGi>HrqF=(a`E;@OY$qX(rL&=$aM`7-+mLs#VQttnwTD9qdyrteC(?lHR*
zP-0KU$eiQ~p}H8szw*mZ=sy2_XflWo41pYw9}w|@c}=nc6K8teq(v1DSwL1G6D_P6
zj$2RMxm<2W-1%G{)<1K+pbI+znr%*iAQSXg4V?+HOYb!}co|71Ll|>JV?b1!Amx$q
zE`2EL2xcMWM|)f&{y)zJ02wmY44)yqmttmH4n~?FQyq9Y(2Rn*&ClPHv0aR+tR1_8
zcDf!&AhZWqC&XUE6by`yvQ&eVU;-~6H&K4V#o8jYk>&qD*`Qv{&><H4oJnrEqp;FY
zJY>&vbLaH*_&wmp0txgOVU>fSz%<Circ@aBL<Wugdx$r+kR(<`uZ-8pPel_hPs?k{
z=?TT~djx3A4fw3t#xq|)9_T(keJsCEc2NRp<Mv&$BM`%Xa1lT~ZrcP?+moE)Rf8eN
zzJRfb&*Iso`lZlv&kw>&?b4<$W|smqEL^W&VF9rA<htabp=kG_Ymxi7B6?Et(1e*|
zU!Gte;nZ2-+(nEzVR%Fu;)HXYbSjXFkLfg#Qdnjyw-{wzGuD#li_iZnxM;^3MuX{j
zggzj9#z-3?VxMOd1g$47HnN5+9LS0iX@f{Obv78y1zC@2$jVtj7Awb>3%h*S**Ggk
zhb>-mR3>BX1sSVq7}hI;2K)2`Qa0h!OJ2Q4XRfU=3T>zsHVU7XQ2l@c1s3uz=uqE2
zb7@y&U(tt5NuN4hl;X(vfwD&xA8u(4+C8IQXVGa+fGXc=T9}WG+N+aULx}c9iC@I9
z9*P)k%Cp9G<lYo9X3O(+oGn1(l!KEsqba(h<9j0pPDKJXo3oDC^E7v-X;d+|u{CG8
zlUA2HEkp!)p_P2{q$cfT|4&vH`-0H{Gg*ppLa$p#nEdRuq3I)5ZF_UJ^fG(}7i2e+
z>mO*160}{%xq^r>QID$AGN`Ja-^U!9dQ7AASw}slg+M(rQvAukT_dMXC8y5R*j-mn
z3x7j7{hh2iy`rf*6)q*!6}X~j_JizWdaZu&{FW+4K54#TKhXPNTZ#55N$(?i-DHy{
z@7eQFnl-kv%-G;h88@6e(c}8Ka@LMVFmlUP?U=WYlq)ptn1&TwM~_PjfgZ=$G3ND?
zdo+1{-so6QUMqhyd1a#`Rja9-W*;5v$!XPZD5o?!QaMew|9VD8^*{UQpbeXLbkt<_
z=XH$^4@#XGTei`G`7YJ}c?<Q%lzHA5{2Hw+DVtfdrk&C#JUQ<zmVbV~W*lXoeQKW-
z6UI?pX4!!^O{{ak?<HnxrjtjIadI!p?MPH2?PiquCJak;osfCf0y@z8TWo&e`__@;
z2BO;JvQUd@QniFp-iUrUYd#`LS0!iaTq&+f+fgY~wLfnNmcmBeLWZC{RGTa@u)ois
zzg6dwW?yap1_kc=<56;-B_8Xu^1wAEddLy3SzSk<Z_^Jeu`-iV2bz}2z8d({WkVaa
z!mPvw^9FS-V^^29>SG5~pQ=M{<9cg?+mu=Uw7shBHx{#99dpFC-aM;o=DE4IYSQ>J
zJxyu7=ce&Yn(>G=jQwJyfFRu&aT^o;rzc{Cp6xn#+_P{S!)5ScB_E8kJAcv%I@qL-
ze9|)?1t}lv=vQq-X^q}Q6qp+rooZ&!X-xGTqV7aqVZpV3yjjcc%kIP+v(Av(zf<wQ
zB~cc0q#$WW^T?#0+p<Png|kHwM{+)l6ej1xND<8t>@4tyWS5#ff@jpNx&B`MC3}pz
ze5j8#a_^We%$EDfOO$(6o=^YEdHymzP@Ct1HS?^laiyNu(T}!j=i0OrSN1tgOI@p3
zg|)GvZ)Q6!K7U8PT{BZ_`pT~<g&k=rs9JVCDd1da0<T9-YPH9{J!QjsIeI7E{IMT@
zn{gdY&KBeDwm8*`cut!SwVO5{vo38&Zl^kaHB~kFqfxew*54H=MjP#pxB&O8Hz%xB
z)6NPYMxKh8qOL@7H6JZL7#u+Q%gb!eDV{Bfl+5cv5C8lgv6tm^U!;WFlBzwdmnLm3
zt*ig;XK{6Ca5b8EzdYV1TTakEz+K5dZNAZS{*|W8m`^i*+yQkAsP%&^--mw9`Oa83
zxK8k%ed+qhpra^_l=5CpDYMTeU5S=0U84?5lQpBw(>m+08+E9}@XuI>J-?PZEQ^$_
zs}2t*>aZ+Z9fEe^$LnZ^Uer{ws)7H=#EC{A;-Fp^YEwMv8g%@IS+^$c?0CH++PJxW
z5qGwnenrRaoOQqAeo6n5?ym8Y?j^nNX!xKSi7XTLrsI(1%a_sdz}+#hV<(sZZh(c9
zL@|W75=NQ`MTkz8c=Gwc?HR^z4=P@upqT&$86Qp8Lxidh*HEH#U6Rdtua{(fDAEGL
z)tz_zW8;1$=^&BrC!*Tx<@-@#GFM3XIpP%eHnVG@1iy%!_owD0vqHFTU@FbWD5I-R
z#aSCJN>34_(B@<@!!=%R3VgL-toPpV-wAmA+W6wGnP6ZTmsG=Jqr?7W*7*TsGhCC%
zVS<PkcUzil#*J_Ym!&*uT*KYSdMae19ny0T$pvFz_88#<Ad;5blhudpy)A5E&d8$o
zQ;ub+(K|KjmAz1<vU|K#D&aPDbSe8}>O=^x3fvMlH2UHtR0B-LgQf~jQhM%WBjE;5
zTfC-Jxf5(aNq0YP)ab&GF`k4yDSz2LbfBJ67lbGf(aopY_jxts&Q>E7U${kG7wQz!
zYk?m_QJ)qjyUYy~05V1Hd7m~V`(c|`E(I~>DAmp?5?#3MI)dZ3dNABFNc2?RX^80v
zOosf|(eXI+8eD$PpyH>zTiP~zTdt!kh6{ad-T@|9WGqZ^fGejNas2yj?DsbZ6M{qj
z?BLVggOENVCrjg&3xpUke4DqEy9pi{Q28_Z6)zL3LYJ9)<tFZUn!AxYV8_$EjjVz@
z7_ARa=!0-oT3|*2mJIl5E4+Yl#X^<Fh{NJG5bASN6O805AoC9u^^So!Dv`Jx{;9?5
zJxjb^VEQTWde18yVlbQGgy{Bq2g2qPOfm?#Ba%Eyj6)%8qACiP4$52%te%OPAi=+2
zKqRizP?zbrsH222T~M`Sb{R&-pE{b^XjeMYQy1zgIkIKuA#h7*NUP{}o3^K)9|!nq
zk}%ElriSlqP7g9k-RovfpT4|nw_Sfw{e~XIXkNN1*Q7!lJ4V3O1C5G+tAYr)-Y`UA
zoB7jj81E#>w;*+EP{*H!fAG8IFv&0pI=6V<t-!sPrg$!{AI(C`wL3A1Yi}pn4SP1~
zwiLhHN4Sj&65(EgrVF$j1A3lFI_M)@pL}>-xHR<W($$qqSH(-$q)XR?foob%&oZ&A
zVP7s!yvuQO5W@%JWCHO#CgKGGPA-r=OcN7uiswy0U<k$+G5h%^?}3i(EZq5@1IXkA
zG8q>CpTZ*C_T`Gq3VII#zW}9yPS+fyGAeNK^k39Js~m6(>Tf5j<ukc<K=4A91XCYA
zFwsYjHF{eeyR2t*5v~}-YK$R+5zaV)5<Q;QY5pp*r|Db&Z|=c7UHkJ70DhhQ^$)1O
zQm;w%*HikNRO|E+#-@c+7lapHeDuP#l^3pwFL2TeoNx*xOw4&5AGyQkF!I>AIVpIE
zWN-v$#zXPkke?^Uzg%J?4RHLQQW^h@%E-uF{t5k)6)e?1A>$3XRp8?7zvzBW?q~F4
z$!b84q}o7_qz_E}vWY%&tc!)}*k!%kKdVyxQ|5|F%&lxs0%o+C5t8aNIx~)?ln8jx
zk7-V)MJ!x;m{>NMHEjZu_C7b0dKAyc(lHZYe)uV>xM>1(Fl0iCKqW+y=}egD5}al^
zjAx6<EsE#8<mV?5t1JhJ$ulCfG4CaCh`@aUg!pFylw`6}I!N=xRT#G%zB>|NGVgrl
zG8ND<vd&9nGVWBiiAjbunT$HfsD#0m#$?(heOs{vC&`E7AScC&nsmxf`HYv!%xtnv
zYX1NsVJtRUU*l$qis@`5`b#l^EHx=Z58n7cWQCXvyfn<h(lS0VE$1zdY!NPQX3oeA
zPiRmG3FF5gc`XsWOcrZpq_6U%laeAv($s=+eIQ8nrKWq;=w<psB47$vtUdXD3in_I
zuKixqJ#mysjyhsB8`M88suHSCi$!OoqBDZ+%qK}^vg3?5OmH2C63u;*xlb_nt(uM+
zEk&P{)`1lVw5hQQuT-%KWV2-jpXmVpfxz66U)pruhUk&+ocPv>Zw`DgAbPe*o^67y
zD#1o0xQ`3F!9Q~xO0@M!wm!ku2U4gCNOJPV+!df{tt6V(%7VE*(6l<sgo;+dK`Q2L
zUd)rco9FEco6Xsu8khUU-N&Wf$Azl1vJ#!=B<DH7dG2xZhQ)rdd9T#Gciy^iM07R?
z&W2CG#AbWTCRBHd&MwK>B{;h%HfGf$Z0!=Nd!R(;Vaa(|a2|eK>J@5-7kj0R2l17a
zSb9||y(&1b65~&)<Y*9D4y-s12#x~}_6vg}LN}>cO=EN%l#}yTC9@ZPI{2Vc?C6&|
z`h}_svJ#yaCFe!KdGYba%}bTy#tvy?2Qqs?bovCRFIHG46*dX2hgJ#?35ACqoD&AF
z2!}~g^nv##Rk5G?;mGm^ac8fzvsb7(B`eW+T5_HioTt|lJLR+BEfC&vqB9^l1A;T4
zWW8J1(kWDRLy68F$=M?~dla|8Vy@J3@Ik%Ua#*Me$VzmMO3qP%DXMYCHaPk4ywuML
z*KdmbH-)N6S&7am$vGuBrxKOfDEJPoI1UMpLl4diCx?Z@q+*_?=$-doowq58p7~*L
zdB3>psI&`AR%c`-ItL}^px_*Q+|;_bUu@bfHSI=S_KMDWC{~xv!iLV3!cL*E^TD`q
zepKisMX^UfcBhK|>c1U)SS21lFC9KFR9%&o==4iYzu@#|MlWPWAE3<llA`Em-g`wA
z`z5eNZWIrkln#K2@{+7X=aA$a5}ZTNjJ<26uuCXpJ(e}q7122?Ifn)3uu?P6YN|`r
zRC|`Y#g>Da2_tQ#sjfY^BOSRYyl_Q4az&`RCM(f7A~{C{=Lnk0<ynYG#T!?3m-O~s
zOMMS=rR_&PV}@9D%VL>Sy=6Wh<m#@n`S8~czI`xydZoBYC~hKtwt=@mCq9Yx+xz`j
zfAA`_vJzcWl50wEO~p3snD2|`iLRE|=B?1#z=IiU-3VQ-=xT;S@@?CtO$Q$|NnMx3
zO_zihuSqXXh?^z^S8J?n(^BzD*><69`(t;_HwxY@5Sm{W&fFl#l<1B~?ug)yJlV7j
zaR^-(#Z4C%awT_bZ0l}m>oMWPHF4`TvS^QO+=|gC96B#<JWm!rBwW>uu2Q-EGu>f*
z$*$!W9+pZSr)0w?jV<3RUL5+!^RZ`nr?jtM^!1C415)F_LjL3WrVoRQd%yYW2d^%d
zN_$R;&8Nir(^CEE1vBH4TnK;t;Clxb&#Y9o3zh9pwrpSMm8#leJNM9cTWs?-`fhq$
zRrig)cZnC!ciSM0j7cLivR;9&fO`+Si&ymL*nWa-jNupVR}37vTro^(T(AeJW?N)H
z_yR<JS~e_h`QFG6>z8hRzx@a854y#zN5r<HQrl6n`IyvvY{B*zK03b_{Nc8x@b?e?
z;NXKnaobU`{g~8#Ol&zWwH#lte^QST^~1rR7B3I|$nz7=!<|y!HF3{1amR?XV??ay
zq<U__{J5cc(f-5WPq!_He{}FC2WfJ+BJR5)?i!YM4T}v|rG~2umdA}PixuA-{$P0N
zuGo0+flqAgTgaz&Xj;_2dtl)}%+tJB`)*_*67zyv_nY>pIp%F$^ncS4&5!2CYFigi
zf2%rL9rOAYPk*x@YEc{$l_|8$JO~4jy5vu2ZU$wISanCLx+7HGL8WXyq>oiK#hTid
z%EhL=;NieBwYlOm9l$>j5ZxOWEYUr`EYNN0h*dSl`p*d${bIi#jL9n}Muiij(Xpl7
z%VXlUBVyB00cw0gj${)>vWX(ugh+e~c~SSOO}BZ^f;D;sk;b;~gAUBzTVicHq3adh
z8wB@;Cm3W^QuWSNovUOQ)%NZ0)qihfsr93&kEbve4_y%3E{LrcrPhmL{UxdX5^9_B
zMFo(PZKGt{DA_h7+lsvJ0vk$}WCiyYN~~0>24}VfiJ{f>-`n=R;L_fYUj6vh2c=Ty
z8L|D0xM5J*Feo;hl^V{X)!GiQ#M<S=+9|PiN~|4;Z9rnXHHj%DOK{=}?oFbt4ZIqH
ztxefA9Qm+&(Yka*YS}L~bVv;yg8PW9MB7oxc2uw(RlJ_YKzw1jO4`&b`i@9G_&Y5t
z(RN0%(LP7<dicX_(U-v(-@I3>-zU}Y6WoVoCE9u=Td!d2MQ3wXMfVAgHmKOfw#4cj
zbCfK+AUIl}z|NCcX2>r#1mM%nv3D6_b(^q{DI~rOg7e2?Wnk}-%D~-jEa@`PFzQ>*
zmo{QL?vYx07VNRD9m_Xq4my6BhRkJrrL9-c>c>TQJ56WScdZzvjh%+Yu>6(bgITrd
zuZAuQSH^^!A?eDDICMuEx}$cL^%(f&T{I&A!UmA!4{X@^sCC~;>proyLuv&zd!c<4
zj7$VYXA{_mjzbc_XF4aOZYB7V1@HOg3IHe!qcelQn3WBp>w@IEAh<3l{;n^D1@CpJ
zzt}FE8p2UfI(7NcscS2zu8F5W(awo|0jVz_dl6kXB-ahWbpu&)*GOf)rHbXwA5;rv
z2cQzOIx^jI!LW2k+I39U%1{`hLnT~f_*Y?>)0wd|_Zd!5i4Ph24gV*7&KUk){-81r
z8P3XXXX#w_GA2;?q>nl_s<ex&(k`+}12m;@6-%wlr+={N(bk@otv%w_UTJGD!UB_l
zP=JUIaKRd@Y{JI7{F+{wfUbfMtK}-G!7H*xtny1$exb@AE31~uTIGm^vV%~n(qA$x
z=SsV2IcT}WvU$lMyf~^h{u0z5`c87|LecT*Hw;ilx(p|&2D(sY)Bt^u$1cNp+3h@;
zX7v}yL?3mGR0Big00?`6rqpS8>G<;PAM`)k-n+8BSKNM7+I|#%a8^brK!gHBC$9v9
zlfE2%NM6r`7LKN1j?}qpvPP^5NNg+lkIIa2g4!Bqh7(YttzWYB3$}j9h7fEuUwq;K
z(~`jge!3)}@qOaW6GHikr6y_XVSJ$;7RujLa9iOQ_^sw37`{P5hyD0w{;eF{-{s`r
z+H3qfN7Jnx#=qN<13lwT(XhVI37IYyKmYqY;4uxv)D9wgSR{YhS!>hT1|WJ-A62sf
zC4;Al*JJ`L|7&)AZ2~#H;tQ+>K)z)Dz-nN^wWCqP{G3K24|h`*L3cU4q+vqH;Z#f+
z(VWb*KsX1>?<x*j94$z}6_+)0o6B;WM*=~!T&+Jpy!j2oO~T8uAIfEc8SSE&^E1n-
z4iU-4wQh?hpTx?S@#C!OP)Ntd@@;4N&bZK4=V9F`6sSWzF9i(~z5zIM2bl14o{ck~
z&C3YoTt_IgrgpOSIaU{iBtS5eP<5T5iz4P<Ei?xLmncZ$GJo2>i5hif$z540T!_#u
zi62+;tns5IY}EF82ub*3kkjTwR3ZgHP@0Sw-!;Z96NJi@vAT3;_S#7P`euvEFR{4-
z;$hW-8$lFVmCAxP|9pq0tTt_Vxbg%!Uh6u_P{GIpD<d|piYr%>2%+|?Gs|@U!n)(k
zi`cTw5cluiUxuh<JC`$CfPV)>Z|G9w#%e;)NZK4oLMT~HSwKvwix#G8C#8$zzhThU
z)vPn({H@A}JzA9VmC{9=+PcFWU%=HcBFuvOyCFza);pjpxy42jU%!qT@GxR)xo6}t
zfwvhm&i(5Vd&C$qF*2-FhSun7tdDt_*wb1P8VUmQj2I(!{$H*mZ}saQeGR|z=(B5U
zRofz3H?-=8S<5=|-KeQkyLz;upJmj>{f!Xv`EQv(kfx_w*U9GU4F;wmDI^qt^pJ{<
zjt=jsGlx%|IoW-9aPZi`5pPfD*-kI4pIT4J!ubhH&tF38;f|L{c%vI@+9L2jBCre)
zt~i0W0LUX<Q8A%w@{Pb=h?Oi-1j{LQPxa00!j}Ir#qeJexC)@T)>5uaufauiW$K&7
zg=vaHC!)Wj4_08)l647C`_wkcQllad140~ep*oLbAUlL++YH$uGP5#mue9<Sw`Iq0
zr}%(X_o|A33}iG&*9u7i8J<@K&#O^xxmVghAe<i(_A_;D^wG7OE7xucw{A<fX2ol-
zNY`G0Q&gXDmwmCY>`vxChPtCW6j_=Gw%XZl^nR845~#lXsro|HwM<qNU^KLR9w;y2
z;)_BDQz_|EB-CIM=M8N40}w_46Mf(<TA1+5La1Y7^~k?Sg}4k5E~mjU*x7&T#9{B5
z&I|I$=sk0?-y1IXb{+0LdFF81*g*hBQ4+SC@#EPOqF{KMD-$vBHz;1>*bW=K-mnoP
zV}QR!b}s-twKLJK3Av5lrzGF>l)|qL>!(FYf|QNkAW4CR9seQ4=27J`F^muQsPk%~
z(HwRpMsqR=U$N?X?Kb6O5CDQ610jBejBFsV7X3>yPLONP3=-yfn#h>&8a_xmCeP6}
z%muE^T$iOP@;v!=;(dZ7%l$A_20spKUtt=5ecY)?txVjiv70IRDoS<Zmb{NjN@8Kt
zOiBikd=bgdQjSI`c_Z!!<2ktUnhC@UQc2JiT-=c|%fcb;iAqgKg;e=xQy#6uUhh?}
z<|kiuMd1`q(#NHdR;0@RW9r3!i#ZLH1y3rg1+!bZ+}a|Vw@T)%f_dwcrq-0;!#$QO
zKz9&K$Zb)uL>Eb(Aw>RNOvy7O6O?F!V3A9L?GmAWAOy~AKW+fJHyU25{b2S{^PZLF
zJ!11dsd=ARzhA1~FF3slqJD4*BH{)~b)J?rqVufeJS#ZQDt=}NU)(2D&5#-nNW*+W
zUo|5-??}!&g7eOk;&Q3DQP|WW6q6#9%Zay6V3^e%F)WozTj+)wS{VvwA-p4HgnmYU
zIN>HcVEC3CrO^ur8^9PP&?K}S5Q<5u5;$gH$7>3Bj74(HAe_IdHbVcBEQ>_(K?xjZ
z2{5z_^uVuHdod*|iUa|^Dtca(0IOb=s$QMze_ZWFT*^G8)-z-nTo5rJaxUv@m>zI$
zAb~|7>5ONg9$c2cqV1eyJ15xAB^sLC&qF<!l)s|wf@HfO*e)Q0_F`GIrGM%8qwPH_
z+k3?Az0!8b3G~ZKv|W>I*96<O$0Zf-Z(F$iVSV)GH`_mG7b~|)m0QJ<ZBhxOsN~rp
z$OT|tAA_{V+P7!tO-frISkwzu2cVWJ<S(G;JSaI23eJOgb>};+-)jBl#t(4kVk$%|
z0AD*k-@Q;m0v;Q`S1guolFEQ^G?wfE?mqDS)*rMk2Ul>RxwR{SIushZ7K;I(mP#O^
z!nhRrL8WX46y4pDyIWxD6B*YiH0+`PyP%eG<ge)7ExC6KOs!fdBL%t|@7zGF#xr*^
zR#iRMkAYBIH`gQCsx?AA&Gtcv03w(W>sj0QsCMT{?M`Imyja^K)%MI6#ENT$;!QDE
z-6NN8#pPRU6J2{G*B-&OC+4b;LJVcg{K@%~Ph7Rp)1s?szCTvhu&7@t^9f}>rHrSS
zI)#SQP!Lkez7My{U(tO=a-R|0XTEr{VGr)37a)wZMb+}R1^4ZxR{=;3T~BD-w?!K-
z*?7Um(-5yg1>3Lal}0<ieDl$Pft3RT;(=4r0bIM{zL1pYydXI*2+j*BnU8H^(;lg5
zkLcMedG?Cs`=s)HK!g=uLA4N^?^KYbFD}&z-WQ=DM3Q|U)(96bNymm{ljyoExh@M#
z{o@nJ#x#56g_I^?4T{PF3x=Zl0!SN@WkFMC<}YHniOEv_0X>DC=64l@6l6259ZK<)
z5z3@FN<z}3HdY?5w%pa<q#Y;waYi~EjTnX%o5?)Rh$Eb0j_6drO^HL(L`Zeuk1FV&
zWUTb1gJ3EojySnljvvpDB~M*Phz0T^vmHa%ksk|UQtFMx1T<YH$}3`g&A{nV`zH1^
zg%sXd@cyN}D*&{k+BJ<c1#&c1HGtQHSX=X3){$2$q$B2YwzLDh9sFpSwgs8TdX!W_
zqVexXjF8=Fn$5)_oBCzO569NDUV_rE<rlI+wLNN$QeIm}DIIuIDYHD_+s|T~rIhPP
zp)dleH5v8jTyH&UUVuq!d(<%@qS=o&x7hT!pW|QQ0!|=wj%Mv2`_@seqGzRqzFn-&
z9n3D6mDFJ_$$UkF%g1|E3ilHFdRlK*$C$Oenf{d&|H<VoeOAtX_3{$JhAV>{+`OzU
zQN50qa5JQ9hBToFNhvFT>Q0?(ynZjSf`D0toe!>{!|-1aphJ;-lyW8xE`(iRCn|Q5
z3Ril)Yv3G!axgZNOnj{mHK8KTUg2u5w{sNIQt2g0$UF^##{-8hcBsJFj3*33*r67S
zPqx&?lyo8_J={t8N`(tsD2R8{0ptQ1$nYoUg3uxrUa#_;M<Tpee!<flb}%w_5V$iT
z`Q8`8hK-Qof0<HNg-Mhs5eS6PMDogrQp0YqSEkWd&N-~^z_DYO59rG2E}jRb2Y)EU
z%k@qqm+>5Sl#^u(HjssNIQ2%UEQPa$l4=F;ndRdgt@aE}0(t-J^;M!Fj9cYA%8#!W
zD5Qq+(aoWlWna9J4oNgeCy!b33F{wW6K=|mYDqt0{$CWuhuCE>QE{ae?_7NA;=-s@
zvw5jbEZHHI?3l~P^NH`=c<aW(9m%s50$NLVNu|5ytk_iBis!d4)QHv=$%;!sqopop
za|*U<>>Z<pID^Srz$e~XwxfJvbx78#sD8y-D_ColOTI&kJw&Keen?iLty8jf3bszg
z!w|bD7Rra9An=`iHKN<uUEyVRp(>PLCUxosStHu6NVY42?Fx2(wo<`b9V@S#J07z_
zTNA5*M0XUUw0xoBYr}63&*eXcc7s^jB$YPJS=Y$z)N;jx?gwWcbpOO7luwbm#0h~L
z!srb^>C#xjvV2OkO-r_E!8T1v7op%dI#`1GSamb)w|&@emsdY3_pOxs7F(A_#qtiR
zyaQRQsCiV;x>C^!G4i=$#X+g!;9OtKQ7k#U(ase|qu^+y-76z_oLoNeu>RrJhxO9F
z0ik-5)TI%DyDkK-14@@}BrK~Z6*4^~?Cs@~54#_pdDt!OKP6P(CiTK7i2H;akfnc9
zdf`^Wvii2@<RvFBIC*Mu$6WZ0gZB?EoR_NGR;+D;wN0Tk>XmxVt!zCfY(1AXyEyXI
z^ca3xzkKsY?LTRkO)?aQu1Z6<G&WN+I0sJ4x8De*cEd5T@Gu<{0bvgqD=wSg{)x3<
z?q$(hDOoG2`?jxG+XZVoh@4&)w{=U~x*v>6y%$!tT@bciSTlRQ22eA-Ebi-*_R+<B
zO)pEh7Z>wHp{B99{StD(>lt26@hAJB3^KW$fRw=_at1*oTk4IKlAPKKNT&suBhj%o
zXSN~Hvos)-_dqG~_@6j{1F4n|3Utw}IVjMbE;}e7Q#dGSUw{fJ+g052LZ3Pu1hGZM
za;>@%zDcpL9|zIycs&Z{)Z6b-WjmkrMc;4Z|GSC>_;U=YM<00?jvrY!_E|zH_q}}S
z8hJ8t;QrC(iTHsaoH^&IyPFmlCS-Fm!7PTSjWMS(A=hE!i@^;~_1<va3s+uz>BZpX
zkIX7uoPo!~DnZB;44@qH%fEj@x<&x73cW{qs&qA4TNZE0=VSg1*RC<wrWS#HsQAzC
zA&S>@pE$}F8a_0Njt0;k<M3!HjTM*7Z^h_x7Tv!yKe{jy-7Y#>BuC35N85^{O>}IQ
z9GmA%iD{&gf_TyA-`V@t-mmR{d;eTd!i7jvESnz}m%P1mp*4DKu|X_umx|lxdY(9n
z=d7&j3}89iO9k0SU_XHl0)+&qbyf0li6otm5}*d;X;ERNO&78GEpj06zu-Sen~~R)
zznBTH3~yRBoiyeYk$_G5W=Pm1OW8>SujO{?bG!@bvNg$)glp1w4e6TXBzw@nUc>&J
znkZgZz-I}W!RNunhuklKQiC=fMD)%aSJb$w!)Ng_`z#09XH6iFPO6F#{42lYaq06R
z^p$^-$WG1sz3US9nG(^NpYX4VLlM!L7g)>P!o+p1?M~u6ac}!;5hrRzTm_s>5hv<M
z6DMkiyOS%>h!b_du8?zLqg)twc7w}vYN7`iYvRL(K;K5j1U|y7ldLldfuih{I^3|~
z9qu(`iBi93)9QVU-Y+RjR5hupdDyjgYO@B&`c$S|rNcm@ogRZlYJwh+1{r{JNINY+
ze(Ce^AULCEh9nZE6tRu+K5-dN@5=dKowo?2plTPAS>xmjnmBS2QVA)JWcOD`q01Tv
zNefgK0WMb+hpJ?$XtKsprjg&7r;ahR<V<>}H5nuCpw}c2La|X}PJpFR<s$o{##z+F
zC+T<|oG|lQ{imIAQUAIq@MoDnoGjC88ZVQet6Rf6vUF_u;w;s#*<zUBmuGCh!LX0U
zhIiT@x{)E!AZ?I@OPE(0A1NMToD$uXFrDGF{=rd9;xgjKk>H)U>1J?hGVD4x8N~h9
z6i;lKxaXemGnVE&HZ;M}T#&q>lk*K3?u0nt1p+iE@T%Ee@U8{pg}uncz*MN0_}$Yv
z-H7rB=dE$%YM7E8k6UB|Wh6gkFY~XlJG+8mxl$6hXTBU2>zaiMJWLe?Mn(Tij2DcL
zA&nr7Xm-w0acq^OPdJP@_%x=A;AKqq31%(G=GR+}%gkCw#iC<U5xqO~iOaoEDwX?0
zSF7Y|owGb~xF0z@cq~bD)G{Juz2z9f)C&zqmX85IiA6`HBBGU6?!xvi-V<DVp+xgO
z$-GZ6?|Wh`l&sa!vK4EKU~P$&RX!?fUMXu{v@Gou%Rs)ockbxi(U`qhvU{RCR_rZ;
zojkqs%3H61#=TT5+bxytR(aXEV)qGlA9;E8tyiP>mYT$}JyO{o^709BpB-Id7b(?8
zp;gKzXM&2oBUa`SyxXFsQtdW;rLye^mt<4xeSaj{^7V=L!1~b(wT!C*`ikZwlKF^W
zK7x%6@?Yv(DfKP3EcJ?|`=ru+IGH&r1apPF+8#x#X!gQ1WwA-yjOiPDVHlIYD4Hbm
z$LZC(O$<2rN{JH2pfzMcIcuhOZ3xatNDuVkEt<5mPV=1UIceu)rXA5Uam{!LNxOGX
z&p9S6)zr)7X`Ua#ME6`VcBX$~_#8~9mPB0~p{Z|}7F71*tT3gfdNL_Xji%PJx0nrQ
z3?(kKkR4_vHkb=wR^r37pFsztE?LB+tr>M(&#j|eO-wl%F)6utZcKTY>~ooDz&s`z
z5RY7Od5PNV0RlQNf&R3To(cZTB_E6`%?#0id5VvG(lZ}<DIe>oRR_^-qAo$hiy9;<
z(9C*EHXy!hj3y_G1t{~iV?hau5{tVXc}?oESSNe-tYz{gm9Hu_#cM>5FvTCcwC6xZ
z1DH?Z5+Pd6%AN_cOrBH4o*qZ;nsL}7`4JlgqSI4jmdR~tY3D}rh!1$p(Ao#<seIY<
zYNNbHY{|T?qkT%W^9)3mJ-;<HCus<WEh9Xp{L{vU)^cagHS0*BTr-1NxQbtU3YBRo
zRDHP=A{K0zs>55lWx+v=(VL#yNQmN5zqlc9M6m3$qe8fZ1);r;yn9MG525}Ts-6FR
z(tVdeKLLh=uO(eGfsYA%kHCLH;138a0ff7<Nt7XhE&*(qcwjBq8{i~c#~%WU+YbRj
zTM~Pvq}-Ws@gNhQqY*|EwlgU<iGp>=G>6}(Wd98%>yT?8Ap|G5m;WI-wG;S|z#kDH
zX-@nf6F3QgC#49%Ez1<3y%R4HxKH2^0WX0%0`&wO0NBSdgvn8|I7Z+efhPq1Cjx&>
z;13D>I|8)1;{Qj0gb*D@Z^ZcbNyq;UT!IAvNgTKQl{aB^SRcZkj7^w!e>Y9rvA@Uq
z3*1X`=h5}O?j_Sl$38wLxVmH|n!6=)w_xs;(VU?-_T1kyfBVgjxemfZ54<@bR9*$!
z-1d*|eS8mES&25kWb+F)|C91+h5y|}{O=6d3&2rWUbWEkb<2B}IXhx@S1xS9rf1Vq
zy;!$Htl25mU{|(FD%&+@dknWuJTTd@v`wtvDSCEEo?SA%#+;onJmm{pSFF`QVLz#D
zm;-lPO{~dB-};!hp1#!yVhG?I%l!sn@RBr$mqtjRu$hoa{21C)v4z`!%sn+=7*6<<
z{R59_zeb2wK-d5x|KJFYd^osREi=OUPKk}DrN+}@@foT3%$)hLt8BpzUfB*}p*<mP
zI3YIoOU?bFYd~@h%voY3b<vAr$)+WXSh8o1_&{x?3;H(?%pJgMP7AefM&=@TCTpSd
zYxa3_%vrJE|C(bye?C7}Sg~;Wt?K#en6rH0^w$dJEsCS6$R`ZK<?EDl#w!IVOueKw
zLf>g%Buk19Dq%k>VLvNjzd`9A!;7tgYZ&UmZuu*m1<3|kh~}%3`Kn+h@d0?piuI6%
zOW<O?vOt#%6*WpfP~;#$5<<SNpmd?|&C7F_KWW&$boRj|;qtK1a8>4$6>YV1#}_P5
z%IjI!TD1%XIV{!AmANaQ)Nfnrd2m#C;j&PFMXbLf0ouF>Yi41a(&R7TcEhP_+!1rS
zVik4KOJYS^tlERf8zJxk01Tat1&OrLPwTg#eq$B&vY&eLQ%`>C;m0#~bbbu9LKO|M
z8l<AhAp}NP9P%5+O`^G3GSefX%CpAZ9~z^5OL<bmZqd6(@?wYIEi2L7Bbj>ybC2S+
zA04Z5=|#!cB{p_TjqukmE73e4nFj>(fZ~<L$-c!qQgWOKu1;Br<}S(HC78R^XM}pp
z5}Gpt?TcwY{`nkS=xIglm7z=iB6{GNRYWx(8?<|k9eCVSvnJu$9W6ox^y{y%9Z3!d
zCUVz<;^6}p5Qaoe$vmjP>Rn}O-y2EWooUx|2v+Ta&_xx>BDF(?_{COm5jj(7WJl8A
z8Ppu?3v$?AFav3E2|J|OyjSw>-;uv5;fwcQwZM)3w6%hVFs${uExMrTC4(%cQLnqC
z)8mg(<mZJQ45@K^@Z`V=hM0@%`H%Gc4&;-qt>KbmlWgJkk{7l(j!g!<jG}(PmmfDx
z;q7(VJJHk1*!*$QRiSlB^p_t3SsWC&ConR_sio%7x|Y0}rT!wXTx<>dIh^8Vh=dFb
z<BfI_s52ShKZRwMt5*+#hO06cq!fDMFKIP80B?jGD4a7rwinL(76uo0etb}{?-T9&
zBs&PpaxJIy%eNobfmMI;HW4BcPx2|yni0elXkKVgT>%g|Ash|Hl2iJayNVc%KkWJ7
z_+m(G*e*3}7u`D~_YQ%nFCJHVq)J4mhhH^J@TLmQ1Iu>-pdN0KzoPr3<UT26D8_tg
zq<Dx6GmmA#$Z~0<?8}9a;b*Ks|G=Qyel0ykxRz%Km2u5kgLH%OZfn;r<Ii%MJ2#nr
zwoy;|O^&Xnyr1p&cGc$nyebDaKd;S!&Ci=mq~B%g+L!n91A0Ksd_bppHXlqvqrE>%
zk=Ehdp(6c)o<gwS$LV)c#<fN=2<g!BDeMAvrXZbTy}o<!JnQoyOJE&-7ov{m@#v>K
z7xd-C4bvQn&H|PTh%L33(9|$JCx_=z*|tmRym13J8l?GzHcaw%$xFSnVvXL3X#bk}
zCT$3@Q?fhmzpzlgc>d$zhq=<xi-P@<Xul-cFU=WZj>6vwzZqWe$yZo{xq+c#G&M@6
zb5^7OH#FKBUB~QdRL`!ddQL~E;<+j#{jGPF(VW#y>bX%H0}tL?;8s-F6W|CGGNkld
zRNh9<MdPt2ddkC;I-3lF_~|<CCVe^K0%cxfP0ahy4ty)X0OL#c<?$QHhP69QRXn?U
z3!>_H5g=}24V*wzDh<p;_^}Amag()(BV4&gI}?)hZ>gbwf~cvXv&}!aV#M@QT=w=J
z?6~AhMkDM9qbGyUNcRtlG3Ah^V<R~kDCI_*?9zGe{T|U;D_LtXuURTEq0RL@wwEoK
z-sePny<~^*K%=E%F?Xq9v0xF@#bpb*Z|@guo+mE%{LZ<4_MYrQ$sXg6OdWYY)9E|3
zgPN8;)&(0-*xx*R_vL@b7X05Zb2qjsxnPjY!$02eQ&ZTgHwn*Sl2lM<s3GMCWJEem
zCSIWclVomGL9HpCp@~?vQyCU<LObCeV>S;=k_u`HI&*?%bv;ayc)<o+j(o=22n0zs
zBovq=bBX3|KXUPW1O<!9Mq3!5wg|yhkcrm1|JQ5-;^6-dQHE=YK}gHv*L`mAEZCTm
ztbY9e27ipjkN@9EhYp6JtdQXGpOfX^6IdpIGn9`1B>_5TW0#RSQ{N`rK>~Df&e(wD
zn|9i^_-yGX>8Iq0&`*rnhsVwdL?{Fd1Qb>tnJ|oIEU)S(uK`imW|<pOoHwg9H(Wq`
z8H_i(Me`oXyhkwa(J<bWfU0R#NAOo*ym?aX`Tqv{0XPtzN$gK6HJh&f4^;g>FR_0H
zvASx&XRNtei4<w40+4YVQsnylMqF&FZc0$X#1F$aeX=29@$7!H5Q}S`GRP#UR`M_7
z&p}d+X({<jFA;LAVCfS$c8J@;`mYX?R!)R@b{d)fv!+4Q7-t0Oa84u#I|UP;L*tmJ
zdI$JCXnmG6>eCdBsZ5K*rj8c)muX9L<q2byEEwv|O7YnU7y*Jb^UE`WJQ@5W%;DP1
zjY4WRV0?%Ubj+X&5qw!9QHJ9AQdE}|LME0pSRk}(-6WDS)^0t-iC>mlko>PfyyvX>
z|7r5pq{-u*)a{B1lv3ZL$qju^?s$xBF3UMNMKQMx{`oQr-GhJwzM?NjxJH;^Mn%BW
zk(~Dus3j0ZY|o)+D<^*h252e#AvCWke=$?LU}|4ARp(R^K^s09^lY$W4k~cKug|HB
zZeKKguy<8Q`lVU<`_T1pL>RmPhHPo@^1~_V7>LoxJqzqL=7S&&<ZJj4`5HbC&OU7T
z#Sqh`1Gh1!F^Z?C@mZ>7p9k9?ntp-gVUrH>^K&YqJrqU75_k~tdC>FF^^4<7n-1L7
zS*Y@0M?`t}tU;3p7v`)22kK;UJbb7;_$0~kpk-ziIQpxS<54?~pv{$EVM6!$Q@Es)
z<GrpMGFkA7v;4>T*xS~4e_l<9M(<f`%y{*hv*Mo!*X)73(E_gE^&Gsh4e~W+HgQf6
zsad$fKpxEb0r(Br0@l<~anz8~GgrIXMx!@zMT|(U_}Q@}(t2h}ApUb9E(iBz;!=r{
z(&Q_bD?waNu2fqx$_L&YFN0el=MLnI;&oc+i?|A=uY|ssYkh7Gs!@|Q2$9QqP_wRN
z`vA?jft9o#?hSzwt`YxD_-|%4Po>w^WC7QL@TD9`Cwv>@#e<MHj_W-lbUHOQ9U#PB
zxTGJCGT^m)=Hi`BSdXBuC_mcn(`@$nT>vd|<J+*I3qkpZ^O+J(jnc^QMX|A;8tIVX
zOW7VN4;tVRfFlSOw<PW}r+7w*8@JpTWt2~lJwW|GZXaP3kO@1Vo^2C?ls9=kj+=*v
z$0o-@!^03po>?hvN!Me@Xs`{zUei4;Z~D6DJ<pu!e!=7NHnDuOR1P9#7^*jk)$LMs
z`<zKK6P-?yRA(T?Zo^k03ci?ti$Dnh8UjqJIA2Y=K7bKrlLZ5v=dJkvoVGjKTBmo}
zK@Et_6a+Tqa{BjhN&kWVw+Z=T1U6Q__)Rt`*q27fuFtSHKqgUv&(20S5(5sJCXFO|
z_?ZFXxwjxw{dzDHH;hJL8G*zE%0Rd_TSaTU3{Y*;DfczqFH3b!Oyg!}e#~7TD{qXI
zdJ-CU{i+?-%ndg*j*?Y}V$_)}`E2=&TkiQM#u-`L2=(6NPq_&Bee|t&FzacrL9{1)
zyi%|9Q+f|_Oh?xw*~M9L`gES*f29XQ!CJo4w(#(aP5&A(r){m_H)l=1Iq6pr@C{zz
zHDb==%vwYse1qS24c`_nKf^Z&nbz=a<!l+gLAJDp?=(?K$OlN8*6>@vIWl5)uIby!
z6=wJ@TGMw?7T@3sT_Y#OoGT;lk~MvoaHSc(%hvQ=mc_SwP2X;=JR|OkHGNlbl^MRP
z*7RM)RcH9FS<`n7=gIKxUDLOhtIhCTx2Eqp?f675sbAA?eU_LT*7V(w#dqVHz8krw
zj9fRb>ARV0N%#HR;FJ&1*);Ksnvy#8m&2RLnKaxWgL;0*)`&61l=Lgcu>o-?DP@a;
zaEQ2aC0dol4;QUXxGGjcfoM)a10?;>m)l4s<2DhlGE<fSUbkzTHN9d=U9kE#hiloD
zf@Up~$&)U1_RE*2Y+;M%$}7KWoh5>4MyoBaxNb655~v`+)?T(e*O1Xmz(b&pKrMlK
z0u2CX8G}@>%rM1}5Y6P&O29{8BY_PBXwSfJBCweN-7&G}N%*a#+eU!herzWlU48L8
z0m9yH^<tfT8ihT9MdbuJdeO<5(DV%6`ohC}BcUn&9_;MLCh=Ow%t#0<$}l!Dl5yn$
zm9^?jzD8!;O+HK9$!=`fswoGMd89bXS_*y3ip8qzugKvq0g}|@M`+(=mBX?NIvFAJ
zDZHdz*ThR$S_!XIxM4~+?8~-6$m*4@-h#AS0Di{QY$<vS_R&3J#SW=rhs@!Mo0RfW
zkR>HStg87NC*M7}*uT76tm=`fdW5Q;RU=#>Ec_e8?+!0s5v%u0)%%6&{i`_?C|6h6
z^o{m++ZQ*86+5MhokGRV)jV=A>kvYYYL{5mEwNW7RxRX|pK{tMR&_~LT|!mYs+F8<
zx*DIH&hllkx=*U^6RP`G?c`LTgcA0RiB&hHD!evvbJam^PQ^{E+ACE-0Mp*pLUJfl
z9G16<RR^UiJdkj3wV0e-y1Gr@@qEi8Z0Uxm``Tkt?J=SD*lG#6mEu8wZybF0;Nt$}
zVzIJAs_YOdJ66lc$(?d?iIoSW$^$~>fz@(ys?arU`OfgShJ_t{56_8>gHq$5&^WkS
zNp4k2JuS_M6&+GVhfvY6T1`$hsThmI%KcL1exY*zs)wAsDJNB8wd7Q%^R#|r>fNcu
zspWfO%`vIwm{4<UwVs?BQlZ>pWtUXhB~*5;Hj-15uA=c9o_9Tq6-yUn&SIgwW3`!_
zT9m3!HL;HzT6MJ>zvK9pL)hH;V3+9ale~R`w{LX=xoy;WH+<vOcV89S4?ZXpJ;x-^
zF~M_ebrZR@WlgJ{oHnOwBPdqvlPdNJ75i4Vz-g}cmpc`qOhowahAgID(RM_#9T99t
z5_DaH`_STfY109Gp)`!)eQ)*&?)~&22BbdihZ1calC49qbwHrjT>rC#N}qY)6Uxs(
zJ-jV{Mcbfc8x(AVu`Cj$zjEAjyiWJ$b@_co#y{U!+h;faxAq+9*>Kiurs;S_8|2SV
zVT-M07vuB^dgq-G5kbc;_RGGF%An&nXu?nm8NaNoP#3|<r1<0vsvIcG>zBwuD$M$G
zP^P|=B!1#lelizOFk|Ub3+x(~j);RZeVn7E$4XwjMohuJbx<{>(M{pv5)zj$dm!if
z`4myIgzGda4$xY8H6=A`H)d%iql0Y54~N$*C4)#2V<c}Bw-G*z%z4Flx>D$QMySje
zAv*pT`c=4G&2vTcv)<s;3|`pW>phE8|8e*Wml3){{oLv(9-$3zd-0|u<Z^^dvpf5&
z@<I6!Qjy6MqLgSS83ocEB;9Y3jxCS06!Px?Bu~wB9TCqbmMWQ;ny0(WxOpr{k8F)`
z{2dCwLcIlDmg93VTl(;}rb>iGxOvSrQtR=z(P@K}S*(y|cEf;oMPva6xMO&)=27{^
zmGX^?=f(2vQu%g>Ver(y_sS#B?iJ5&$a^?1dQM256OcLvmEfJ)y<*=W*f$`wRKDR+
z`R0}K&0wD@-z}BzX2B)TmPelbE1vz!Lk~+u&nd}sN)FGsrut>cfYtp%b-!3NAQcS=
zwgIfNRo?dwJgPjnQh87~bV@jLQMi6fs5~fEj!Tu}qS-xXo^Ogd%0TaB*+p|C6MV5%
zv~81Yc%y4utejZQ>_FiH%;zM8JuF;_9$o--s_5J(IX4PSv4_KUh($Z4qMd?mCsKB~
z-#+lDc*{!h7KogC&?{WJEEI1Mi?2w<SLV#Pxs196hfgs37(RhfooEPtXkW`Fzu$*O
zrnXp1^4PcGBZVWw(A$?Bz(n1^2IEF_{M;y-EAxk|HwA+MiUWNLa|)9RkBV!xG^_%K
z>82Fr!2SK~&fgT?+0EqH;NjT>&W{ZnP~&%wPy0vMv%%n8l%MDwOKmJu*est>-Zfu9
zX8DT%uweK4bcfHJl4IZ~S+Id%_p`{8V?nvnk6RNClnEx5Gs{SDW2*=WJfUHbekHui
z?3Lk-*@nYfz87gsd=**3MZddZ&aS!CH(PLd0Fb!SXR=aa?m8ksEM{Qd6mwQSa@MUl
z>tgjjseVVSx;EPU-sM<LU3B+*lb{D|09QW%=!gr8K}TGOa0<xR`L@sXFSLrbM#0wj
z#8D-ft60y<-l_gq<nl{)suwak)d_Se6;<yVYoCg)?^)eTy8<CzWqqW!oHe8SD(G5R
zD8ESB+JH{kuj-U9vQB9X@9bs#vfu~_@nd)z8SN{xg%CH&40{bc(LEusXkt2);ICD6
z%3Qe*?ncV-T)BJ5-E<FZWWlnQH1d+TQ}zdb-OP^ZNV$<zjUelsOe#juf*8HYAG_a+
z7t6kf@sXcaM8306)>%9*Q^)*0M4F^P(ezA<1@y51$({-Ml_y!x%%zDB06mjlUfJ-d
zZ0kzd)|jh8ay7-kY+LqrKYiS94}6wqau=aXx{I(FlvGNthR-Y}cjLSXXO=?mBWKf!
zvnd)<&zG?cdy_IJ1zU@XkR$G(kqRNe!-=NSbYSWfY*wVHfF2nkyy&=Y0{9NS&H%D!
z)m)awcsgsEGvGl8<LhRUyBDQcGJcSJN1Gwchu>A^L?SrHb|E)>JUOo=GN|+=Ol>N8
zB}u6Wi5?`+bJW5Kix66l5jbK0Ksy)BO$cE&it+z>4B&V17<2?BOwGs;&I0yXl^I)E
z&ZsgQG2@%GIA_A+YX=urh`leO{IejYpUr#2^4twH?f@cr+Bi5<Vpy(KlbOSEAMnbi
zF}g#@9Y~wt!>0Z1!2_)G`%H0Tcx;-damJsgJrFfDh|$e~yX<l9VBDr!HG=V6Cd(@r
z&y!_%1>-sLaAH^lMy^3ih)UcHReEx8d0&%<S6N~(qHVAnq6r{4JUTNu5<dQ%1pxO#
ziSCe+w7y1MB8@^?9=>=@7pts~_CpZv+=*C8^}=<jq-m}<R$8-gS1N6pI}&r1&K>?_
z+s>uPgYyqBh}+Id+s@&NnbB|B@pxl9-q#vhEPbLHl}q~`<Ui~c%Lb*gLE-$1V%dwd
zo|KSWtA(?ovrcl>2~0h4RV*ByKPcGh*z$l~Y&`FpKQwY9z@h5RiNXX~*Nj^j@`OEj
zYn<>;gRb^Hs!Ni)B4b~uF5~&BO5mwC#|slRfN3uoA6+~p<X6izUZPq>Q{l7YXtH|!
zGpbI%T%AwM4WOD<D{R0@o98c}VxzB$CA*}OUBn+{wE%IG1yt#(o4+ye?trkN>tWBr
zfLL`}syZ#0%bz&Bg4xR|DsBn-@5q7-G&7QNNd51T6}s_RYnKKBE!53-@h|^?XI17H
z;4xMDqz~XRaUYa<u+w7>V3<)A9u%0-7T|nUu|$;xG!-9W1Sx&C7{PpEnl-*+<Ulqh
z>r~j$+^}~Lh0i+6Ve;d~g14Tk#p5YcAXOP&5SP7xzXYeIr>!#f5Kgr8W}7$@{`YzZ
zdO@deWA)C6@`JZjsKOM2FhQt)169itWvt9gLdB<Ho*3XPmQ+Whtsdn&ydm2FU&Hg?
zP?!4(;w7|TuBAL?D}7|EU$NCkk7Yj4cj7^L#`9dDeEQ<C{5n@Tz0M^A<=}DbfnO*;
z1|`~#OSa>J4KK{O=XWIk7=H*J=PHLLNLRdH*}7BrNBNyC#-Hh`JO7{7zC5;#E6tOX
zNU?Z`5-CceWF6FL9hPiamn|Q%BwMj{`H*ZU>9!SFq;17IoT8N2w48Xd8H~E&-O+fm
z6LrQxIF0Te^|rIC9du(fgE^cuFk|%0?o?@OVJ@)&j2>ix`C|bYodp{CV|Rbwt75T=
zWI5JiOQnxh?|tv6SMR-g?>lR<Kd-UkI&NqCI<mN-qoz=2LH$j?p?0E<s(v~l>xxe&
z8KFT;vV)>KX}VD64<^wdGIghyNU5`ikSaq6;(Z274KI-^1essVm{6DsF0_Plu%Tno
z<xR6y+5tu<u@lmUF@_FD?1UhPp~J2;L&qMe9)Xrr#(vJo?$im{a^R=txSPE>Hs>0T
zDZ(N9FZfEak=GOW#&uj?G6hJ*`5P{=F!{!t2)`PdT7)8W7`QEZ;(ey^r^$K@JFO56
zDOyu#2H!YZC(b3Z<&$Gk(W!V#O~XcICL(<adsxkgRG&N2l4(pDqW@1Cm0PF~jY_-S
zA9L*$^k=MO_gcleT`buzmF!;`Qi-AILsX4Amhez3lS(sEOZ(Sfms&tb?Z+v)1|-*j
z;2MBWNLc7hjB@6PPX_8J&)|5EL?RP^iBaySQSQfMz2A4Zuh;Z>ucN;$`wM${|DNnG
z_E>Q(^PCvRgw{#Zn!$?Etqk`6j4y&-wj=dB1QBCP6zS6sOb}+MY9fc^Y8b@5mvzr)
zzSO|BjHI#&r6Pq~8X}nz3~$RPSrJH|Ky6Yp*swu_gu$bfwI<X<vHioUwG|G;xXj-*
zEx9mB^n`&1?vY*p1VIO_T=i2OHjR5RAv)<#k8P}CIsbh;l}mZ5VdbfEclK1eTC44V
z)i!iIg6>CoKfycwnfhMw-QQUa52A+oN)6kU8hX?kCgk?e8gEj6VMnSGES!MBb8sS7
z`-^wQJ@Tfu)?!f-C#soblcB)^oddQM;xT%LFSB~=s`-9X(23ncVWei9RwGHv!nDXt
zn3TZcftjG2Xv~V+;ARTD$T004VEAfs;RYKsk(^f9u$B28p7BRXb8HX`VV7(#Vsa`x
zJu~x-*)hiSTq9P7{x7t)f`6uaK&DMckoKp$a1*h#Le&q+trU1Gl0&tFIWCuKjtg~y
z5eNGC#;tQVz*&_^3C)y~^&K8Yr4@zR;L_a9FtpR~!?#ncoo4Vserhfh&%ug?G!>J~
zQksN8MrKQLMhR2r0@ds>tpbwzEov~ekgV-?6?yPoC@M^%V6<LR4td!mFM6qcBjuSU
zWwubF=x=Be9z+vC0j{WYttVmhH^vn_<w~}=l4$LQ4=ZYs>yo%Gf$P$dAoA>^a5Y0P
zL4<%i;o>c<zViYw?YkZk_Vmf8SkbrY{IU>cg`?q(i$dWMvG9mg2x-?Wva2M(=4
zt1TR5nz*CpHA~E2@~Q1(+m9_ja;$f3!lUkSF>qW89DlqBj-2Qpk(dWnEY!B4c#CA&
zl#iU744<={zh+PyZgh=V+b{~760d))Ys*t5c&e1=IwtI)Q>-|a`MJ!VVLaEDpeCPM
zdvm>Nv+jvm@C}K+A;~v{3sf5jcxDwskr3C=BveDRvFX>W!kLkO`^;0k*7=}vwNj~U
zuds(sv7(nMTm0d7qfIbBAr-c)X2tToAK4$+*P7SQuiC}@b}7GI$Z!7&G<C*V9R6`Z
z^z}cf{86Rg2cL0x^M?E@x<(`y_yF)>JF;C#9blVvgK#Z+<L6cT`vcZ50+#+-`xm>-
zfWLH=476E)*=8B&wEyye8IVasObjx~h`5t=N0}WG_b8%&$rVOCkHu>PlGm~<pE?Jl
zc$ESlzzmFUD1s7)(f2a-s{$1qnl;jn)TK82PTKI5gQAQwUz4RrnlaLjWter+&$=+G
zl(lTbw=tXV(aO7mJ*bjQ^)*|TzrEz(QL~3k7PV~GdC+j^Q|n8|S1J3h6aR4ix@zXD
zK!JkWz|3+vKS2>XPDKlQa`#=he#G~^?ruKh6!YPF)`7Ywzexx^jFfOMnfXhU=3erC
z{ry%v8%=3x2OL7Cwx9DQ-mO(<i84i36JM=NepEDN8}u04vhz(<Eh^LIV%f^yTF$vw
zt9>6Y_l{wZ4$rTDb6AO)qHayK)XQa<LDRq2yPjnaA6U+T!&DUubP9Yb(ho>=k{i;r
zt<ij`E^S_VN>C(HdKaxQvHo#>V0zhJr0-R<kxlmdWxKW~1zitqcfHWUg_*ErZ;)fX
z%Een@Hq0B$r8`}HGe4_a-=?Uq7ps!INcp*?dH8aeyEz+L45`eogupw=flu*7{7s93
z!-jZ{%y*IrqdNQ7DltW#&w?^RnSTeB<8Kk59af|w(?*V_J0ib8W~*tCF{OzXL!lY>
zqYB+6GeNoojYAxxWz(fXHfYT-wrkdrnp0l?A7<Szf?LZ0B;?NlfQvUl*D7yLr@{1%
z<1i`ZQ~Mo_bYxm<@JsWPiPlUKTa&3qF-?ctPy+UWrmSK}XC=UY+c&u3*c@7Qd@>Mq
z{NVJ%)A-D5;~9Yu#LZJwJ>r)09}w6>fUT+6>WUGR|9~31L8+dq&7i(^*aBOco(_e>
zSC?jIZ&x+uYnE$g=w(e1pm|-+_+QiQQMzRePF4ibyLqC?%X-Way1Gd~LIk`fvh^OT
zDy!hXp{U;{uu6a`$V36;2O+&<xhU!;ai8Xc$3BM4Fe`YZvk7)f^7Y703~kB-@&5^>
zhlw|lSRrbD<;9{)`-VU=%%Jz6#mJ}k^*&oNLK;6|@@T)vwM$&Pz_nwwRNwTp?#Nc%
zk<F`O-3h7ggveFHap776qF}!3<d(BmaMnV!Ko$%Tj*LEeTdcewRbG%K1D;ia5>Q6S
z;P7YJkA7KF&U_ij$`u<&#FAqWEpX+_*2tPa8{a7X>ExqHRq_cbi%Q|Iqp}9v)R?pE
zxv9i{*8GgzT*R7so;DrZYC0x1^-E3t#K8r#3_#4UUv;bv#mZnk-r_+R!0Pd5@XU5x
zDBg!N*4iPp9$T}lT@wAxF<<3VU;UPkxMRLOl5dZ|PI4vJ0--+mt|)2WDrw&sehg8F
zlTyh^4fW#uUk^NX{Ke_Nf+d;r@+rD5NG?441(l4Ekwk&V%7<KoBloO1R!}VzG<{Xy
z_*pHiKkdcS^hr&9k4MC&Gh+Q&ss1e7%aH?(uC3CpjmYC(v2<K29bdJ@3gFz);z0)B
z<M|p?S8W%CzVkxhB2Llws^oiBVCPrx!8rC{OsF|6`p!tcGlK67_=LsfF@In?D=`vd
zL$5y_dSh$o4RPp-G<1axQ7pcTVKR$Qa!eJ?vC@E4iZ-i_?W&S?wQLtSywKACSjpKg
zW$s9-Vl^!*gR2l;3&iq3>o@22nPYp7t>y^D4Pt(yYL4uv$n{EGufX-j@(NdS<q5z#
zma0+%79H(pA%fbBpNYZ2vBu(zYLopm%-A@l^^`2yI!Iw71ZX8Adjp{L7*7jJJ~<n_
zMks8&vr&GBZrQxhPk29|TQ-Ltr`r<*j!+nZLChrKzhOh@9;Q9|i&-iKOXapD+q#dm
z5*3hd+T9$h*&W+e9V<iYo!ZVTu%hVzo)!AHt#kv^z?Ey^59hXRbjO)|WorW;9@)0j
zodZ5yww-k4G8LC@=hBs%!dNz5lcRXMfWjz44QOhFcY^RX+Rsb+nfOTs7tDTZ(>g{8
zel|}$u6-PSa$Goj{vU@PzbExyB$=7ZQ^NFB;qq1KwQEBEHR<qmhMNjH+}83my-|cc
z8QnJFN4R{2a=AV$%)KRD=Y?rrx{P`uCKUwotbujQ`uIle#^TSL)+eO~>@5-Y81=_b
zqDI$*>o=t7S>f`m^x7PYNd?t@Yt_cYX6?_RPKwJXC<#BQpo6oPY>q=P#0pk`9{8MA
z|3*95RaVds^`O*erHvB7wP3Uz^__U+i1tbqK&QaR1@iBcY56w;+;6Jb9V-jOs&~f%
zb(r9`^G40qqHPnwXV8p7F1RK8irY4J32!T<+t{BW)B&lgZ6~{=DQY{HUBZ}Mxspr1
z$>Cm^UTt5we!pTH`{_i+a`x$v4L`Y86Ty?o{KzV8KDs6TcB&$q1!0MUh8HdMe^U>j
zCw9V`5m@H3GFvd~AsEn#8k;53c``H$U%~-m^O9gYEsf+QR$xlEJOcMIso%y#12G@a
zhfvl5NU5DXR53Pzl&SV75S1#gUXs|VE?v8Z1>)7I>5!t-#{V(U-@wC$yHOx$D^7F<
zn=2>gtBMs?;p-v8Z70H51O}+nvz@Ennrzk_xp`?KlT{Lkb;K6~Osm^N+|bsFwPFJ<
zj9U@*6|x>esO;t1QC5~<OIUJqrfEl#u1qKksufCo)yc-pk#!oW)~QIWMQPfV7bNmp
z#HTc=qNgdB&3}SIXQ?%NfUdMAN3B^F^X{V7R2wuG!5EPMZfejx^%kR4X;5M{>NsrF
znwXGHzxj6nj1_E(;wt?}-Ts3WOzD&ZJvJjxO=0??`iTRatMU{jLIZ~sjJ=!nZkEE{
z#g&Ffl@sYWzc@2Hvv^xK7`|<vz|WxpCir<kFlw{;1zg0fH~AURMTsGVAjbSjvSjHY
z@si|^82i`}xJ@sFHU*5UYAuX$-ltsU7FQXyi(He$H3?kPmt3yk?%%i#fKxRs3(r$4
zi@*KS>BI8QT9^b3{pe<!;692|^R4%-EG7Ox_xkO&70K`$rjh#o?q<`^9DUyGUwHd+
zvp>(Z;*uvuR@#P$z2tnv=-8-c#zw_-qu(Fm1!;yE7^bF7J7pb?-dk+Sw#tAtsx2c&
zD?XR0v}IX-KtoMoT}fvq4(v`NNdb;Y3drnE$P8$@l0DLusVfohPB*6#$rv(TNiHU1
zf%2q5%`Xvnn?Q1pdmFc7m@bmM$WIA@DBwKXAh8Z-VXsT(RVMk4G+@x{ly1kCR|Gzw
zZuf=K?d(=xjB6J3XRNSh^)gudg{@LyE1cP7?t?72>yz<l=?^9!P73ZW`4qWsiR%`)
zZWeza+5_9Fg8KkY)#y_!uLKnFME7NVSOzRf-{zFy?!&2?U}e31Wl{c1bYCBJUzts~
ztFOZJd4;3jn*Di$x8Icgg~^I*RcDGx1mMBdgI+)D%4JxQQdidP=9zrJqZH9Gu;R?x
zx#|Vtc3DAK?+aS=L?vT(+6-*QlHxJ!{tbc*sxgZI24%JZDby0Bmz#D#Ng2<Xlw<&d
zGc%q|j~C<~rVKU^H=(fnwh4sIzd^0+8c{oVW5`@9sBt@%tp+8g5DuYY>N3Q2>1zkv
zTtnPU3eHA#NFiV$U%31=sBd0c9r6uvQ^>H$*JFt5Nz2!3i0e(mEil9_NW=9R;`-8X
z3k`8od<EdyiVSg!(()}f#4S$4^&8^)({M`+aZA#0OAT>L({Ohg;_gbtHN-5_V;bU>
z>+ymW53_!pB8h}iSgFnom6|aK!Gg?V5YpnAM||qcV1$>_e!)P7nW2i+>5nikRB7sj
z&!$10YzB3z*0;d25njzp;?=$+UfoOL)xRX(ZarSG;bBT^<2^RAHhUdy)~IPSxQ{nz
zGXrstU}{?!;Wg>&WF$TkY}Vri_oOSkXzkl(W}89bJoDmCv(Pd~*{WsmWRQm2o9;_Q
zkz~{8*kKV@f7h<7C+&^Xj^IA3XRwV-gJx8Zx7@YAYu7C6_D4>P&=L+hVDNRuheH9T
z6|dXTF|iAoIRVKG+F8pw<K%$jIuep$AlXoF2P!06>y2Bm47?Gy%#(nFW%2gS5TrKj
z6gJ6%<Vy-?3Q~#&zN~?ls)0}4Nkb(_YTH*+L=Ayj0z@R>X^$)G)hCJkofJmfY`zVv
z2S{qbp!qE7<6swZh4SU=xv5)|ve)Z4$DE7c+2RfuGoK9IT$sKdjysr0^(1){iRZ%O
z2v1(RCt)Hd6nDK7nz?p;aWWX1p1O^@DSmG0CPmFG#67e8WSB&;mmr@p$=V}_{|vR^
z|2={KKwyKwe<bi{0CD@w{M8VoT&6>DH&tss^bTZOZ$OeI?pzGbq2-w9Ut@OMr(#Yf
zYsdebaw3y#kh;^bF67r7%=n%mc?gb@;{;s6PEC_9x=4wBb5Du!Mj_S`j#TR94K@8n
zUi>pF0dR}LgkdbCr%#gBS3}ISQaoqrCbkM8*=VC)@<k(b=-~f^o`M$hieVnwU+}}M
zeOPaz`?!;!LRd+>RThz(c!x|}J?>666wBvdkZ+iVFzq7bL*Bv;QK<wT;A0d1I{@0k
zj+m`(GD6?H7GA$7<aUa=ol<TmXwmkjP0yG9vWM>ITcW>F@;9!Wg7}8LiA6C$MxdL>
z2(%_F8JoPKt!pMM5Y<aooFLn|ysNLlBR4ta_E$ygSy?ex@l#jD7W|x)*MEBH<4fyZ
zV)=fle1FO<{3!>(I|FzQm4m#m36SzZ9!N_{wj%lZu89tU7!OWJB3n=y1MXOU`Pv=Q
zY+t`6RvwTl4{T>6^jUsI)JeMV8*VYsD+PMDtq4UuJ!Na(VRj3?BbFbO$`5YaD4a8S
zDwJIAij_yCN|2!`9Lhwg;Z6#7nLL##`R7u&I~_ld!t>L^JrwRu4=<o_pUG1b9sbkP
zKRhjT42rd{NVTtQ7g9u#$y2^|gP9YU6)O%&6^FKqDcqkPUP9re=?}b%!pqX}%PG7f
zJ-m{_1JJ69?jkD;8<k>JpH$ViT}9#5>G(AiUYjuIFeH|BOJ&_L#VSRj)prvTy*>t>
zn|$=%0dNb@N{;Dg1ZO4ASV?uX8w$F_H%oM<iiS_;Kc0uKZ$*bx(Gja{{nN1@j%^$k
zYmZ5_$6nZ-iq=X5XEn}PSzQ#CDcV*>HQ6>lh1>Cpy&EpE;)qmnBvz3cP}TahYX4T%
z{*7L-YCx(Qhy_}n2HLg)Z5yp(pic_)r3cjQe_GeIRo5kS4~ccdQXPzB*yynVo||m+
z*Z{Z%@PTc()+jh@aAJ%^??~nCFs-aUc0<?GhMuj4p3TEz!%3;(WUPAM)9Q|`>JFjv
zgjhW&RS(80Tb@?7ZB@2yw1}0*q{?GZ;-ta=pu|aq0pK<_Pvi6D?Bij<cNV9}os+n8
z0(VaJ(lP=es_%$ZZBkX6;2V)oksFn`QGpwk9kl%RE14LEQnIWvpyaOayM)418+QP3
zLd}5virkRI4GG*(jLS{X6Wl~kXvbvtzW4Ml(}#tf**|sR;un^Kw!g^6#b22Z+Wva1
z99RFe%W=k;{ZDmOXL7QI94oG;kxV^k<-w>6YY;Tm)02tWB_B+{7<b9N`dxC~3XWF#
z5!9J9O1D}7K@6jm@bDMpu3`WF$e~wbdqEnf*33}Qm1-m51u1iNDcw6rnFlX<`9H+I
zc?b3M>PjovL1{i+%EBF_EYhVc-a$(LB5^;JBUrM7l%?8rZE)8PQkLmbmhT{Cg)U{~
z4pPGKSg`t0%}e(-7zhj2?Et?X_`7$2-=J&v#vMFKlP+cR4pQz3wgg)->h|uSH}BK+
zthODLwjX1s9b=~>*!id{Rd60N196Dbj-MdpXKE;Dn;H?dq!n|m$4ad^GKiL`J3Bp{
zGuZv;fIh?w*(s8kN;<>lFN5Nm+ysM5nT*r#M(<|LpM)W_QnjKc?DP&nKdUSm64lf^
z85)ENKqdyjVpS67P0Y{o5$w^IOWve{DCgbmc~1UK?g{ZU7S!0ANXtYv)jMzr4<AMp
zFo;vi4u+xL0po~*LClvVhI$8#qY4Hw2$LA<9WZ(o3{~xbm7?AO<Cua$tixm;>K!op
z6bz!GCo$AJVDu{(>f2(asCU2^P%zYY#4yx5U>sL4$TVEC6!i`mClrh_9fo=bj6ns1
z<iyy)TZ6A4{G<|2n_g}BDJ7f?&S}Gkl<<HOY+|)l?~voLf}!>khN0d8V?@EA?KL|X
zhI$8#Q3Zqc-boDg4j5w$!xE*9dD@{Lo2353<PJ%lNahm+bp5|BEdvAuPcvnsGvqxt
zozekCo2+Fk)#3hLm{%mqK-v*JtREBMLd+$Z$J$vvCRPY9nI}9AvlS2DWSHWJKJORQ
z;+!6H2esh7wH8P9dB30*<9bYRxKAgWHh3F6lEa42dN^p)&z+#QA2GyEX&K-iHN;J+
zKX7{uaeIRkX|*_Jh<z-0J`KCi5W6pUAq~6V5W7EkF%5gb5PLx1@_5GMhPcOrucqaE
z!VvpJ@U=8-@ahc4&0q%XlZMzQgRi9JeaaB~RPgmQ>>)$!p$yo=hS<Zw@1*5DVu(Ew
z{B9cds3G=fuq6$9%n*AlcqtA0vLW{6;A9&18;00#r1g?1LtMDXOUwC+A@-FF*wcpC
z)4^a`-XTNmQ1EIhHcZ{x7SAT<7X4S_E{8Q1NeL)N@Y=F1xn=<k{CWnux@H~57wb~b
zXxC^N*HzlG-b7jID&pI)_4UTA%|)9MwbuSZ{m1sE`jrj7^5}+sj{*9>whpuU_l$X1
z8#||OjdTSLZM^!G^6T=L2TnnvhMAsnUf+fbz*pCJnecVF--HV9Tk1l~6lB6*a%sVE
zGW=WfS$si0OFPKtZPZ>}!F+3}?*RLSJs^098S+rqi6&avw_#<keM`6g|E%GutkhL}
zV<ghT{4=OhknhYaUe84J4=>%kxxg=~V*P>RAf^Y-@C&5v0Om#@;qU>9u+Cs8Jk8JC
zB(cf}*U}>Mi6WI}pp8mmmKr92;x>RWkDy%pW6*-)mg|trjO4bo5cR!9;r7thLN>|h
zWF^bNq~tq&ePITb;J-lHh+9qt*`gNWAVvI{^kP_yQ^}viVj})T)wBYOq$R6XLpEsx
znKmXzxeH~<%E5AN<XA#x)`(iJ&V*)zk^EEhWJ;Oc1X!V{o@}N_!SE>7mNh6W57pdq
z#6xNnp@7EZ5tRZ?PfJ)fhZHFtow_wMw=}0!H&%3`36=3ST9{H~#5cji83(;Bb{}9|
zr--+uMR9k5_oZMKT_7Wp#FT>Xj+>ATRB&C$Fce-?u<|Exof^+v4gu@dV%!mi1?dnB
zmMw*N51I_GD+eqiWGcU{qLvmuG`H|}2>FGkXQpObrmus&7%4f&LXk5I4Nxetnh=H8
zHoNdnh@Xc1Sfpe?F*V5SYoTW)qfpVgnfVr(OBX2^#bZ+&F+?ShP|%Jliqec)l)U8o
zV)o-?<J*5oJuNr!>R3bsnJI4;`rVOyB~rDbE!X=R3VRz&MN8az{6zoxSK^jaV}qyT
zwu^n|#!ijB61Sc>ckc8#{_k1-BMVF#I3Op<=6{LMxGfmEI<+*rD7RlnT?2I07iZ=|
zjbYdtt!to8rd+aQn)^A7)2VBS#Y?cVNM>FuvdfS`*Arka7{@fi&b0MrqQj{|xEa{3
z6EeS*_GN}l*NFc#3@`FCLF$v5X(6yX;@vGXlLtmlk#W@}m|Tq<(9H>fnQ(v)y|n~O
z<-tHh2oo2W&hRy(%?D=wkU?XlT|Y1Al49^?5>5<9nl)2IS_;iff!QW3&kHo!%R<&=
zbj1w4SK(@q-%a<81a@w<z0y#1P;R0~o-AQ4&xuecjFg^dbF!u-l!+FN$h`FEW|W=)
z6kSM<Y>77Fzkc|;>SS)D*oG424}e@Ga&^i9XbRlqDzP+!;?@%gOe`!;%_e3jbS$Q2
z_q)^_w*e@-aW=~k#~fyA_<u#AzCa*#93i(t;HJ)rJ%J@^@Lzv&KOlE!D4MZ(E}<}p
zEC+zl*c|8JO@_%Z#NB=TwWT?jaX3RF4Sd`+6%0-ymW9dMJjalR?*oWCSQ?6kskvEs
zXvUparouDRn1mqM9=BZ!Elw>i@??PN8pH!I&}EH6#wPx+l+yx%|3cYihi}8s1I$t^
z{y&bcml#~KPT@@|Gf05we6p4W6P_i_BG}c-VGqW~E%a!NXU^<r!Mo1v%p|5}8lCvM
z{%=bBe+Zlch;xaCmGuHMaL418IecsQI7<0v3)tg_fdjugIVN|Jlv^vbjSIOGV(x^L
zJ8^FioM5Z-sik<!QoQE)tUzd=5G=)_<-BA$FIdiNWAipz9{Yv1A>r(0hB7HxCI!o6
z%;J!f3;u@B!otCE7C9kVkkN!1xt9CcfY5b@#hsNbX9dgIWL%!mc1k(VTw*AfB?}6<
ztSkPrY2m;Siyf9M!-8d4%|I!6ghh@@mQlensz$EmQ9|J3sYWCVkVew7)jhK|Wk~4B
zTxY?*=NFwq??smFtCHna!SZS{q5HG*!a>6#-RsLj|94r+OOgc-d`V3y`1fs=2_x57
z<aNn%U9enNBiC}*XNA7Z3D?}8jSJldEs!3kX#v6CxN&Opme79k$$PBW)b}c9eTu~m
zNfx|<Ax-kNTw(8oa$XJ!HzEx0j%2wbSnfP?dY(GVwwz@UTacU;_l~0vT3wQ*_)B-;
zzrD7$`0!oP-72|TV|gdQthVMr?7^A?u?H&@KnS=@1%>y9Q5xs^;0|;=p^ueivHQs<
zM8~5;VrjQj+PyNoGW;T&Pdytjjo*V)bRCjhhXmK5FI{=}Pp+Q*-tc`SP5votSi2t=
zkas97zBd}PIVBquSk2Z0vE9wm?(UV`)j5%?W#Lc<GFy9M4SS@9g9x7&xjKY@@Gb>+
z#OfM<bQz&Hpf~%%Ty~bbaczw9F$yoEYW71zbDy-QZzWeKY7n_bs{eZw(G{z2`q2XF
z4@=!OYG^Y;lVOsL+^1VRWBWQN{T`8PQJ=pF&!2qy7v-5+l4OHAp4oah*4QF79zq@q
zB3G~0K!=TfQC$19sBx>PQ7mefikhj@K+g~0dybXtmGd#Dd$k!ZTXCvtxo0=p1^-!`
zPwM1f(REI8ofBN=V!Hw>&eZ{!CWoRk@^%&fe((2t*N%wJM#<Som4eP~f}wv}<jzRk
z8G$?V%;x>Q_uhYR^{!~ElWcYC<y*Ec!Pd2T=WpNp%lDqVEB0QNdM_hDO*$lU!xA?v
zaKoud{ady+!Pd6f{I`4ma_^I7vFC!+a{&Qr(xmzgP3G{dS>nb>NdTx&eRTT4>F6lb
zt9^ZvuTSvx!4rx*=UHLkqsa%8(aU0Chg8@h6n4N93R32n3MxL@`(SUhS@gC_-d4fe
z3Qs7AfF<F8l2y0pJ0$rI3BE({gn}pxj6_tI=<AVuP(tp3Clo}vOhr|4IUAS6!U3sp
zKqwr5Clo}v)l@?Jl<2!6`K}1QEAWJZxO_EE^zD~?`vu>ActSyhSB=<c7k%B5uUqhS
z!xIXke2SwEq4Ch>xLDFBmGlWEeei^WxFQqUq4z;=^zcT3SkNgIbP5HX+r<>+Pe%E~
zf-b3`ODO1qClo}LD!xR7mVw9TlYU%wQCykYPU}mew^Q<V3f@k5LP1nTGDoji&><Cc
z2n8MRgo3C*GD=fe6-8B>imN|bc(4#%*tjhg^+`p2LQx+)p&+U@naVE~^hgCgLO~Ba
zp&+W><SqZG@<C;^bp4{}>6AR3f~OOnP!QFiHhuCHH&R5CsigMPypQvQhJ%~kqJKd0
z4+#DNctSzk9+SW3qxT-XC+zOtEEbFVq~boIxDTFC5Z9Wqti2SqFWDMl(c3P0+XZhs
zJfXOA?hXF01E%~MF|St2tG#z}+fropF@>7(jT~to)W^o<Q{*NjZbINDV$Qt#uz+ob
zeE^q7@E%@Yd~^ptI8{RciVn}9RGX^9gP8Bx?%P&u&pahhJ@s3j`smxFNk0HBX2DZ0
zdM-$w3oAKak?v5ZYW#}j*AdN;0{aDb2Tqael(<fT>x?-)l2di1z$(;9cz8T|-=eEa
za*=}poL^CGJ0RRwy&u0gMOUZf>J(g^Ec<iO`t?$&x?|%tsk--Zo>YBCaG%2|a^n&=
zE^y-vYb=@*ji4GE2c^J~%{eJBBDlwJiri_5J1uah8P<X51qm`!di5n1(-XZa)kCYP
z2dBs#lDI=c;!KDlr7AY<TR$tf_u&+|Hi>H!xVA4j=Y4L~^F8N%=Zfx6`5Ic8l2aSj
zoxgJVhN?`zs&Wk1XaB0XZMZ7?AF8al{_jUmxUaZPzjiyO?b*Nf*H2rr|Jh>2H96v>
zb?D?INH=VE_A0GmX|n+D;Ea}|-?KHg$+p<AES?Weg=I5rR*3fAyv_dst$blrZt?o;
z%oW+5TMmSDv3J#I_vcXlWJgNY^CvMVS#6Eh$UJG-#<Lb6oJ+*5>Dj4p7>jEcg=SA(
zndZqD5Kk6*cmWINcyag!lOArB<&MLlSvch)@(FPuc^^>te<UCg_;X5O$1a2@2UxfF
z5DslQ;vBYRBWy$RcZA3`0<Tc)#{_;zpoAirVS%G`OM1slU6${s+naRDROi@ch8FcH
zMhU**4x^5cTRzJF55(d$g-KHk60T-*%;dbs{$i#AA#uh`ULkSDOnyOs#!NlJPR^LA
zNyv1@O!j;1FJ`I|^k>XeBV;;bCigw|7c+GT`csoz-aYmgGdb?DKlK@(*-KZ7C41@G
z0nuJ7*=z6R#7sHAm-l|&s_nyE(NrOsDg;x-GqYnmtIG@r5b5AKg?&xndA2Fb`9X<{
zdDDC<%ZvqIX84?if31zZU~!srVLTuWtd2jY(65tGWg)8>12r>T8+*=TzShQ{aV}+3
zlFV@R-RCUkYi;~Z+eM2RMg}s&=PdkdZR{dvH)Cx9us*<kHet~Kcf#wJ<llE9?04H_
zx|4M>Te(i8k#GGD$^syYEby~|Ebt?oJ1_svzr}u$#o4Thti<(1_7t*9B-LD%a^^xZ
z%YdKF4)&7@ikxQOIxH~YCl!>9na%EK&9;et*K60q8w2aN9<|E%8Q_(yt67wb2A;E;
kEUuNvR(6SyT@tf!_m2P0;0J^E*xz=x$x@17rttrN0qT_D)Bpeg

literal 0
HcmV?d00001

diff --git a/pipelines/styling/data_processor.py b/pipelines/styling/data_processor.py
new file mode 100644
index 0000000..cacb5b6
--- /dev/null
+++ b/pipelines/styling/data_processor.py
@@ -0,0 +1,1488 @@
+import json
+import pandas as pd
+import numpy as np
+from pathlib import Path
+from typing import Dict, List, Optional, Union, Any, Tuple
+from datasets import Dataset, load_dataset
+import os
+from dataclasses import dataclass
+from abc import ABC, abstractmethod
+import logging
+from sklearn.model_selection import train_test_split
+import re
+import argparse
+import sys
+import yaml
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
+
+@dataclass
+class StylingConfig:
+    """Configuration for styling tasks: data source, field mapping, splits, text cleaning, output and prompt template."""
+    # Data source configuration
+    data_source: str = "huggingface"  # "huggingface" or "custom"
+    dataset_name: Optional[str] = None  # Hugging Face dataset id; HuggingFaceDataLoader.load raises ValueError when unset
+    data_path: Optional[str] = None  # For custom datasets
+    data_format: str = "jsonl"  # jsonl, csv, json
+
+    # Field mapping - the user configures which record keys hold the source and the styled text
+    input_field: str = "text"  # Key in each raw record containing the source text (e.g., "text", "source")
+    output_field: str = "styled_text"  # Key in each raw record containing the styled text (e.g., "styled_text", "target")
+    instruction: str = "Rewrite the following text in a formal style"  # Style instruction from YAML
+
+    # Data processing. NOTE: HuggingFaceDataLoader.load may overwrite the three *_split values on this instance
+    max_samples: Optional[int] = None  # Cap applied to each split separately by the HF loader; None or 0 means no cap
+    train_split: float = 0.8  # Fraction kept for training when splits are carved out of train
+    validation_split: float = 0.1  # Fraction of train used for validation when no validation split is taken from the dataset
+    test_split: float = 0.1  # Fraction of train used for test; the HF loader renormalizes all three if they do not sum to 1.0
+
+    # Text preprocessing (the code consuming these is outside this view - presumably the loaders' preprocess step; confirm)
+    clean_text: bool = True
+    remove_special_chars: bool = False
+    lowercase: bool = False  # Keep original case for styling
+    min_length: int = 10  # presumably a character count - TODO confirm against the preprocessing code
+    max_length: int = 1000  # presumably a character count - TODO confirm against the preprocessing code
+
+    # Output configuration
+    output_format: str = "styling"  # instruction, conversation, qa - NOTE(review): the default "styling" is not in this list; confirm valid values
+    output_dir: str = "./data"
+
+    # Hugging Face specific
+    hf_split: str = "train"  # NOTE(review): the load_dataset call in HuggingFaceDataLoader.load does not pass this - confirm where it is used
+    hf_cache_dir: Optional[str] = None  # Passed as cache_dir to datasets.load_dataset
+
+    # Split configuration - any value not listed below (including the default "train") means "carve the split out of train"
+    test_split_from: str = "train"  # HF loader recognizes "use_test_if_available" and "use_val_if_available"
+    val_split_from: str = "train"  # HF loader recognizes "use_val_if_available"
+
+    # Custom data specific
+    encoding: str = "utf-8"
+    delimiter: str = ","  # For CSV files
+
+    # Alpaca prompt template; the three {} slots follow the Instruction / Input / Response headings, in that order
+    alpaca_prompt: str = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that follows the instruction
+
+### Instruction:
+{}
+
+### Input:
+{}
+
+### Response:
+{}"""
+
+    eos_token: str = "<|eot_id|>"  # Use <|eot_id|> as EOS token
+
+class DataValidator:
+    """Validates styling data quality and format."""
+
+    @staticmethod
+    def _as_text(value: Any) -> str:
+        """Return ``value`` if it is a string, else ``""``.
+
+        Lets callers apply ``.strip()`` / ``len()`` to ``None`` and other
+        non-string values, which are reported separately as type errors.
+        """
+        return value if isinstance(value, str) else ""
+
+    @staticmethod
+    def validate_styling_data(data: Dict[str, List[Dict]], config: StylingConfig, is_processed: bool = False) -> Tuple[bool, List[str]]:
+        """Validate styling dataset splits.
+
+        Args:
+            data: Mapping of split name to a list of record dicts.
+            config: Supplies ``input_field``/``output_field`` for raw records.
+            is_processed: If True, check the ``input``/``output`` keys instead.
+
+        Returns:
+            ``(is_valid, errors)``; ``is_valid`` is True only when ``errors``
+            is empty. A missing split or an empty train split returns early.
+        """
+        as_text = DataValidator._as_text
+        errors = []
+
+        # Every split key must exist; only train is required to be non-empty.
+        for split in ["train", "validation", "test"]:
+            if split not in data:
+                errors.append(f"Missing '{split}' split")
+            elif split == "train" and not data[split]:
+                errors.append("Train split cannot be empty")
+
+        if errors:
+            return False, errors
+
+        total_samples = sum(len(split_data) for split_data in data.values())
+        logger.info(f"Validating {total_samples} total samples across all splits...")
+
+        # Determine field names based on whether data is processed or not
+        input_field = "input" if is_processed else config.input_field
+        output_field = "output" if is_processed else config.output_field
+        fields = (("input", "Input", input_field), ("output", "Output", output_field))
+
+        for split_name, split_data in data.items():
+            if not split_data:
+                logger.info(f"Skipping validation for empty {split_name} split")
+                continue
+
+            logger.info(f"Validating {split_name} split with {len(split_data)} samples...")
+
+            # Check required fields
+            missing = {"input": 0, "output": 0}
+            for i, item in enumerate(split_data):
+                for kind, _, field in fields:
+                    if field not in item:
+                        errors.append(f"Missing {kind} field '{field}' in {split_name} split, item {i}")
+                        missing[kind] += 1
+
+            logger.info(f"{split_name} - Items missing input field: {missing['input']}")
+            logger.info(f"{split_name} - Items missing output field: {missing['output']}")
+
+            # Check data types (an absent field is reported above, not here)
+            type_errors = 0
+            for i, item in enumerate(split_data):
+                for _, label, field in fields:
+                    if not isinstance(item.get(field, ""), str):
+                        errors.append(f"{label} field '{field}' must be string in {split_name} split, item {i}")
+                        type_errors += 1
+
+            logger.info(f"{split_name} - Type errors: {type_errors}")
+
+            # Check for empty inputs/outputs; non-string values count as empty
+            # instead of raising AttributeError on .strip()
+            empty = {}
+            for kind, _, field in fields:
+                empty[kind] = sum(1 for item in split_data if not as_text(item.get(field)).strip())
+                if empty[kind] > 0:
+                    errors.append(f"Found {empty[kind]} items with empty {kind} text in {split_name} split")
+
+            logger.info(f"{split_name} - Empty inputs: {empty['input']}")
+            logger.info(f"{split_name} - Empty outputs: {empty['output']}")
+
+            # Show sample of processed data for debugging (!s:.50 = first 50 chars of str(value))
+            logger.info(f"Sample processed items from {split_name}:")
+            for i, item in enumerate(split_data[:3]):
+                logger.info(f"  Item {i}: input='{item.get(input_field, '')!s:.50}...', output='{item.get(output_field, '')!s:.50}...'")
+
+        return len(errors) == 0, errors
+
+    @staticmethod
+    def analyze_dataset(data: Dict[str, List[Dict]], config: StylingConfig, is_processed: bool = False) -> Dict[str, Any]:
+        """Analyze dataset characteristics across all splits.
+
+        Args:
+            data: Mapping of split name to a list of record dicts.
+            config: Supplies ``input_field``/``output_field`` for raw records.
+            is_processed: If True, read the ``input``/``output`` keys instead.
+
+        Returns:
+            ``{"splits": {name: stats}, "overall": {"total_samples", "split_sizes"}}``.
+            ``stats`` has ``total_samples``, ``text_length_stats`` (min/max/mean/median
+            character counts under ``"input"``/``"output"``) and ``missing_values``
+            (falsy-value count per field name); both dicts are empty for an empty split.
+        """
+        as_text = DataValidator._as_text
+        analysis = {"splits": {}, "overall": {"total_samples": 0, "split_sizes": {}}}
+
+        # Determine field names based on whether data is processed or not
+        input_field = "input" if is_processed else config.input_field
+        output_field = "output" if is_processed else config.output_field
+
+        for split_name, split_data in data.items():
+            size = len(split_data) if split_data else 0
+            split_analysis = {"total_samples": size, "text_length_stats": {}, "missing_values": {}}
+            analysis["splits"][split_name] = split_analysis
+            analysis["overall"]["total_samples"] += size
+            analysis["overall"]["split_sizes"][split_name] = size
+            if not split_data:
+                continue
+
+            # Text length statistics; non-string values are measured as empty text
+            for field_name, field in [("input", input_field), ("output", output_field)]:
+                text_lengths = [len(as_text(item.get(field))) for item in split_data]
+                split_analysis["text_length_stats"][field_name] = {
+                    "min": min(text_lengths),
+                    "max": max(text_lengths),
+                    "mean": np.mean(text_lengths),
+                    "median": np.median(text_lengths)
+                }
+
+            # Missing values
+            for field in [input_field, output_field]:
+                split_analysis["missing_values"][field] = sum(1 for item in split_data if not item.get(field))
+
+        return analysis
+
+class BaseDataLoader(ABC):
+    """Contract shared by data loaders: subclasses supply load() and preprocess()."""
+
+    @abstractmethod
+    def load(self, config: StylingConfig) -> Dict[str, List[Dict]]:
+        """Read the dataset described by ``config``.
+        Returns a dictionary holding the train/val/test splits."""
+
+    @abstractmethod
+    def preprocess(self, data: Dict[str, List[Dict]], config: StylingConfig) -> Dict[str, List[Dict]]:
+        """Run the preprocessing steps over every split in ``data``.
+        Returns a dictionary of splits, like the one it was given."""
+
+
+class HuggingFaceDataLoader(BaseDataLoader):
+    """Load datasets from Hugging Face Hub"""
+
+    def load(self, config: StylingConfig) -> Dict[str, List[Dict]]:
+        """Load dataset from Hugging Face Hub with flexible split handling"""
+        if not config.dataset_name:
+            raise ValueError("Dataset name is required for Hugging Face datasets")
+
+        logger.info(f"Loading Hugging Face dataset: {config.dataset_name}")
+
+        try:
+            # First, let's check what splits are available in the dataset
+            dataset = load_dataset(
+                config.dataset_name,
+                cache_dir=config.hf_cache_dir
+            )
+
+            # Log available splits
+            available_splits = list(dataset.keys())
+            logger.info(f"Available splits in dataset: {available_splits}")
+
+            # Initialize split data
+            splits_data = {
+                "train": [],
+                "validation": [],
+                "test": []
+            }
+
+            # Handle train split
+            if "train" in available_splits:
+                train_dataset = dataset["train"]
+                logger.info(f"Using 'train' split with {len(train_dataset)} samples")
+                splits_data["train"] = list(train_dataset)
+            else:
+                logger.error("No 'train' split found in dataset!")
+                logger.error(f"Available splits: {available_splits}")
+                raise ValueError(f"Dataset {config.dataset_name} does not have a 'train' split")
+
+            # Handle validation split
+            if config.val_split_from == "use_val_if_available" and "validation" in available_splits:
+                val_dataset = dataset["validation"]
+                logger.info(f"Using 'validation' split with {len(val_dataset)} samples")
+                splits_data["validation"] = list(val_dataset)
+            elif config.val_split_from == "use_val_if_available" and "val" in available_splits:
+                val_dataset = dataset["val"]
+                logger.info(f"Using 'val' split with {len(val_dataset)} samples")
+                splits_data["validation"] = list(val_dataset)
+            elif config.val_split_from == "use_val_if_available":
+                logger.warning("No validation split found in dataset. Will create from train split.")
+                logger.info(f"Available splits: {available_splits}")
+                logger.info(f"Will use {config.validation_split * 100}% of train data for validation")
+            else:
+                logger.info(f"Will create validation split from train data ({config.validation_split * 100}%)")
+
+            # Handle test split
+            if config.test_split_from == "use_test_if_available" and "test" in available_splits:
+                test_dataset = dataset["test"]
+                logger.info(f"Using 'test' split with {len(test_dataset)} samples")
+                splits_data["test"] = list(test_dataset)
+            elif config.test_split_from == "use_val_if_available" and "validation" in available_splits:
+                test_dataset = dataset["validation"]
+                logger.info(f"Using 'validation' split as test with {len(test_dataset)} samples")
+                splits_data["test"] = list(test_dataset)
+            elif config.test_split_from == "use_val_if_available" and "val" in available_splits:
+                test_dataset = dataset["val"]
+                logger.info(f"Using 'val' split as test with {len(test_dataset)} samples")
+                splits_data["test"] = list(test_dataset)
+            elif config.test_split_from == "use_test_if_available":
+                logger.warning("No test split found in dataset. Will create from train split.")
+                logger.info(f"Available splits: {available_splits}")
+                logger.info(f"Will use {config.test_split * 100}% of train data for test")
+            else:
+                logger.info(f"Will create test split from train data ({config.test_split * 100}%)")
+
+            # If we need to create splits from train data
+            if not splits_data["validation"] or not splits_data["test"]:
+                train_data = splits_data["train"]
+                
+                # Handle very small datasets
+                if len(train_data) < 3:
+                    logger.warning(f"Dataset has only {len(train_data)} samples. Using all data for training.")
+                    splits_data["train"] = train_data
+                    splits_data["validation"] = []
+                    splits_data["test"] = []
+                else:
+                    # Calculate remaining percentages for train
+                    total_train_percentage = config.train_split + config.validation_split + config.test_split
+                    if total_train_percentage != 1.0:
+                        logger.warning(f"Split percentages don't sum to 1.0 (got {total_train_percentage}). Normalizing...")
+                        # Normalize percentages
+                        config.train_split = config.train_split / total_train_percentage
+                        config.validation_split = config.validation_split / total_train_percentage
+                        config.test_split = config.test_split / total_train_percentage
+
+                    # Create splits from train data
+                    if not splits_data["validation"] and not splits_data["test"]:
+                        # Split train into train, val, test
+                        train_size = int(len(train_data) * config.train_split)
+                        val_size = int(len(train_data) * config.validation_split)
+
+                        # Handle small datasets
+                        if len(train_data) < 10:
+                            # For small datasets, use more conservative splits
+                            config.train_split = 0.6
+                            config.validation_split = 0.2
+                            config.test_split = 0.2
+                            logger.info(f"Small dataset detected. Adjusted split ratios to: train={config.train_split}, val={config.validation_split}, test={config.test_split}")
+                        
+                        # Ensure minimum sizes
+                        min_val_size = max(1, int(len(train_data) * 0.1))
+                        min_test_size = max(1, int(len(train_data) * 0.1))
+                        
+                        val_size = max(min_val_size, int(len(train_data) * config.validation_split))
+                        test_size = max(min_test_size, int(len(train_data) * config.test_split))
+                        train_size = len(train_data) - val_size - test_size
+                        
+                        # Ensure train has at least 1 sample
+                        if train_size < 1:
+                            if val_size > 1:
+                                val_size -= 1
+                                train_size += 1
+                            elif test_size > 1:
+                                test_size -= 1
+                                train_size += 1
+                            logger.info(f"Adjusted split sizes: train={train_size}, val={val_size}, test={test_size}")
+
+                        # First split: train + (val+test)
+                        new_train, temp_data = train_test_split(
+                            train_data,
+                            test_size=val_size + test_size,
+                            random_state=42
+                        )
+
+                        # Second split: val + test
+                        new_val, new_test = train_test_split(
+                            temp_data,
+                            test_size=test_size / (val_size + test_size) if (val_size + test_size) > 0 else 0,
+                            random_state=42
+                        )
+                        
+                        splits_data["train"] = new_train
+                        splits_data["validation"] = new_val
+                        splits_data["test"] = new_test
+
+                    elif not splits_data["validation"]:
+                        # Only need to create val from train
+                        val_size = max(1, int(len(train_data) * config.validation_split))
+                        new_train, new_val = train_test_split(
+                            train_data,
+                            test_size=val_size,
+                            random_state=42
+                        )
+                        splits_data["train"] = new_train
+                        splits_data["validation"] = new_val
+
+                    elif not splits_data["test"]:
+                        # Only need to create test from train
+                        test_size = max(1, int(len(train_data) * config.test_split))
+                        new_train, new_test = train_test_split(
+                            train_data,
+                            test_size=test_size,
+                            random_state=42
+                        )
+                        splits_data["train"] = new_train
+                        splits_data["test"] = new_test
+
+            logger.info(f"Final split sizes:")
+            logger.info(f"  Train: {len(splits_data['train'])} samples")
+            logger.info(f"  Validation: {len(splits_data['validation'])} samples")
+            logger.info(f"  Test: {len(splits_data['test'])} samples")
+
+            # Ensure all splits exist (even if empty) for the pipeline
+            if "validation" not in splits_data:
+                splits_data["validation"] = []
+            if "test" not in splits_data:
+                splits_data["test"] = []
+
+            # Apply max_samples limit to each split if specified
+            if config.max_samples:
+                for split_name in splits_data:
+                    if splits_data[split_name]:
+                        original_size = len(splits_data[split_name])
+                        splits_data[split_name] = splits_data[split_name][:config.max_samples]
+                        logger.info(f"Limited {split_name} split from {original_size} to {len(splits_data[split_name])} samples")
+
+            # Log dataset info for debugging
+            for split_name, split_data in splits_data.items():
+                if split_data:
+                    logger.info(f"Sample data item from {split_name}: {split_data[0]}")
+                    logger.info(f"Available fields in {split_name} split: {list(split_data[0].keys())}")
+
+                    # Check if the required fields exist
+                    if config.input_field not in split_data[0]:
+                        logger.warning(f"Input field '{config.input_field}' not found in {split_name}. Available fields: {list(split_data[0].keys())}")
+                        # Suggest alternative fields
+                        text_fields = [f for f in split_data[0].keys() if any(keyword in f.lower() for keyword in ['text', 'sentence', 'content', 'input', 'comment', 'message'])]
+                        if text_fields:
+                            logger.info(f"Suggested text fields for {split_name}: {text_fields}")
+                    if config.output_field not in split_data[0]:
+                        logger.warning(f"Output field '{config.output_field}' not found in {split_name}. Available fields: {list(split_data[0].keys())}")
+                        # Suggest alternative fields
+                        output_fields = [f for f in split_data[0].keys() if any(keyword in f.lower() for keyword in ['output', 'response', 'result', 'target', 'styled'])]
+                        if output_fields:
+                            logger.info(f"Suggested output fields for {split_name}: {output_fields}")
+
+            logger.info(f"Successfully loaded dataset {config.dataset_name}")
+            return splits_data
+
+        except Exception as e:
+            logger.error(f"Error loading dataset {config.dataset_name}: {e}")
+            raise
+
+    def preprocess(self, data: Dict[str, List[Dict]], config: StylingConfig) -> Dict[str, List[Dict]]:
+        """Run item-level preprocessing over every split independently.
+
+        For each split this logs field availability, counts records whose
+        configured input/output field is absent or falsy, logs up to three raw
+        records, and then passes every record through ``_preprocess_item``.
+
+        Args:
+            data: Mapping of split name to the raw records of that split.
+            config: Supplies ``input_field`` and ``output_field`` and is
+                forwarded to ``_preprocess_item``.
+
+        Returns:
+            Mapping of split name to the records that ``_preprocess_item``
+            did not reject (i.e. did not return ``None`` for).
+        """
+        in_field = config.input_field
+        out_field = config.output_field
+        results = {}
+
+        logger.info("=== PREPROCESSING DATA ===")
+
+        for split_name, split_data in data.items():
+            logger.info(f"Processing {split_name} split with {len(split_data)} items...")
+
+            # Field diagnostics are based on the first record only.
+            if split_data:
+                available_fields = set(split_data[0].keys())
+                logger.info(f"Available fields in {split_name}: {available_fields}")
+                logger.info(f"Looking for input field: '{in_field}', output field: '{out_field}'")
+
+                for label, field_name in (("Input", in_field), ("Output", out_field)):
+                    if field_name not in available_fields:
+                        logger.error(f"{label} field '{field_name}' not found in {split_name}. Available fields: {available_fields}")
+
+            # A record counts as "missing" when the key is absent or its value is falsy.
+            missing_input = sum(1 for record in split_data if not record.get(in_field))
+            missing_output = sum(1 for record in split_data if not record.get(out_field))
+
+            logger.info(f"{split_name} - Items missing input field: {missing_input}")
+            logger.info(f"{split_name} - Items missing output field: {missing_output}")
+
+            # Preview at most three raw records, truncating long strings.
+            logger.info(f"=== SAMPLE RAW DATA FROM {split_name.upper()} BEFORE PREPROCESSING ===")
+            for idx, record in zip(range(3), split_data):
+                logger.info(f"Raw item {idx} from {split_name}:")
+                for key, value in record.items():
+                    if isinstance(value, str) and len(value) > 100:
+                        logger.info(f"  {key}: '{value[:100]}...'")
+                    else:
+                        logger.info(f"  {key}: {value}")
+
+            kept = []
+            dropped = 0
+
+            # _preprocess_item only emits debug traces for the first few
+            # records, so the counter restarts for each split.
+            self._debug_count = 0
+
+            for idx, record in enumerate(split_data):
+                cleaned = self._preprocess_item(record, config)
+                if cleaned is None:
+                    dropped += 1
+                    if dropped <= 3:  # only report the first few rejections
+                        logger.info(f"Skipped item {idx} from {split_name}: {record}")
+                    continue
+                kept.append(cleaned)
+
+            results[split_name] = kept
+            logger.info(f"{split_name} - Preprocessed {len(kept)} samples, skipped {dropped} samples")
+
+            # Preview at most three processed records.
+            if kept:
+                logger.info(f"=== SAMPLE PROCESSED DATA FROM {split_name.upper()} ===")
+                for idx, record in enumerate(kept[:3]):
+                    logger.info(f"Processed item {idx} from {split_name}: {record}")
+
+        return results
+
+    def _preprocess_item(self, item: Dict, config: StylingConfig) -> Optional[Dict]:
+        """Normalise one raw record into an ``{"input", "output"}`` pair.
+
+        Both configured fields are read from ``item``, ``None`` becomes an
+        empty string, everything else is passed through ``str()``, and the
+        text is cleaned when ``config.clean_text`` is set.
+
+        Returns:
+            The normalised pair, or ``None`` when either text is shorter than
+            ``config.min_length`` or longer than ``config.max_length``.
+        """
+        raw_input = item.get(config.input_field, "")
+        raw_output = item.get(config.output_field, "")
+
+        # Debug traces are only emitted for the first three calls since the
+        # counter was last reset.
+        self._debug_count = getattr(self, '_debug_count', 0) + 1
+        trace = self._debug_count <= 3
+
+        if trace:
+            logger.debug(f"Processing item {self._debug_count}:")
+            logger.debug(f"  Looking for input field '{config.input_field}': {raw_input}")
+            logger.debug(f"  Looking for output field '{config.output_field}': {raw_output}")
+
+        # None -> "", anything else -> its string form.
+        input_text = "" if raw_input is None else str(raw_input)
+        output_text = "" if raw_output is None else str(raw_output)
+
+        if trace:
+            logger.debug(f"  After conversion - input: '{input_text[:50]}...', output: '{output_text[:50]}...'")
+
+        if config.clean_text:
+            cleaned_input = self._clean_text(input_text, config)
+            cleaned_output = self._clean_text(output_text, config)
+            if trace:
+                logger.debug(f"  After cleaning - input: '{input_text[:50]}...' -> '{cleaned_input[:50]}...'")
+                logger.debug(f"  After cleaning - output: '{output_text[:50]}...' -> '{cleaned_output[:50]}...'")
+            input_text, output_text = cleaned_input, cleaned_output
+
+        # The input is checked before the output; the first violation rejects the record.
+        for label, text in (("input", input_text), ("output", output_text)):
+            if len(text) < config.min_length or len(text) > config.max_length:
+                if trace:
+                    logger.debug(f"  Skipping - {label} length {len(text)} not in range [{config.min_length}, {config.max_length}]")
+                return None
+
+        # Downstream steps always see the fixed keys "input" and "output".
+        processed_item = {"input": input_text, "output": output_text}
+
+        if trace:
+            logger.debug(f"  Final processed item: {processed_item}")
+
+        return processed_item
+
+    def _clean_text(self, text: str, config: StylingConfig) -> str:
+        """Return ``text`` with whitespace collapsed and optional normalisation applied.
+
+        Non-string input yields an empty string. Runs of whitespace become a
+        single space and the ends are trimmed; ``config.lowercase`` and
+        ``config.remove_special_chars`` then enable the remaining steps.
+        """
+        if not isinstance(text, str):
+            return ""
+
+        collapsed = re.sub(r'\s+', ' ', text)
+        result = collapsed.strip()
+
+        if config.lowercase:
+            result = result.lower()
+
+        if config.remove_special_chars:
+            # Drop everything that is neither a word character nor whitespace.
+            result = re.sub(r'[^\w\s]', '', result)
+
+        return result
+
+
+class CustomDataLoader(BaseDataLoader):
+    """Load custom datasets from local files"""
+    
+    def load(self, config: StylingConfig) -> Dict[str, List[Dict]]:
+        """Read a local dataset file and partition it into splits.
+
+        Args:
+            config: Provides ``data_path``, ``data_format`` ("jsonl", "csv" or
+                "json"), the optional ``max_samples`` cap, and the settings
+                used by the reader and by ``_create_splits``.
+
+        Returns:
+            The split mapping produced by ``_create_splits``.
+
+        Raises:
+            ValueError: If ``data_path`` is empty or ``data_format`` is not supported.
+            FileNotFoundError: If ``data_path`` does not exist.
+        """
+        if not config.data_path:
+            raise ValueError("Data path is required for custom datasets")
+
+        source = Path(config.data_path)
+        if not source.exists():
+            raise FileNotFoundError(f"Data file not found: {source}")
+
+        logger.info(f"Loading custom dataset: {source}")
+
+        # Pick the reader that matches the declared file format.
+        readers = {
+            "jsonl": self._load_jsonl,
+            "csv": self._load_csv,
+            "json": self._load_json,
+        }
+        reader = readers.get(config.data_format)
+        if reader is None:
+            raise ValueError(f"Unsupported format: {config.data_format}")
+        records = reader(source, config)
+
+        # The cap is applied to the whole file before it is split.
+        if config.max_samples:
+            records = records[:config.max_samples]
+
+        logger.info(f"Loaded {len(records)} samples from {source}")
+
+        return self._create_splits(records, config)
+    
+    def _create_splits(self, data: List[Dict], config: StylingConfig) -> Dict[str, List[Dict]]:
+        """Partition ``data`` into train, validation and test lists.
+
+        Datasets with fewer than three records are returned entirely as
+        "train". Otherwise validation and test each receive at least one
+        record (and at least 10% of the data), and the remainder is train.
+        Datasets with fewer than ten records use fixed 60/20/20 ratios in
+        place of the configured ones.
+
+        Args:
+            data: Raw records to partition.
+            config: Supplies ``validation_split`` and ``test_split``. It is
+                only read: the small-dataset ratios are kept local so the
+                caller's configuration is not overwritten.
+
+        Returns:
+            Dict with the keys "train", "validation" and "test".
+        """
+        total_samples = len(data)
+        logger.info(f"Creating splits from {total_samples} samples...")
+
+        # Too few records to hold anything out.
+        if total_samples < 3:
+            logger.warning(f"Dataset has only {total_samples} samples. Using all data for training.")
+            return {
+                "train": data,
+                "validation": [],
+                "test": []
+            }
+
+        if total_samples < 10:
+            # Small datasets ignore the configured ratios. The override is
+            # local so that it cannot leak into later uses of `config`.
+            train_ratio, val_ratio, test_ratio = 0.6, 0.2, 0.2
+            logger.info(f"Small dataset detected. Adjusted split ratios to: train={train_ratio}, val={val_ratio}, test={test_ratio}")
+        else:
+            val_ratio = config.validation_split
+            test_ratio = config.test_split
+
+        # Each held-out split gets at least one record and at least 10% of the data.
+        min_holdout = max(1, int(total_samples * 0.1))
+        val_size = max(min_holdout, int(total_samples * val_ratio))
+        test_size = max(min_holdout, int(total_samples * test_ratio))
+        train_size = total_samples - val_size - test_size
+
+        if train_size < 1:
+            # Hand records back to train, taking from the larger held-out
+            # split (validation on ties), until train is non-empty. With at
+            # least three records this always ends with train_size >= 1.
+            while train_size < 1 and (val_size > 1 or test_size > 1):
+                if val_size >= test_size:
+                    val_size -= 1
+                else:
+                    test_size -= 1
+                train_size += 1
+            logger.info(f"Adjusted split sizes to ensure train has at least 1 sample: train={train_size}, val={val_size}, test={test_size}")
+
+        logger.info(f"Split sizes: train={train_size}, validation={val_size}, test={test_size}")
+
+        # val_size and test_size are both >= 1 here, so a three-way split is
+        # always needed. First carve off the held-out pool, then divide it.
+        train_data, holdout = train_test_split(
+            data,
+            test_size=val_size + test_size,
+            random_state=42
+        )
+        val_data, test_data = train_test_split(
+            holdout,
+            test_size=test_size,
+            random_state=42
+        )
+
+        splits_data = {
+            "train": train_data,
+            "validation": val_data,
+            "test": test_data
+        }
+
+        logger.info("Created splits:")
+        logger.info(f"  Train: {len(splits_data['train'])} samples")
+        logger.info(f"  Validation: {len(splits_data['validation'])} samples")
+        logger.info(f"  Test: {len(splits_data['test'])} samples")
+
+        return splits_data
+    
+    def _load_jsonl(self, file_path: Path, config: StylingConfig) -> List[Dict]:
+        """Parse a JSON Lines file into a list of decoded values.
+
+        Blank lines are ignored. A line that is not valid JSON is reported
+        with a warning (including its 1-based line number) and skipped.
+        """
+        records = []
+        with open(file_path, 'r', encoding=config.encoding) as handle:
+            for line_num, raw_line in enumerate(handle, 1):
+                if not raw_line.strip():
+                    continue
+                try:
+                    parsed = json.loads(raw_line)
+                except json.JSONDecodeError as e:
+                    logger.warning(f"Invalid JSON at line {line_num}: {e}")
+                else:
+                    records.append(parsed)
+        return records
+    
+    def _load_csv(self, file_path: Path, config: StylingConfig) -> List[Dict]:
+        """Read a CSV file into a list of row dictionaries.
+
+        Empty cells are returned as ``None``. pandas represents them as
+        float NaN, which would otherwise slip past the ``is None`` handling
+        during preprocessing and be stringified to the literal text "nan".
+
+        Args:
+            file_path: Location of the CSV file.
+            config: Supplies ``encoding`` and ``delimiter``.
+
+        Returns:
+            One dict per row, keyed by column name.
+        """
+        df = pd.read_csv(file_path, encoding=config.encoding, delimiter=config.delimiter)
+        # Cast to object first so that None can be stored in numeric columns.
+        df = df.astype(object).where(df.notna(), None)
+        return df.to_dict('records')
+    
+    def _load_json(self, file_path: Path, config: StylingConfig) -> List[Dict]:
+        """Read a JSON document and return its records as a list.
+
+        A top-level list is returned as is; a top-level object with a
+        "data" key yields the value of that key; any other document is
+        wrapped in a single-element list.
+        """
+        with open(file_path, 'r', encoding=config.encoding) as handle:
+            payload = json.load(handle)
+
+        if isinstance(payload, list):
+            return payload
+        if isinstance(payload, dict) and "data" in payload:
+            return payload["data"]
+        return [payload]
+    
+    def preprocess(self, data: Dict[str, List[Dict]], config: StylingConfig) -> Dict[str, List[Dict]]:
+        """Run ``_preprocess_item`` over every record of every split.
+
+        Args:
+            data: Mapping of split name to raw records.
+            config: Forwarded unchanged to ``_preprocess_item``.
+
+        Returns:
+            Mapping of split name to the records that were not rejected.
+        """
+        results = {}
+
+        logger.info("=== PREPROCESSING CUSTOM DATA ===")
+
+        for split_name, split_data in data.items():
+            logger.info(f"Processing {split_name} split with {len(split_data)} items...")
+
+            kept = []
+            dropped = 0
+
+            # The debug counter restarts for each split.
+            self._debug_count = 0
+
+            for idx, record in enumerate(split_data):
+                cleaned = self._preprocess_item(record, config)
+                if cleaned is None:
+                    dropped += 1
+                    if dropped <= 3:  # only report the first few rejections
+                        logger.info(f"Skipped item {idx} from {split_name}: {record}")
+                    continue
+                kept.append(cleaned)
+
+            results[split_name] = kept
+            logger.info(f"{split_name} - Preprocessed {len(kept)} samples, skipped {dropped} samples")
+
+        return results
+    
+    def _preprocess_item(self, item: Dict, config: StylingConfig) -> Optional[Dict]:
+        """Normalise one raw record into an ``{"input", "output"}`` pair.
+
+        Each configured field is read from ``item``; ``None`` becomes an
+        empty string, anything else is converted with ``str()``, and the text
+        is cleaned when ``config.clean_text`` is set.
+
+        Returns:
+            The normalised pair, or ``None`` when either text falls outside
+            ``[config.min_length, config.max_length]``.
+        """
+        texts = []
+        for field_name in (config.input_field, config.output_field):
+            value = item.get(field_name, "")
+            text = "" if value is None else str(value)
+            if config.clean_text:
+                text = self._clean_text(text, config)
+            texts.append(text)
+
+        # Reject the record as soon as one side violates the length bounds.
+        for text in texts:
+            if len(text) < config.min_length or len(text) > config.max_length:
+                return None
+
+        # Downstream steps always see the fixed keys "input" and "output".
+        input_text, output_text = texts
+        return {"input": input_text, "output": output_text}
+    
+    def _clean_text(self, text: str, config: StylingConfig) -> str:
+        """Return ``text`` with whitespace collapsed and optional normalisation applied.
+
+        Non-string input yields an empty string. Runs of whitespace become a
+        single space and the ends are trimmed; ``config.lowercase`` and
+        ``config.remove_special_chars`` then enable the remaining steps.
+        """
+        if not isinstance(text, str):
+            return ""
+
+        collapsed = re.sub(r'\s+', ' ', text)
+        result = collapsed.strip()
+
+        if config.lowercase:
+            result = result.lower()
+
+        if config.remove_special_chars:
+            # Drop everything that is neither a word character nor whitespace.
+            result = re.sub(r'[^\w\s]', '', result)
+
+        return result
+
+
+class StylingDataPipeline:
+    """Main styling pipeline"""
+    
+    def __init__(self):
+        """Create the validator and the two data loaders used by the pipeline."""
+        # Constructed left to right: validator, Hugging Face loader, custom loader.
+        self.validator, self.hf_loader, self.custom_loader = (
+            DataValidator(),
+            HuggingFaceDataLoader(),
+            CustomDataLoader(),
+        )
+    
+    def create_config(
+        self,
+        data_source: str,
+        dataset_name: Optional[str] = None,
+        data_path: Optional[str] = None,
+        input_field: str = "input",
+        output_field: str = "output",
+        instruction: str = "Rewrite the following text in a formal style",
+        **kwargs
+    ) -> StylingConfig:
+        """Build a ``StylingConfig`` from the given arguments.
+
+        The named parameters are passed to ``StylingConfig`` under the same
+        names; any extra keyword arguments are forwarded unchanged.
+        """
+        settings = dict(
+            data_source=data_source,
+            dataset_name=dataset_name,
+            data_path=data_path,
+            input_field=input_field,
+            output_field=output_field,
+            instruction=instruction,
+        )
+        return StylingConfig(**settings, **kwargs)
+
+    def load_config_from_yaml(self, yaml_path: str) -> StylingConfig:
+        """Build a ``StylingConfig`` from a YAML file.
+
+        Every supported key is read from the parsed YAML mapping; a key that
+        is absent falls back to the default listed below. Any failure is
+        logged and re-raised unchanged.
+        """
+        # Supported keys and the value used when the YAML file omits them.
+        defaults = {
+            'data_source': 'custom',
+            'dataset_name': None,
+            'data_path': None,
+            'data_format': 'jsonl',
+            'input_field': 'text',
+            'output_field': 'styled_text',
+            'instruction': 'Rewrite the following text in a formal style',
+            'max_samples': None,
+            'train_split': 0.8,
+            'validation_split': 0.1,
+            'test_split': 0.1,
+            'clean_text': True,
+            'remove_special_chars': False,
+            'lowercase': False,
+            'min_length': 10,
+            'max_length': 1000,
+            'output_format': 'styling',
+            'output_dir': './data',
+            'hf_split': 'train',
+            'hf_cache_dir': None,
+            'test_split_from': 'train',
+            'val_split_from': 'train',
+            'encoding': 'utf-8',
+            'delimiter': ',',
+        }
+
+        try:
+            config_dict = load_yaml_config(yaml_path)
+
+            settings = {
+                key: config_dict.get(key, fallback)
+                for key, fallback in defaults.items()
+            }
+            config = StylingConfig(**settings)
+
+            logger.info(f"Configuration loaded from YAML: {yaml_path}")
+            logger.info(f"Output directory: {config.output_dir}")
+            logger.info(f"Instruction: {config.instruction}")
+
+            return config
+
+        except Exception as e:
+            logger.error(f"Error loading configuration from YAML {yaml_path}: {e}")
+            raise
+    
+    def load_and_preprocess(self, config: StylingConfig) -> Tuple[Dict[str, List[Dict]], Dict[str, Any]]:
+        """Load the configured dataset, preprocess it, validate it and analyse it.
+
+        Returns:
+            A ``(processed_splits, analysis)`` tuple.
+
+        Raises:
+            ValueError: If ``config.data_source`` is neither "huggingface" nor
+                "custom", or if the validator reports the processed data as invalid.
+        """
+        # Choose the loader; an unknown source fails before any loader is touched.
+        if config.data_source == "huggingface":
+            loader = self.hf_loader
+        elif config.data_source == "custom":
+            loader = self.custom_loader
+        else:
+            raise ValueError(f"Unsupported data source: {config.data_source}")
+
+        raw_splits = loader.load(config)
+        processed_splits = loader.preprocess(raw_splits, config)
+
+        # Validation runs on the already-processed records.
+        is_valid, errors = self.validator.validate_styling_data(processed_splits, config, is_processed=True)
+        if not is_valid:
+            logger.error("Data validation failed:")
+            for problem in errors:
+                logger.error(f"  - {problem}")
+            raise ValueError("Data validation failed")
+
+        analysis = self.validator.analyze_dataset(processed_splits, config, is_processed=True)
+
+        return processed_splits, analysis
+    
+    def convert_to_alpaca_format(self, data: Dict[str, List[Dict]], config: StylingConfig) -> Dict[str, List[Dict]]:
+        """Turn every record into an Alpaca-style instruction/input/output dict.
+
+        ``config.instruction`` is used for every record. A missing or
+        ``None`` "input"/"output" value becomes an empty string; any other
+        value is converted with ``str()``.
+        """
+        def as_text(value):
+            # None -> "", everything else -> its string form.
+            return "" if value is None else str(value)
+
+        return {
+            split_name: [
+                {
+                    "instruction": config.instruction,
+                    "input": as_text(record.get("input", "")),
+                    "output": as_text(record.get("output", "")),
+                }
+                for record in split_data
+            ]
+            for split_name, split_data in data.items()
+        }
+
+    def format_for_training(self, data: Dict[str, List[Dict]], config: StylingConfig) -> Dict[str, List[str]]:
+        """Render every record as a single training string.
+
+        Each string is ``config.alpaca_prompt`` filled positionally with the
+        instruction, the input and the output, followed by
+        ``config.eos_token``. A missing or ``None`` "input"/"output" value is
+        rendered as an empty string.
+        """
+        def as_text(value):
+            # None -> "", everything else -> its string form.
+            return "" if value is None else str(value)
+
+        formatted_splits = {}
+        for split_name, split_data in data.items():
+            formatted_splits[split_name] = [
+                config.alpaca_prompt.format(
+                    config.instruction,
+                    as_text(record.get("input", "")),
+                    as_text(record.get("output", "")),
+                ) + config.eos_token
+                for record in split_data
+            ]
+
+        return formatted_splits
+
+    def convert_to_hf_dataset(self, dataset_entries: List[Dict], config: StylingConfig):
+        """Build a HuggingFace ``Dataset`` with an added "text" column.
+
+        The dataset is created from ``dataset_entries`` and then mapped in
+        batches; "text" holds ``config.alpaca_prompt`` filled with each row's
+        instruction, input and output, followed by ``config.eos_token``.
+        """
+        from datasets import Dataset
+
+        def render(instruction, input_text, output):
+            # None input/output is rendered as ""; other values via str().
+            input_text = "" if input_text is None else str(input_text)
+            output = "" if output is None else str(output)
+            return config.alpaca_prompt.format(instruction, input_text, output) + config.eos_token
+
+        def formatting_prompts_func(examples):
+            # `examples` is a batch: each key maps to a list of column values.
+            rows = zip(examples["instruction"], examples["input"], examples["output"])
+            return {"text": [render(*row) for row in rows]}
+
+        hf_dataset = Dataset.from_list(dataset_entries)
+        return hf_dataset.map(formatting_prompts_func, batched=True)
+
+    def save_hf_dataset_to_disk(self, hf_dataset, save_path: str):
+        """Write ``hf_dataset`` to ``save_path`` via its ``save_to_disk`` method.
+
+        Returns:
+            ``True`` on success; ``False`` if any exception was raised, in
+            which case the error is logged instead of propagated.
+        """
+        try:
+            hf_dataset.save_to_disk(save_path)
+            logger.info(f"HuggingFace dataset saved to disk at: {save_path}")
+        except Exception as e:
+            logger.error(f"Error saving HuggingFace dataset to disk: {e}")
+            return False
+        return True
+
+    def load_hf_dataset_from_disk(self, load_path: str):
+        """Load a HuggingFace dataset previously saved at ``load_path``.
+
+        Returns:
+            The loaded dataset, or ``None`` if any exception was raised, in
+            which case the error is logged instead of propagated.
+        """
+        try:
+            from datasets import load_from_disk
+            loaded = load_from_disk(load_path)
+            logger.info(f"HuggingFace dataset loaded from disk: {load_path}")
+            logger.info(f"Dataset has {len(loaded)} entries")
+            logger.info(f"Dataset features: {loaded.features}")
+        except Exception as e:
+            logger.error(f"Error loading HuggingFace dataset from disk: {e}")
+            return None
+        return loaded
+
+    def save_data(self, data: Dict[str, List[Dict]], output_dir: str, format: str = "jsonl"):
+        """Write each split to ``<output_dir>/<split_name>.<format>``.
+
+        Args:
+            data: Mapping of split name to the records of that split.
+            output_dir: Target directory; created (with parents) if missing.
+            format: One of "jsonl", "json" or "csv".
+
+        Raises:
+            ValueError: If ``format`` is not supported. This is checked before
+                anything is written, so an unknown format can no longer reach
+                the log line with an unset or stale output file name.
+        """
+        supported_formats = ("jsonl", "json", "csv")
+        if format not in supported_formats:
+            raise ValueError(
+                f"Unsupported output format: {format!r}. "
+                f"Expected one of: {', '.join(supported_formats)}"
+            )
+
+        output_path = Path(output_dir)
+        output_path.mkdir(parents=True, exist_ok=True)
+
+        for split_name, split_data in data.items():
+            # The file extension is the format name for all supported formats.
+            output_file = output_path / f"{split_name}.{format}"
+
+            if format == "jsonl":
+                # One JSON document per line.
+                with open(output_file, 'w', encoding='utf-8') as f:
+                    for item in split_data:
+                        f.write(json.dumps(item, ensure_ascii=False) + '\n')
+            elif format == "json":
+                with open(output_file, 'w', encoding='utf-8') as f:
+                    json.dump(split_data, f, ensure_ascii=False, indent=2)
+            else:  # "csv"
+                pd.DataFrame(split_data).to_csv(output_file, index=False)
+
+            logger.info(f"Saved {len(split_data)} samples to {output_file}")
+    
+    def run_pipeline(
+        self,
+        config: StylingConfig,
+        output_format: str = "styling",
+        save_splits: bool = True,
+        create_hf_dataset: bool = False,
+        save_hf_dataset: bool = False,
+        hf_dataset_path: Optional[str] = None
+    ) -> Dict[str, Any]:
+        """Run the complete styling pipeline.
+
+        Steps: load and preprocess the data, optionally convert it to Alpaca
+        format, optionally save the splits, and optionally build (and save) a
+        single HuggingFace dataset from all splits.
+
+        Args:
+            config: Pipeline configuration.
+            output_format: "alpaca" converts the records with
+                ``convert_to_alpaca_format``; any other value keeps the
+                processed records unchanged.
+            save_splits: Write the splits into ``config.output_dir``.
+            create_hf_dataset: Build a HuggingFace dataset from all splits.
+            save_hf_dataset: Save that dataset to disk. Only has an effect
+                together with ``create_hf_dataset``.
+            hf_dataset_path: Where to save the dataset; defaults to
+                ``<config.output_dir>/hf_dataset``.
+
+        Returns:
+            Summary dict with the keys "config", "analysis", "splits" (record
+            count per split), "output_format", "output_dir", "data" and
+            "instruction", plus "hf_dataset" and "hf_dataset_path" when a
+            dataset was created and saved respectively.
+        """
+        logger.info("Starting styling pipeline...")
+
+        # Load and preprocess data
+        processed_splits, analysis = self.load_and_preprocess(config)
+
+        # Convert to desired output format
+        if output_format == "alpaca":
+            formatted_splits = self.convert_to_alpaca_format(processed_splits, config)
+        else:
+            formatted_splits = processed_splits
+
+        # Save directly in the output directory, not in a subdirectory
+        if save_splits:
+            self.save_data(formatted_splits, str(Path(config.output_dir)))
+
+        hf_dataset = None
+        hf_dataset_save_path = None
+        if create_hf_dataset:
+            # Flatten all splits into one list. Each record is copied before
+            # the instruction is filled in, so the records returned under
+            # "data" stay identical to what was written to disk above.
+            all_entries = []
+            for split_data in formatted_splits.values():
+                for item in split_data:
+                    entry = dict(item)
+                    entry.setdefault("instruction", config.instruction)
+                    all_entries.append(entry)
+
+            hf_dataset = self.convert_to_hf_dataset(all_entries, config)
+            logger.info(f"HuggingFace dataset created with {len(hf_dataset)} entries")
+            logger.info(f"Dataset features: {hf_dataset.features}")
+
+            if save_hf_dataset:
+                if hf_dataset_path is None:
+                    # Default location lives under the configured output_dir.
+                    hf_dataset_path = str(Path(config.output_dir) / "hf_dataset")
+
+                if self.save_hf_dataset_to_disk(hf_dataset, hf_dataset_path):
+                    hf_dataset_save_path = hf_dataset_path
+                    logger.info(f"HuggingFace dataset saved to: {hf_dataset_save_path}")
+                else:
+                    logger.warning("Failed to save HuggingFace dataset to disk")
+        elif save_hf_dataset:
+            # Nothing to save: make the ignored flag visible instead of silent.
+            logger.warning("save_hf_dataset=True has no effect without create_hf_dataset=True")
+
+        # Create result summary
+        result = {
+            "config": config,
+            "analysis": analysis,
+            "splits": {
+                split_name: len(split_data) for split_name, split_data in formatted_splits.items()
+            },
+            "output_format": output_format,
+            "output_dir": config.output_dir,
+            "data": formatted_splits,  # Include the actual processed data
+            "instruction": config.instruction
+        }
+
+        # Add HuggingFace dataset info to result if created
+        if hf_dataset is not None:
+            result["hf_dataset"] = hf_dataset
+            if hf_dataset_save_path:
+                result["hf_dataset_path"] = hf_dataset_save_path
+
+        logger.info("Styling pipeline completed successfully!")
+        return result
+
+# Helper functions
+def create_huggingface_config(dataset_name: str, input_field: str = "text", output_field: str = "output", instruction: str = "Rewrite the following text in a formal style", **kwargs) -> StylingConfig:
+    """Build a StylingConfig that reads from a HuggingFace dataset; extra kwargs are forwarded"""
+    fixed_fields = {
+        "data_source": "huggingface",
+        "dataset_name": dataset_name,
+        "input_field": input_field,
+        "output_field": output_field,
+        "instruction": instruction,
+    }
+    return StylingConfig(**fixed_fields, **kwargs)
+
+
+def create_custom_config(data_path: str, data_format: str = "jsonl", input_field: str = "text", output_field: str = "styled_text", instruction: str = "Rewrite the following text in a formal style", **kwargs) -> StylingConfig:
+    """Build a StylingConfig that reads from a local data file; extra kwargs are forwarded"""
+    fixed_fields = {
+        "data_source": "custom",
+        "data_path": data_path,
+        "data_format": data_format,
+        "input_field": input_field,
+        "output_field": output_field,
+        "instruction": instruction,
+    }
+    return StylingConfig(**fixed_fields, **kwargs)
+
+
+def save_hf_dataset_to_disk(hf_dataset, save_path: str) -> bool:
+    """Write hf_dataset to save_path; return True on success, False if anything fails"""
+    try:
+        hf_dataset.save_to_disk(save_path)
+        print(f"HuggingFace dataset saved to disk at: {save_path}")
+    except Exception as exc:
+        print(f"Error saving HuggingFace dataset to disk: {exc}")
+        return False
+    return True
+
+
+def load_hf_dataset_from_disk(load_path: str):
+    """Read a HuggingFace dataset from load_path; return it, or None if anything fails"""
+    try:
+        from datasets import load_from_disk
+        dataset = load_from_disk(load_path)
+        print(f"HuggingFace dataset loaded from disk: {load_path}")
+        print(f"Dataset has {len(dataset)} entries")
+        print(f"Dataset features: {dataset.features}")
+    except Exception as exc:
+        print(f"Error loading HuggingFace dataset from disk: {exc}")
+        return None
+    return dataset
+
+
+def load_yaml_config(config_path: str) -> Dict[str, Any]:
+    """Load a YAML configuration file and flatten its sections into one dict.
+
+    Keys missing from a present section are stored as None; errors are logged and re-raised.
+    """
+    try:
+        with open(config_path, 'r', encoding='utf-8') as f:
+            yaml_data = yaml.safe_load(f) or {}  # an empty file parses to None
+        # Extract configuration from YAML structure
+        config_dict = {}
+
+        # Handle task section
+        if 'task' in yaml_data:
+            task_data = yaml_data['task'] or {}  # an empty section parses to None
+            config_dict.update({
+                'task_name': task_data.get('name'),
+                'task_type': task_data.get('type')
+            })
+
+        # Handle data section
+        if 'data' in yaml_data:
+            data_config = yaml_data['data'] or {}
+            config_dict.update({
+                'data_source': data_config.get('source'),
+                'dataset_name': data_config.get('dataset_name'),
+                'data_path': data_config.get('data_path'),
+                'data_format': data_config.get('data_format'),
+                'input_field': data_config.get('input_field'),
+                'output_field': data_config.get('output_field'),
+                'instruction': data_config.get('instruction'),
+                'max_samples': data_config.get('max_samples'),
+                'train_split': data_config.get('train_split'),
+                'validation_split': data_config.get('validation_split'),
+                'test_split': data_config.get('test_split'),
+                'clean_text': data_config.get('clean_text'),
+                'lowercase': data_config.get('lowercase'),
+                'min_length': data_config.get('min_length'),
+                'max_length': data_config.get('max_length'),
+                'output_format': data_config.get('output_format'),
+                'output_dir': data_config.get('output_dir'),
+                'encoding': data_config.get('encoding'),
+                'delimiter': data_config.get('delimiter')
+            })
+
+        # Handle model section
+        if 'model' in yaml_data:
+            model_data = yaml_data['model'] or {}
+            config_dict.update({
+                'model_name': model_data.get('name'),
+                'model_max_length': model_data.get('max_length')
+            })
+
+        # Handle training section
+        if 'training' in yaml_data:
+            training_data = yaml_data['training'] or {}
+            config_dict.update({
+                'num_epochs': training_data.get('num_epochs'),
+                'batch_size': training_data.get('batch_size'),
+                'learning_rate': training_data.get('learning_rate'),
+                'weight_decay': training_data.get('weight_decay'),
+                'warmup_ratio': training_data.get('warmup_ratio'),
+                'lr_scheduler_type': training_data.get('lr_scheduler_type')
+            })
+
+        # Handle inference section
+        if 'inference' in yaml_data:
+            inference_data = yaml_data['inference'] or {}
+            config_dict.update({
+                'inference_batch_size': inference_data.get('batch_size'),
+                'max_new_tokens': inference_data.get('max_new_tokens'),
+                'temperature': inference_data.get('temperature')
+            })
+
+        logger.info(f"Successfully parsed YAML configuration from: {config_path}")
+        logger.info(f"Extracted {len(config_dict)} configuration parameters")
+        return config_dict
+    except Exception as e:
+        logger.error(f"Error loading YAML config from {config_path}: {e}")
+        raise
+
+
+def main():
+    """Command-line entry point for the styling data pipeline.
+
+    Settings come from an optional YAML file (``--config``); CLI arguments
+    override YAML values, and built-in defaults fill whatever is still unset.
+    """
+    parser = argparse.ArgumentParser(description="Styling Data Processing Pipeline")
+    # YAML configuration
+    parser.add_argument("--config", type=str, help="Path to YAML configuration file")
+
+    # Data source arguments
+    parser.add_argument("--data-source", choices=["huggingface", "custom"], help="Data source")
+    parser.add_argument("--dataset-name", type=str, help="HuggingFace dataset name")
+    parser.add_argument("--data-path", type=str, help="Path to custom data file")
+    parser.add_argument("--data-format", choices=["jsonl", "csv", "json"], help="Data format")
+
+    # Field mapping
+    parser.add_argument("--input-field", type=str, help="Input field name")
+    parser.add_argument("--output-field", type=str, help="Output field name")
+    parser.add_argument("--instruction", type=str, help="Style instruction")
+
+    # Data processing
+    parser.add_argument("--max-samples", type=int, help="Maximum samples to process")
+    parser.add_argument("--train-split", type=float, help="Training split ratio")
+    parser.add_argument("--validation-split", type=float, help="Validation split ratio")
+    parser.add_argument("--test-split", type=float, help="Test split ratio")
+
+    # Text preprocessing
+    parser.add_argument("--clean-text", action="store_true", help="Clean and normalize text")
+    parser.add_argument("--remove-special-chars", action="store_true", help="Remove special characters")
+    parser.add_argument("--lowercase", action="store_true", help="Convert text to lowercase")
+    parser.add_argument("--min-length", type=int, help="Minimum text length")
+    parser.add_argument("--max-length", type=int, help="Maximum text length")
+
+    # Output configuration
+    parser.add_argument("--output-format", choices=["styling", "alpaca"], help="Output format")
+    parser.add_argument("--output-dir", type=str, help="Output directory")
+
+    # HuggingFace dataset options
+    parser.add_argument("--create-hf-dataset", action="store_true", help="Create HuggingFace dataset")
+    parser.add_argument("--hf-dataset-path", type=str, help="Path to save HuggingFace dataset")
+
+    # Logging
+    parser.add_argument("--log-level", choices=["DEBUG", "INFO", "WARNING", "ERROR"], default="INFO", help="Logging level")
+
+    args = parser.parse_args()
+
+    # Set up logging
+    logging.basicConfig(
+        level=getattr(logging, args.log_level),
+        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+    )
+
+    # Load configuration
+    config_dict = {}
+    # Load YAML config if provided
+    if args.config:
+        try:
+            config_dict = load_yaml_config(args.config)
+        except Exception as e:
+            logger.error(f"Error loading YAML config: {e}")
+            sys.exit(1)
+
+    # Override YAML config with CLI arguments
+    cli_overrides = {}
+    if args.data_source:
+        cli_overrides['data_source'] = args.data_source
+    if args.dataset_name:
+        cli_overrides['dataset_name'] = args.dataset_name
+    if args.data_path:
+        cli_overrides['data_path'] = args.data_path
+    if args.data_format:
+        cli_overrides['data_format'] = args.data_format
+    if args.input_field:
+        cli_overrides['input_field'] = args.input_field
+    if args.output_field:
+        cli_overrides['output_field'] = args.output_field
+    if args.instruction:
+        cli_overrides['instruction'] = args.instruction
+    if args.max_samples:
+        cli_overrides['max_samples'] = args.max_samples
+    # An explicit 0 / 0.0 is a legitimate split ratio, so compare against None
+    if args.train_split is not None:
+        cli_overrides['train_split'] = args.train_split
+    if args.validation_split is not None:
+        cli_overrides['validation_split'] = args.validation_split
+    if args.test_split is not None:
+        cli_overrides['test_split'] = args.test_split
+    if args.clean_text:
+        cli_overrides['clean_text'] = True
+    if args.remove_special_chars:
+        cli_overrides['remove_special_chars'] = True
+    if args.lowercase:
+        cli_overrides['lowercase'] = True
+    if args.min_length is not None:  # 0 is a legitimate minimum length
+        cli_overrides['min_length'] = args.min_length
+    if args.max_length:
+        cli_overrides['max_length'] = args.max_length
+    if args.output_format:
+        cli_overrides['output_format'] = args.output_format
+    if args.output_dir:
+        cli_overrides['output_dir'] = args.output_dir
+
+    # HuggingFace dataset options
+    if args.create_hf_dataset:
+        cli_overrides['create_hf_dataset'] = True
+    if args.hf_dataset_path:
+        cli_overrides['hf_dataset_path'] = args.hf_dataset_path
+
+    # Logging
+    if args.log_level:
+        cli_overrides['log_level'] = args.log_level
+
+    # Merge configurations
+    for key, value in cli_overrides.items():
+        if key in config_dict:
+            logger.info(f"Overriding YAML config '{key}' with CLI value: {value}")
+        config_dict[key] = value
+    # YAML keys left unset are loaded as None; drop them so the defaults below apply
+    config_dict = {key: value for key, value in config_dict.items() if value is not None}
+
+    # Validate required arguments
+    if not config_dict.get('data_source'):
+        parser.error("--data-source is required (either in YAML config or CLI)")
+    if config_dict.get('data_source') == "huggingface" and not config_dict.get('dataset_name'):
+        parser.error("--dataset-name is required for HuggingFace datasets")
+    if config_dict.get('data_source') == "custom" and not config_dict.get('data_path'):
+        parser.error("--data-path is required for custom datasets")
+
+    # Create configuration object - properly handle YAML structure
+    config = StylingConfig(
+        data_source=config_dict.get('data_source', 'huggingface'),
+        dataset_name=config_dict.get('dataset_name'),
+        data_path=config_dict.get('data_path'),
+        data_format=config_dict.get('data_format', 'jsonl'),
+        input_field=config_dict.get('input_field', 'text'),
+        output_field=config_dict.get('output_field', 'styled_text'),
+        instruction=config_dict.get('instruction', 'Rewrite the following text in a formal style'),
+        max_samples=config_dict.get('max_samples'),
+        train_split=config_dict.get('train_split', 0.8),
+        validation_split=config_dict.get('validation_split', 0.1),
+        test_split=config_dict.get('test_split', 0.1),
+        clean_text=config_dict.get('clean_text', True),
+        remove_special_chars=config_dict.get('remove_special_chars', False),
+        lowercase=config_dict.get('lowercase', False),
+        min_length=config_dict.get('min_length', 10),
+        max_length=config_dict.get('max_length', 1000),
+        output_format=config_dict.get('output_format', 'styling'),
+        output_dir=config_dict.get('output_dir', './data'),
+        hf_split=config_dict.get('hf_split', 'train'),
+        hf_cache_dir=config_dict.get('hf_cache_dir'),
+        test_split_from=config_dict.get('test_split_from', 'train'),
+        val_split_from=config_dict.get('val_split_from', 'train'),
+        encoding=config_dict.get('encoding', 'utf-8'),
+        delimiter=config_dict.get('delimiter', ',')
+    )
+
+    # Initialize pipeline
+    pipeline = StylingDataPipeline()
+
+    try:
+        print(f"Starting styling pipeline with {config.data_source} data source...")
+        if args.config:
+            print(f"Using YAML configuration: {args.config}")
+        print(f"Style instruction: {config.instruction}")
+        print()
+
+        # Check if we should create HuggingFace dataset
+        create_hf_dataset = cli_overrides.get('create_hf_dataset', False)
+        hf_dataset_path = cli_overrides.get('hf_dataset_path')
+        # If creating HF dataset, also save it by default
+        save_hf_dataset = create_hf_dataset
+        result = pipeline.run_pipeline(
+            config, 
+            config.output_format, 
+            save_splits=True, 
+            create_hf_dataset=create_hf_dataset,
+            save_hf_dataset=save_hf_dataset,
+            hf_dataset_path=hf_dataset_path
+        )
+
+        print(f"✅ Pipeline completed successfully!")
+        print(f"  Data source: {config.data_source}")
+        if config.data_source == "huggingface":
+            print(f"  Dataset: {config.dataset_name}")
+        else:
+            print(f"  Data file: {config.data_path}")
+        print(f"  Total samples: {result['analysis']['overall']['total_samples']}")
+        print(f"  Split sizes: {result['analysis']['overall']['split_sizes']}")
+        print(f"  Output directory: {config.output_dir}")
+        print(f"  Style instruction: {config.instruction}")
+
+    except Exception as e:
+        print(f"❌ Error running pipeline: {e}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":  # run the CLI only when executed as a script
+    main()
diff --git a/pipelines/styling/inference.py b/pipelines/styling/inference.py
new file mode 100644
index 0000000..9dccafb
--- /dev/null
+++ b/pipelines/styling/inference.py
@@ -0,0 +1,346 @@
+#!/usr/bin/env python3
+"""
+Styling Inference Pipeline using Trained Models
+Supports style transfer inference with streaming and batch processing
+"""
+
+import os
+import sys
+import json
+import logging
+import argparse
+from pathlib import Path
+from typing import Dict, Any, Optional, List, Union
+import yaml
+
+# Add the project root to the path
+sys.path.append(str(Path(__file__).parent.parent.parent))
+
+from utils.config.config_manager import ConfigManager
+from utils.logging.logging import setup_logging
+
+# Inference imports
+import torch
+from datasets import load_from_disk, Dataset
+from unsloth import FastLanguageModel
+from transformers import TextStreamer
+
+logger = logging.getLogger(__name__)
+
+class StylingInference:
+    """Style-transfer inference with a trained (or base) Unsloth model and an Alpaca-style prompt"""
+    
+    def __init__(self, config: Dict[str, Any]):
+        """Read model and generation settings from config; the model itself is loaded later"""
+        self.config = config
+        self.model = None
+        self.tokenizer = None
+        # Set device
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        logger.info(f"Using device: {self.device}")
+        
+        # Model parameters
+        self.model_path = config.get('model_path')
+        self.max_seq_length = config.get('max_seq_length', 2048)
+        self.dtype = config.get('dtype', None)
+        self.load_in_4bit = config.get('load_in_4bit', True)
+        self.hf_token = config.get('hf_token', None)
+        
+        # Inference parameters
+        self.batch_size = config.get('batch_size', 1)
+        self.max_new_tokens = config.get('max_new_tokens', 128)
+        self.temperature = config.get('temperature', 0.8)
+        self.top_p = config.get('top_p', 0.9)
+        self.do_sample = config.get('do_sample', True)
+        
+        # Alpaca prompt template
+        self.alpaca_prompt = config.get('alpaca_prompt', """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that follows the instruction
+
+### Instruction:
+{}
+
+### Input:
+{}
+
+### Response:
+{}""")
+        
+        # Style instruction
+        self.style_instruction = config.get('style_instruction', 'Rewrite the following text in a formal style')
+    
+    def load_model_and_tokenizer(self):
+        """Load the model/tokenizer from model_path if it exists, else the base model; re-raises on failure"""
+        logger.info("Loading model and tokenizer...")
+        
+        try:
+            if self.model_path and Path(self.model_path).exists():
+                # Load local trained model
+                logger.info(f"Loading local model from: {self.model_path}")
+                self.model, self.tokenizer = FastLanguageModel.from_pretrained(
+                    model_name=self.model_path,
+                    max_seq_length=self.max_seq_length,
+                    dtype=self.dtype,
+                    load_in_4bit=self.load_in_4bit,
+                    token=self.hf_token
+                )
+            else:
+                # Load base model from HuggingFace Hub
+                logger.info(f"Loading base model: {self.config.get('base_model_name', 'unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit')}")
+                self.model, self.tokenizer = FastLanguageModel.from_pretrained(
+                    model_name=self.config.get('base_model_name', 'unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit'),
+                    max_seq_length=self.max_seq_length,
+                    dtype=self.dtype,
+                    load_in_4bit=self.load_in_4bit,
+                    token=self.hf_token
+                )
+            
+            # Prepare for inference
+            FastLanguageModel.for_inference(self.model)
+            
+            logger.info(f"✅ Model loaded successfully")
+            logger.info(f"✅ Tokenizer loaded with vocab size: {self.tokenizer.vocab_size}")
+            
+        except Exception as e:
+            logger.error(f"❌ Error loading model: {e}")
+            raise
+    
+    def format_prompt(self, instruction: str, input_text: str, output: str = "") -> str:
+        """Fill the Alpaca template with instruction, input text and (usually empty) response"""
+        return self.alpaca_prompt.format(instruction, input_text, output)
+    
+    def generate_text(self, prompt: str, max_new_tokens: Optional[int] = None) -> str:
+        """Generate a completion for one prompt; returns only the new text, or "" if generation fails"""
+        try:
+            # Tokenize input
+            inputs = self.tokenizer([prompt], return_tensors="pt").to(self.device)
+            
+            # Set generation parameters
+            gen_kwargs = {
+                "max_new_tokens": max_new_tokens or self.max_new_tokens,
+                "temperature": self.temperature,
+                "top_p": self.top_p,
+                "do_sample": self.do_sample,
+                "use_cache": True,
+                "pad_token_id": self.tokenizer.eos_token_id
+            }
+            
+            # Generate
+            with torch.no_grad():
+                outputs = self.model.generate(**inputs, **gen_kwargs)
+            
+            # Decode only the newly generated tokens. Stripping the prompt by
+            # substring match is unreliable because the decoded text is not
+            # guaranteed to reproduce the prompt exactly.
+            prompt_length = inputs["input_ids"].shape[1]
+            new_tokens = outputs[0][prompt_length:]
+            generated_text = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
+
+            return generated_text.strip()
+            
+        except Exception as e:
+            logger.error(f"❌ Error generating text: {e}")
+            return ""
+    
+    def style_transfer(self, input_text: str, instruction: Optional[str] = None, streaming: bool = False) -> str:
+        """Restyle input_text (default instruction if None); streaming mode streams output and returns \"\""""
+        if instruction is None:
+            instruction = self.style_instruction
+        
+        # Format prompt
+        prompt = self.format_prompt(instruction, input_text, "")
+        
+        logger.info(f"Style transfer prompt: {prompt}")
+        
+        if streaming:
+            logger.info("Generating with streaming...")
+            self.generate_text_streaming(prompt)
+            return ""
+        else:
+            logger.info("Generating text...")
+            result = self.generate_text(prompt)
+            logger.info(f"Generated result: {result}")
+            return result
+    
+    def generate_text_streaming(self, prompt: str, max_new_tokens: Optional[int] = None):
+        """Generate for one prompt, emitting tokens through a TextStreamer; errors are logged, not raised"""
+        try:
+            # Tokenize input
+            inputs = self.tokenizer([prompt], return_tensors="pt").to(self.device)
+            
+            # Setup text streamer
+            text_streamer = TextStreamer(self.tokenizer)
+            
+            # Set generation parameters
+            gen_kwargs = {
+                "max_new_tokens": max_new_tokens or self.max_new_tokens,
+                "temperature": self.temperature,
+                "top_p": self.top_p,
+                "do_sample": self.do_sample,
+                "use_cache": True,
+                "pad_token_id": self.tokenizer.eos_token_id
+            }
+            
+            # Generate with streaming
+            with torch.no_grad():
+                _ = self.model.generate(**inputs, streamer=text_streamer, **gen_kwargs)
+                
+        except Exception as e:
+            logger.error(f"❌ Error in streaming generation: {e}")
+    
+    def batch_style_transfer(self, input_texts: List[str], instruction: Optional[str] = None) -> List[str]:
+        """Run style_transfer on each text in order (one at a time) and return the results"""
+        results = []
+        
+        for i, input_text in enumerate(input_texts):
+            logger.info(f"Processing text {i+1}/{len(input_texts)}")
+            result = self.style_transfer(input_text, instruction)
+            results.append(result)
+        
+        return results
+
+def load_inference_config(config_path: str) -> Dict[str, Any]:
+    """Load model, inference and style settings from a YAML file; errors are logged and re-raised"""
+    try:
+        with open(config_path, 'r', encoding='utf-8') as f:
+            config = yaml.safe_load(f) or {}  # an empty file parses to None
+        
+        # Extract inference configuration
+        inference_config = {}
+        
+        # Model configuration
+        if 'model' in config:
+            model_data = config['model'] or {}  # an empty section parses to None
+            inference_config.update({
+                'base_model_name': model_data.get('training_model', 'unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit'),
+                'max_seq_length': model_data.get('training_max_seq_length', 2048),
+                'dtype': model_data.get('training_dtype'),
+                'load_in_4bit': model_data.get('training_load_in_4bit', True),
+                'hf_token': model_data.get('training_token')
+            })
+        
+        # Inference configuration
+        if 'inference' in config:
+            inference_data = config['inference'] or {}
+            inference_config.update({
+                'batch_size': inference_data.get('batch_size', 1),
+                'max_new_tokens': inference_data.get('max_new_tokens', 128),
+                'temperature': inference_data.get('temperature', 0.8)
+            })
+        
+        # Style configuration
+        if 'data' in config:
+            data_config = config['data'] or {}
+            inference_config.update({
+                'style_instruction': data_config.get('instruction', 'Rewrite the following text in a formal style')
+            })
+        
+        return inference_config
+        
+    except Exception as e:
+        logger.error(f"Error loading inference config: {e}")
+        raise
+
+def main():
+    """CLI entry point: single text (--text), batch file (--input-file), or interactive mode"""
+    parser = argparse.ArgumentParser(description="Styling Inference Pipeline")
+    
+    # Configuration
+    parser.add_argument("--config", type=str, required=True, help="Path to YAML configuration file")
+    parser.add_argument("--model-path", type=str, help="Path to trained model (optional, uses base model if not provided)")
+    
+    # Inference modes
+    parser.add_argument("--text", type=str, help="Single text to style transfer")
+    parser.add_argument("--input-file", type=str, help="File containing texts to process (one per line)")
+    
+    # Generation parameters
+    parser.add_argument("--max-tokens", type=int, help="Maximum new tokens to generate")
+    parser.add_argument("--temperature", type=float, help="Sampling temperature")
+    parser.add_argument("--streaming", action="store_true", help="Enable streaming generation")
+    parser.add_argument("--instruction", type=str, help="Custom style instruction")
+    
+    # Output
+    parser.add_argument("--output-file", type=str, help="Output file for results")
+    
+    args = parser.parse_args()
+    
+    # Setup logging
+    setup_logging()
+    
+    try:
+        # Load configuration
+        logger.info(f"Loading configuration from: {args.config}")
+        inference_config = load_inference_config(args.config)
+        
+        # Override with CLI arguments
+        if args.model_path:
+            inference_config['model_path'] = args.model_path
+        if args.max_tokens:
+            inference_config['max_new_tokens'] = args.max_tokens
+        if args.temperature:
+            inference_config['temperature'] = args.temperature
+        if args.instruction:
+            inference_config['style_instruction'] = args.instruction
+        
+        logger.info("Inference configuration:")
+        for key, value in inference_config.items():
+            logger.info(f"  {key}: {value}")
+        
+        # Initialize inference
+        inferencer = StylingInference(inference_config)
+        
+        # Load model
+        inferencer.load_model_and_tokenizer()
+        
+        # Run inference based on mode
+        if args.text:
+            # Single text inference
+            logger.info("Running single text inference...")
+            result = inferencer.style_transfer(args.text, args.instruction, args.streaming)
+            if not args.streaming:
+                print(f"\nGenerated text: {result}")
+        
+        elif args.input_file:
+            # Batch file inference (the --streaming flag is not used in this mode)
+            logger.info("Running batch file inference...")
+            with open(args.input_file, 'r', encoding='utf-8') as f:
+                input_texts = [line.strip() for line in f if line.strip()]
+            
+            results = inferencer.batch_style_transfer(input_texts, args.instruction)
+            
+            # Save results (default name is derived from the input file's stem)
+            output_file = args.output_file or f"{Path(args.input_file).stem}_styled.txt"
+            with open(output_file, 'w', encoding='utf-8') as f:
+                for input_text, result in zip(input_texts, results):
+                    f.write(f"Input: {input_text}\n")
+                    f.write(f"Output: {result}\n")
+                    f.write("-" * 50 + "\n")
+            
+            logger.info(f"✅ Results saved to: {output_file}")
+        
+        else:
+            # Interactive mode
+            logger.info("Entering interactive mode. Type 'quit' to exit.")
+            while True:
+                try:
+                    user_input = input("\nEnter text to style (or 'quit'): ").strip()
+                    if user_input.lower() == 'quit':
+                        break
+                    
+                    if user_input:
+                        result = inferencer.style_transfer(user_input, args.instruction, args.streaming)
+                        if not args.streaming:
+                            print(f"\nStyled text: {result}")
+                
+                except (KeyboardInterrupt, EOFError):  # closed/piped stdin must end the loop, not spin
+                    break
+                except Exception as e:
+                    logger.error(f"Error processing input: {e}")
+        
+        logger.info("🎉 Inference completed successfully!")
+        
+    except Exception as e:
+        logger.error(f"❌ Inference failed: {e}")
+        sys.exit(1)
+
+if __name__ == "__main__":  # run the CLI only when executed as a script
+    main()
diff --git a/pipelines/styling/train.py b/pipelines/styling/train.py
new file mode 100644
index 0000000..2afaaf8
--- /dev/null
+++ b/pipelines/styling/train.py
@@ -0,0 +1,446 @@
+#!/usr/bin/env python3
+"""
+Styling Training Pipeline using Unsloth and SFTTrainer
+Supports style transfer tasks with LoRA fine-tuning
+"""
+
+import os
+import sys
+import json
+import logging
+import argparse
+from pathlib import Path
+from typing import Dict, Any, Optional
+import yaml
+
+# Add the project root to the path
+sys.path.append(str(Path(__file__).parent.parent.parent))
+
+from utils.config.config_manager import ConfigManager
+#from utils.logging.logging import setup_logging
+
+# Training imports
+import torch
+from datasets import load_from_disk, Dataset
+from unsloth import FastLanguageModel, is_bfloat16_supported
+from trl import SFTTrainer
+from transformers import TrainingArguments
+
+logger = logging.getLogger(__name__)
+
+class StylingTrainer:
+    """Fine-tune a language model for style transfer with Unsloth and SFTTrainer.
+
+    Typical use is ``StylingTrainer(config).train(dataset_path)``, which loads
+    the base model, attaches LoRA adapters, renders the dataset into
+    Alpaca-style prompts, trains, and saves the result to ``model_output_dir``.
+    """
+
+    def __init__(self, config: Dict[str, Any]):
+        """Read hyper-parameters from ``config``, falling back to built-in defaults.
+
+        Args:
+            config: Flat settings dict. ``model_name`` is read later by
+                ``load_model_and_tokenizer`` and has no default; every other
+                key is optional.
+        """
+        self.config = config
+        self.model = None
+        self.tokenizer = None
+        self.trainer = None
+
+        # Set device
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        logger.info(f"Using device: {self.device}")
+
+        # Model loading parameters
+        self.max_seq_length = config.get('max_seq_length', 2048)
+        self.dtype = config.get('dtype', None)
+        self.load_in_4bit = config.get('load_in_4bit', True)
+        self.hf_token = config.get('hf_token', None)
+
+        # LoRA parameters
+        self.lora_r = config.get('lora_r', 16)
+        self.lora_alpha = config.get('lora_alpha', 16)
+        self.lora_dropout = config.get('lora_dropout', 0)
+        self.target_modules = config.get('target_modules', [
+            "q_proj", "k_proj", "v_proj", "o_proj",
+            "gate_proj", "up_proj", "down_proj"
+        ])
+
+        # Training arguments
+        self.batch_size = config.get('batch_size', 2)
+        self.gradient_accumulation_steps = config.get('gradient_accumulation_steps', 4)
+        self.learning_rate = config.get('learning_rate', 2e-4)
+        self.num_epochs = config.get('num_epochs', 1)
+        self.max_steps = config.get('max_steps', None)
+        self.warmup_steps = config.get('warmup_steps', 5)
+        self.weight_decay = config.get('weight_decay', 0.01)
+        self.seed = config.get('seed', 3407)
+
+        # Output paths
+        self.output_dir = config.get('output_dir', './outputs')
+        self.model_output_dir = config.get('model_output_dir', './models/styling')
+
+    def load_model_and_tokenizer(self):
+        """Load the pre-trained model and tokenizer named by ``config['model_name']``.
+
+        Raises:
+            Exception: Any loading error is logged and re-raised unchanged.
+        """
+        logger.info("Loading model and tokenizer...")
+
+        try:
+            self.model, self.tokenizer = FastLanguageModel.from_pretrained(
+                model_name=self.config['model_name'],
+                max_seq_length=self.max_seq_length,
+                dtype=self.dtype,
+                load_in_4bit=self.load_in_4bit,
+                token=self.hf_token
+            )
+
+            logger.info(f"✅ Model loaded: {self.config['model_name']}")
+            logger.info(f"✅ Tokenizer loaded with vocab size: {self.tokenizer.vocab_size}")
+
+        except Exception as e:
+            logger.error(f"❌ Error loading model: {e}")
+            raise
+
+    def setup_lora(self):
+        """Wrap the loaded model with LoRA adapters for efficient fine-tuning.
+
+        Raises:
+            Exception: Any PEFT setup error is logged and re-raised unchanged.
+        """
+        logger.info("Setting up LoRA configuration...")
+
+        try:
+            self.model = FastLanguageModel.get_peft_model(
+                self.model,
+                r=self.lora_r,
+                target_modules=self.target_modules,
+                lora_alpha=self.lora_alpha,
+                lora_dropout=self.lora_dropout,
+                bias="none",
+                use_gradient_checkpointing="unsloth",
+                random_state=self.seed,
+                use_rslora=False,
+                loftq_config=None
+            )
+
+            logger.info(f"✅ LoRA configured with r={self.lora_r}, alpha={self.lora_alpha}")
+
+        except Exception as e:
+            logger.error(f"❌ Error setting up LoRA: {e}")
+            raise
+
+    def _load_jsonl_dataset(self, dataset_path: str) -> Dataset:
+        """Build a Dataset from the JSONL split files inside ``dataset_path``.
+
+        Only ``train.jsonl`` is used when it exists, so the held-out
+        validation/test splits never leak into training. When there is no
+        ``train.jsonl`` the other split files are used as a fallback.
+
+        Raises:
+            ValueError: If no records could be read.
+        """
+        logger.info("Loading from processed data files...")
+        data_dir = Path(dataset_path)
+
+        train_file = data_dir / "train.jsonl"
+        if train_file.exists():
+            split_files = [train_file]
+        else:
+            logger.warning(
+                f"No train.jsonl in {dataset_path}; falling back to validation/test files for training"
+            )
+            split_files = [data_dir / "validation.jsonl", data_dir / "test.jsonl"]
+
+        all_data = []
+        for file_path in split_files:
+            if not file_path.exists():
+                continue
+            logger.info(f"Loading {file_path.name}...")
+            with open(file_path, 'r', encoding='utf-8') as f:
+                # Blank lines are tolerated; every other line must be one JSON object
+                all_data.extend(json.loads(line) for line in f if line.strip())
+
+        if not all_data:
+            raise ValueError(f"No data found in {dataset_path}")
+
+        dataset = Dataset.from_list(all_data)
+        logger.info(f"Created HuggingFace dataset from {len(all_data)} samples")
+        return dataset
+
+    def load_dataset(self, dataset_path: str) -> Dataset:
+        """Load the training dataset from disk or from the HuggingFace Hub.
+
+        Resolution order:
+          1. an existing directory containing ``dataset_info.json`` is read
+             with ``load_from_disk``;
+          2. any other existing path is read as a directory of JSONL splits;
+          3. a non-existent path is treated as a Hub dataset id and its
+             ``train`` split is downloaded.
+
+        Args:
+            dataset_path: Local path or Hub dataset id.
+
+        Returns:
+            The loaded dataset.
+
+        Raises:
+            ValueError: If no data is found or a required column
+                (``instruction``, ``input``, ``output``) is missing.
+        """
+        logger.info(f"Loading dataset from: {dataset_path}")
+
+        try:
+            data_dir = Path(dataset_path)
+            if data_dir.exists():
+                if (data_dir / "dataset_info.json").exists():
+                    dataset = load_from_disk(dataset_path)
+                    logger.info(f"Loaded HuggingFace dataset from disk: {len(dataset)} samples")
+                else:
+                    dataset = self._load_jsonl_dataset(dataset_path)
+            else:
+                # load_dataset is a module-level function of `datasets`, not a
+                # method of Dataset. Imported under an alias so it cannot be
+                # confused with this method.
+                from datasets import load_dataset as hf_load_dataset
+
+                logger.info(f"Attempting to load from HuggingFace Hub: {dataset_path}")
+                dataset = hf_load_dataset(dataset_path, split="train")
+                logger.info(f"Loaded from HuggingFace Hub: {len(dataset)} samples")
+
+            logger.info(f"Dataset loaded: {len(dataset)} samples")
+            logger.info(f"Dataset features: {dataset.features}")
+
+            # Verify required fields exist
+            required_fields = ["instruction", "input", "output"]
+            missing_fields = [field for field in required_fields if field not in dataset.features]
+            if missing_fields:
+                raise ValueError(f"Missing required fields in dataset: {missing_fields}")
+
+            return dataset
+
+        except Exception as e:
+            logger.error(f"Error loading dataset: {e}")
+            raise
+
+    def setup_trainer(self, train_dataset: Dataset):
+        """Format ``train_dataset`` into prompts and build the SFTTrainer.
+
+        Args:
+            train_dataset: Dataset with ``instruction``, ``input`` and
+                ``output`` columns. These are replaced by a single ``text``
+                column holding the rendered prompt plus the EOS token.
+
+        Raises:
+            Exception: Any setup error is logged and re-raised unchanged.
+        """
+        logger.info("Setting up SFTTrainer...")
+
+        try:
+            # Must add EOS_TOKEN, otherwise your generation will go on forever!
+            eos_token = self.tokenizer.eos_token
+            alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that follows the instruction
+
+### Instruction:
+{}
+
+### Input:
+{}
+
+### Response:
+{}"""
+
+            def formatting_prompts_func(examples):
+                """Render one batch of examples into full prompt strings."""
+                rows = zip(examples["instruction"], examples["input"], examples["output"])
+                texts = [
+                    alpaca_prompt.format(instruction, input_text, output) + eos_token
+                    for instruction, input_text, output in rows
+                ]
+                return {"text": texts}
+
+            # Apply the formatting function to create the text field
+            logger.info("Mapping dataset to create text field with EOS token...")
+            formatted_dataset = train_dataset.map(formatting_prompts_func, batched=True, remove_columns=train_dataset.column_names)
+
+            logger.info(f"Dataset mapped successfully. New features: {formatted_dataset.features}")
+            logger.info(f"Sample text field: {formatted_dataset[0]['text'][:100]}...")
+
+            # TrainingArguments expects an int here; -1 means "no step limit,
+            # use num_train_epochs". Passing None breaks the trainer.
+            max_steps = self.max_steps if self.max_steps is not None else -1
+
+            # Evaluation is left at the library default ("no"). The keyword is
+            # not passed because it was renamed between transformers releases.
+            training_args = TrainingArguments(
+                per_device_train_batch_size=self.batch_size,
+                gradient_accumulation_steps=self.gradient_accumulation_steps,
+                warmup_steps=self.warmup_steps,
+                num_train_epochs=self.num_epochs,
+                max_steps=max_steps,
+                learning_rate=self.learning_rate,
+                fp16=not is_bfloat16_supported(),
+                bf16=is_bfloat16_supported(),
+                logging_steps=1,
+                optim="adamw_8bit",
+                weight_decay=self.weight_decay,
+                lr_scheduler_type="linear",
+                seed=self.seed,
+                output_dir=self.output_dir,
+                report_to="none",  # Disable wandb for now
+                save_strategy="epoch",
+                save_total_limit=2,
+                load_best_model_at_end=False,
+                remove_unused_columns=False,
+                dataloader_pin_memory=False,
+            )
+
+            # Create trainer with the formatted dataset
+            self.trainer = SFTTrainer(
+                model=self.model,
+                tokenizer=self.tokenizer,
+                train_dataset=formatted_dataset,  # Use the formatted dataset
+                dataset_text_field="text",  # The field we just created
+                max_seq_length=self.max_seq_length,
+                dataset_num_proc=2,
+                packing=False,  # Can make training 5x faster for short sequences
+                args=training_args
+            )
+
+            logger.info("SFTTrainer configured successfully")
+
+        except Exception as e:
+            logger.error(f"Error setting up trainer: {e}")
+            raise
+
+    def train(self, dataset_path: str):
+        """Run the full pipeline: load model, add LoRA, load data, train, save.
+
+        Args:
+            dataset_path: Local path or Hub dataset id, see ``load_dataset``.
+
+        Returns:
+            The value returned by ``SFTTrainer.train()``.
+
+        Raises:
+            Exception: Any failure is logged and re-raised unchanged.
+        """
+        logger.info("🚀 Starting training process...")
+
+        try:
+            self.load_model_and_tokenizer()
+            self.setup_lora()
+            train_dataset = self.load_dataset(dataset_path)
+            self.setup_trainer(train_dataset)
+
+            logger.info("Starting training...")
+            trainer_stats = self.trainer.train()
+
+            logger.info("✅ Training completed successfully!")
+            logger.info(f"Training stats: {trainer_stats}")
+
+            self.save_model()
+
+            return trainer_stats
+
+        except Exception as e:
+            logger.error(f"❌ Training failed: {e}")
+            raise
+
+    def save_model(self):
+        """Save the model, the tokenizer and the training config to ``model_output_dir``.
+
+        The config is written to ``training_config.json`` without the
+        ``hf_token`` entry so the access token is never stored next to the
+        model weights.
+
+        Raises:
+            Exception: Any save error is logged and re-raised unchanged.
+        """
+        logger.info("Saving trained model...")
+
+        try:
+            # Create output directory
+            Path(self.model_output_dir).mkdir(parents=True, exist_ok=True)
+
+            # Save model and tokenizer
+            self.model.save_pretrained(self.model_output_dir)
+            self.tokenizer.save_pretrained(self.model_output_dir)
+
+            # Save training config, leaving the secret out of the artifact
+            saved_config = {key: value for key, value in self.config.items() if key != 'hf_token'}
+            config_path = Path(self.model_output_dir) / "training_config.json"
+            with open(config_path, 'w', encoding='utf-8') as f:
+                json.dump(saved_config, f, indent=2)
+
+            logger.info(f"✅ Model saved to: {self.model_output_dir}")
+
+        except Exception as e:
+            logger.error(f"❌ Error saving model: {e}")
+            raise
+
+    def prepare_for_inference(self):
+        """Switch the model to inference mode via ``FastLanguageModel.for_inference``.
+
+        Raises:
+            Exception: Any error is logged and re-raised unchanged.
+        """
+        logger.info("Preparing model for inference...")
+
+        try:
+            FastLanguageModel.for_inference(self.model)
+            logger.info("✅ Model prepared for inference")
+
+        except Exception as e:
+            logger.error(f"❌ Error preparing for inference: {e}")
+            raise
+
+def load_training_config(config_path: str) -> Dict[str, Any]:
+    """Load a YAML file and flatten it into the dict ``StylingTrainer`` expects.
+
+    Sections read:
+      * ``model``: ``training_model``, ``training_max_seq_length``,
+        ``training_dtype``, ``training_load_in_4bit``, ``training_token``.
+        Defaults are applied even when the section is missing, so
+        ``model_name`` is always present in the result.
+      * ``training`` (only when present): epochs, batch size, learning rate,
+        weight decay, warmup ratio, scheduler type.
+      * ``data`` (only when present): ``output_dir`` becomes the default
+        ``dataset_path``; ``instruction`` becomes ``style_instruction``.
+
+    LoRA settings, gradient accumulation, step limits, seed and output
+    directories are fixed values added at the end.
+
+    Args:
+        config_path: Path to the YAML configuration file.
+
+    Returns:
+        The flattened training configuration.
+
+    Raises:
+        ValueError: If the top level of the YAML document is not a mapping.
+        Exception: Any other read/parse error is logged and re-raised.
+    """
+    try:
+        with open(config_path, 'r', encoding='utf-8') as f:
+            # An empty YAML document parses to None; treat it as an empty mapping
+            config = yaml.safe_load(f) or {}
+
+        if not isinstance(config, dict):
+            raise ValueError(
+                f"Top level of {config_path} must be a mapping, got {type(config).__name__}"
+            )
+
+        training_config = {}
+
+        # Model configuration. `or {}` also covers a section written as `model:` with no body.
+        model_data = config.get('model') or {}
+        training_config.update({
+            'model_name': model_data.get('training_model', 'unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit'),
+            'max_seq_length': model_data.get('training_max_seq_length', 2048),
+            'dtype': model_data.get('training_dtype'),
+            'load_in_4bit': model_data.get('training_load_in_4bit', True),
+            'hf_token': model_data.get('training_token')
+        })
+
+        # Training configuration
+        if 'training' in config:
+            training_data = config['training'] or {}
+            training_config.update({
+                'num_epochs': training_data.get('num_epochs', 3),
+                'batch_size': training_data.get('batch_size', 2),
+                # PyYAML loads exponent literals without a dot (e.g. 3e-5) as
+                # strings, so coerce explicitly before the value reaches the optimizer.
+                'learning_rate': float(training_data.get('learning_rate', 2e-4)),
+                'weight_decay': float(training_data.get('weight_decay', 0.01)),
+                'warmup_ratio': training_data.get('warmup_ratio', 0.1),
+                'lr_scheduler_type': training_data.get('lr_scheduler_type', 'linear')
+            })
+
+        # Data configuration - use output_dir from data section
+        if 'data' in config:
+            data_config = config['data'] or {}
+            output_dir = data_config.get('output_dir', './data/processed/styling')
+            training_config.update({
+                'data_output_dir': output_dir,
+                'dataset_path': output_dir,  # Default dataset path is the output_dir
+                'style_instruction': data_config.get('instruction', 'Rewrite the following text in a formal style')
+            })
+
+        # LoRA configuration and remaining fixed settings
+        training_config.update({
+            'lora_r': 16,
+            'lora_alpha': 16,
+            'lora_dropout': 0,
+            'target_modules': [
+                "q_proj", "k_proj", "v_proj", "o_proj",
+                "gate_proj", "up_proj", "down_proj"
+            ],
+            'gradient_accumulation_steps': 4,
+            'max_steps': None,
+            'warmup_steps': 5,
+            'seed': 3407,
+            'output_dir': './outputs',
+            'model_output_dir': './models/styling'
+        })
+
+        return training_config
+
+    except Exception as e:
+        logger.error(f"Error loading training config: {e}")
+        raise
+
+def main():
+    """Command-line entry point: load the YAML config, apply CLI overrides, train.
+
+    Exits with status 1 when no dataset path can be determined or when
+    training raises.
+    """
+    parser = argparse.ArgumentParser(description="Styling Training Pipeline")
+
+    # Configuration
+    parser.add_argument("--config", type=str, required=True, help="Path to YAML configuration file")
+    parser.add_argument("--dataset", type=str, help="Path to training dataset (HF dataset path or local path)")
+    parser.add_argument("--output-dir", type=str, help="Output directory for model")
+    parser.add_argument("--epochs", type=int, help="Number of training epochs")
+    parser.add_argument("--batch-size", type=int, help="Training batch size")
+    parser.add_argument("--learning-rate", type=float, help="Learning rate")
+    parser.add_argument("--max-steps", type=int, help="Maximum training steps")
+
+    args = parser.parse_args()
+
+    # The project's setup_logging() is disabled in this file. Without any
+    # handler the INFO progress messages would be dropped, so install a basic
+    # one (this is a no-op if logging is already configured).
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+    )
+
+    try:
+        # Load configuration
+        logger.info(f"Loading configuration from: {args.config}")
+        training_config = load_training_config(args.config)
+
+        # Override with CLI arguments
+        if args.output_dir:
+            training_config['model_output_dir'] = args.output_dir
+        # Numeric options are compared against None so that an explicit value
+        # is never silently ignored just because it is falsy.
+        numeric_overrides = {
+            'num_epochs': args.epochs,
+            'batch_size': args.batch_size,
+            'learning_rate': args.learning_rate,
+            'max_steps': args.max_steps,
+        }
+        for key, value in numeric_overrides.items():
+            if value is not None:
+                training_config[key] = value
+
+        # Determine dataset path: CLI argument takes precedence, then YAML config
+        dataset_path = args.dataset or training_config.get('dataset_path')
+        if not dataset_path:
+            logger.error("No dataset path provided. Use --dataset or ensure output_dir is set in YAML config.")
+            sys.exit(1)
+
+        logger.info("Training configuration:")
+        for key, value in training_config.items():
+            # Never write the access token to the log
+            shown = "***" if key == 'hf_token' and value else value
+            logger.info(f"  {key}: {shown}")
+        logger.info(f"  Dataset path: {dataset_path}")
+
+        # Initialize trainer
+        trainer = StylingTrainer(training_config)
+
+        # Start training
+        trainer.train(dataset_path)
+
+        logger.info("Training completed successfully!")
+
+    except Exception as e:
+        logger.error(f"Training failed: {e}")
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/styling/__init__.py b/scripts/styling/__init__.py
new file mode 100644
index 0000000..5e53d8b
--- /dev/null
+++ b/scripts/styling/__init__.py
@@ -0,0 +1,45 @@
+"""
+Styling Scripts Package
+Provides command-line interfaces for styling data processing, training, and inference
+"""
+
+from .data_processor import (
+    run_with_yaml_config,
+    run_styling_examples,
+    create_sample_styling_data,
+    create_custom_styling_config,
+    show_styling_features
+)
+
+from .train import (
+    run_training_with_config,
+    create_training_example,
+    show_training_features
+)
+
+from .inference import (
+    run_inference_with_config,
+    create_inference_example,
+    run_batch_inference_example,
+    show_inference_features
+)
+
+__all__ = [
+    # Data processing
+    'run_with_yaml_config',
+    'run_styling_examples',
+    'create_sample_styling_data',
+    'create_custom_styling_config',
+    'show_styling_features',
+    
+    # Training
+    'run_training_with_config',
+    'create_training_example',
+    'show_training_features',
+    
+    # Inference
+    'run_inference_with_config',
+    'create_inference_example',
+    'run_batch_inference_example',
+    'show_inference_features'
+]
diff --git a/scripts/styling/data_processor.py b/scripts/styling/data_processor.py
new file mode 100644
index 0000000..fb18c63
--- /dev/null
+++ b/scripts/styling/data_processor.py
@@ -0,0 +1,302 @@
+#!/usr/bin/env python3
+"""
+Styling data processor script that uses YAML configurations.
+This provides a flexible and maintainable approach for style transfer tasks.
+"""
+
+import sys
+import os
+import subprocess
+import argparse
+from pathlib import Path
+
+def run_with_yaml_config(config_path: str, **cli_overrides):
+    """Run the styling data-processing pipeline as a subprocess.
+
+    Args:
+        config_path: Path to the YAML configuration, forwarded as ``--config``.
+        **cli_overrides: Extra pipeline options. Each keyword becomes a
+            ``--kebab-case`` option and ``None`` values are skipped. Booleans
+            are treated as switches: ``True`` emits the bare flag and
+            ``False`` emits nothing. This matches how the boolean options are
+            declared (``store_true``) by the CLI wrapper in this file.
+
+    Returns:
+        True when the pipeline exits with status 0, False otherwise.
+    """
+    print(f"=== Running Styling Data Processor with YAML config: {config_path} ===")
+
+    # Run the pipeline with the interpreter executing this script, so it sees
+    # the same environment; a bare "python" may be a different one or missing.
+    cmd = [
+        sys.executable or "python", "pipelines/styling/data_processor.py",
+        "--config", config_path
+    ]
+
+    # Add CLI overrides
+    for key, value in cli_overrides.items():
+        if value is None:
+            continue
+        flag = f"--{key.replace('_', '-')}"
+        if isinstance(value, bool):
+            if value:
+                cmd.append(flag)
+        else:
+            cmd.extend([flag, str(value)])
+
+    print(f"Running command: {' '.join(cmd)}")
+    print()
+
+    try:
+        result = subprocess.run(cmd, check=True, capture_output=True, text=True)
+        print("✅ Styling data processing completed successfully!")
+        print(result.stdout)
+        return True
+    except subprocess.CalledProcessError as e:
+        print(f"❌ Error running styling data processor: {e}")
+        print(f"Error output: {e.stderr}")
+        return False
+
+def run_styling_examples():
+    """Run the two bundled demo invocations of the styling data processor.
+
+    The first always processes the formal-style config. The second is only
+    attempted when the custom dataset file exists; otherwise a hint is printed.
+    """
+    base_config = "configs/styling/formal.yaml"
+    custom_dataset = "data/raw/styling/custom_dataset.jsonl"
+
+    # Example 1: Formal style transfer (max_samples overrides the YAML value)
+    print("=== Example 1: Formal Style Transfer ===")
+    if run_with_yaml_config(base_config, max_samples=1000, output_format="alpaca"):
+        print("✅ Formal style transfer completed!")
+
+    # Example 2: Custom styling dataset (if available)
+    print("\n=== Example 2: Custom Styling Dataset ===")
+    if not os.path.exists(custom_dataset):
+        print("⚠️  Custom styling dataset not found, skipping...")
+        print("   You can create one with the 'create-sample-data' option")
+        return
+
+    # The formal config serves as the base; everything else is overridden
+    custom_ok = run_with_yaml_config(
+        base_config,
+        data_source="custom",
+        data_path=custom_dataset,
+        instruction="Rewrite the following text in a casual, friendly style",
+        output_dir="./data/processed/styling/casual",
+    )
+    if custom_ok:
+        print("✅ Custom styling dataset processing completed!")
+
+def create_sample_styling_data():
+    """Write a five-example informal-to-formal dataset for testing.
+
+    The file is created at ``data/raw/styling/sample_formal.jsonl`` relative
+    to the current working directory, one JSON object per line with the keys
+    ``text`` and ``styled_text``.
+    """
+    import json
+
+    # (informal source, formal rewrite)
+    pairs = (
+        ("Hey, what's up? How are you doing today?", "Hello, how are you doing today?"),
+        ("This is really cool stuff!", "This is quite impressive material."),
+        ("I'm gonna go to the store later.", "I will go to the store later."),
+        ("What's the deal with this?", "What is the situation regarding this matter?"),
+        ("That's totally awesome!", "That is quite remarkable!"),
+    )
+    sample_data = [{"text": source, "styled_text": styled} for source, styled in pairs]
+
+    # Make sure the target directory exists before writing
+    target_dir = Path("data/raw/styling")
+    target_dir.mkdir(parents=True, exist_ok=True)
+    sample_file = target_dir / "sample_formal.jsonl"
+
+    serialized = "".join(json.dumps(record, ensure_ascii=False) + '\n' for record in sample_data)
+    with open(sample_file, 'w', encoding='utf-8') as handle:
+        handle.write(serialized)
+
+    print(f"✅ Created sample styling dataset: {sample_file}")
+    print(f"   Contains {len(sample_data)} examples")
+    print("   Format: text → styled_text")
+    print("   Ready to use with configs/styling/formal.yaml")
+
+def create_custom_styling_config():
+    """Write a ready-made YAML config for professional business style transfer.
+
+    The file is created at ``configs/styling/professional.yaml`` relative to
+    the current working directory; an existing file is overwritten.
+    """
+    # NOTE: the learning rate is written as 3.0e-5, not 3e-5. PyYAML follows
+    # YAML 1.1, where a float needs a dot in the mantissa; "3e-5" would be
+    # loaded as a string and break the training arguments.
+    custom_config = """task:
+  name: "styling"
+  type: "style_transfer"
+
+data:
+  source: "custom"
+  input_field: "text"
+  output_field: "styled_text"
+  instruction: "Rewrite the following text in a professional business style"
+  data_format: "jsonl"
+  max_length: 512
+  min_length: 10
+  clean_text: true
+  lowercase: false
+  train_split: 0.8
+  validation_split: 0.1
+  test_split: 0.1
+  output_format: "alpaca"
+  output_dir: "./data/processed/styling/professional"
+
+model:
+  name: "t5-base"
+  max_length: 512
+
+training:
+  num_epochs: 3
+  batch_size: 16
+  learning_rate: 3.0e-5
+  weight_decay: 0.01
+  warmup_ratio: 0.1
+  lr_scheduler_type: "linear"
+
+inference:
+  batch_size: 32
+  max_new_tokens: 128
+  temperature: 0.8
+"""
+
+    config_path = "configs/styling/professional.yaml"
+    os.makedirs(os.path.dirname(config_path), exist_ok=True)
+
+    # Explicit encoding keeps the output independent of the platform default
+    with open(config_path, 'w', encoding='utf-8') as f:
+        f.write(custom_config)
+
+    print(f"✅ Created custom styling config: {config_path}")
+    print("   This config is set up for professional business style transfer")
+
+def handle_direct_args():
+    """Parse this script's command line and forward it to the styling pipeline.
+
+    Every option that was given (plus ``--log-level``, which always has a
+    value) is passed on to ``pipelines/styling/data_processor.py``, which is
+    run as a subprocess with its output captured.
+
+    Returns:
+        True when the pipeline exits with status 0, False otherwise.
+    """
+    parser = argparse.ArgumentParser(description="Styling Data Processor")
+
+    # Add all the same arguments as the styling pipeline
+    parser.add_argument("--config", type=str, help="Path to YAML configuration file")
+    parser.add_argument("--data-source", choices=["huggingface", "custom"], help="Data source")
+    parser.add_argument("--dataset-name", type=str, help="HuggingFace dataset name")
+    parser.add_argument("--data-path", type=str, help="Path to custom data file")
+    parser.add_argument("--data-format", choices=["jsonl", "csv", "json"], help="Data format")
+    parser.add_argument("--input-field", type=str, help="Input field name")
+    parser.add_argument("--output-field", type=str, help="Output field name")
+    parser.add_argument("--instruction", type=str, help="Style instruction")
+    parser.add_argument("--max-samples", type=int, help="Maximum samples to process")
+    parser.add_argument("--train-split", type=float, help="Training split ratio")
+    parser.add_argument("--validation-split", type=float, help="Validation split ratio")
+    parser.add_argument("--test-split", type=float, help="Test split ratio")
+    parser.add_argument("--clean-text", action="store_true", help="Clean and normalize text")
+    parser.add_argument("--remove-special-chars", action="store_true", help="Remove special characters")
+    parser.add_argument("--lowercase", action="store_true", help="Convert text to lowercase")
+    parser.add_argument("--min-length", type=int, help="Minimum text length")
+    parser.add_argument("--max-length", type=int, help="Maximum text length")
+    parser.add_argument("--output-format", choices=["styling", "alpaca"], help="Output format")
+    parser.add_argument("--output-dir", type=str, help="Output directory")
+
+    # HuggingFace dataset options
+    parser.add_argument("--create-hf-dataset", action="store_true", help="Create HuggingFace dataset")
+    parser.add_argument("--hf-dataset-path", type=str, help="Path to save HuggingFace dataset")
+
+    # Logging
+    parser.add_argument("--log-level", choices=["DEBUG", "INFO", "WARNING", "ERROR"], default="INFO", help="Logging level")
+
+    args = parser.parse_args()
+
+    # Run the pipeline with the interpreter executing this script, so it sees
+    # the same environment; a bare "python" may be a different one or missing.
+    cmd = [sys.executable or "python", "pipelines/styling/data_processor.py"]
+
+    # Add all arguments that were provided
+    for arg_name, arg_value in vars(args).items():
+        if arg_value is None:
+            continue
+        flag = f"--{arg_name.replace('_', '-')}"
+        if isinstance(arg_value, bool):
+            # store_true switches: forward the bare flag only when it was set
+            if arg_value:
+                cmd.append(flag)
+        else:
+            cmd.extend([flag, str(arg_value)])
+
+    print(f"Running: {' '.join(cmd)}")
+    print()
+
+    try:
+        result = subprocess.run(cmd, check=True, capture_output=True, text=True)
+        print("✅ Styling data processing completed successfully!")
+        print(result.stdout)
+        return True
+    except subprocess.CalledProcessError as e:
+        print(f"❌ Error running styling data processor: {e}")
+        print(f"Error output: {e.stderr}")
+        return False
+
+def show_styling_features():
+    """Print the feature overview and usage examples of the data processor."""
+    script = "python scripts/styling/data_processor.py"
+    config_option = "--config configs/styling/formal.yaml"
+
+    # One entry per output line; empty strings are the blank separator lines.
+    overview = (
+        "=== Styling Data Processor Features ===",
+        "",
+        "1. **Style Transfer Tasks**:",
+        "   - Formal vs. Informal style",
+        "   - Professional vs. Casual tone",
+        "   - Academic vs. Conversational",
+        "   - Any custom style instruction",
+        "",
+        "2. **Data Formats Supported**:",
+        "   - HuggingFace datasets",
+        "   - Custom JSONL/CSV/JSON files",
+        "   - Automatic train/validation/test splits",
+        "",
+        "3. **Output Formats**:",
+        "   - Raw styling format (input/output)",
+        "   - Alpaca format (instruction/input/output)",
+        "   - HuggingFace dataset format",
+        "",
+        "4. **Advanced Features**:",
+        "   - Configurable field mapping",
+        "   - Text preprocessing options",
+        "   - Automatic dataset saving/loading",
+        "   - YAML configuration support",
+        "",
+    )
+    usage = (
+        "=== Usage Examples ===",
+        "",
+        "1. Use YAML config only:",
+        f"   {script} {config_option}",
+        "",
+        "2. Override YAML values:",
+        f"   {script} {config_option} --max-samples 500",
+        "",
+        "3. Create sample data:",
+        f"   {script} create-sample-data",
+        "",
+        "4. Create custom config:",
+        f"   {script} create-config",
+    )
+
+    for line in overview + usage:
+        print(line)
+
+def main():
+    """Dispatch to a subcommand, forward options to the pipeline, or print help.
+
+    * First argument is a known subcommand: run it.
+    * Any other arguments: treat them as pipeline options and forward them;
+      the process exits with status 1 when the pipeline run fails, so shells
+      and CI can detect the failure.
+    * No arguments: print the usage overview.
+    """
+    subcommands = {
+        "examples": run_styling_examples,
+        "create-sample-data": create_sample_styling_data,
+        "create-config": create_custom_styling_config,
+        "features": show_styling_features,
+    }
+
+    if len(sys.argv) > 1:
+        handler = subcommands.get(sys.argv[1])
+        if handler is not None:
+            handler()
+        elif not handle_direct_args():
+            # Direct arguments are passed through to the pipeline; report its failure
+            sys.exit(1)
+        return
+
+    print("Styling Data Processor")
+    print("=====================")
+    print()
+    print("This script runs the styling data processor for style transfer tasks.")
+    print("It supports both YAML configurations and command-line overrides.")
+    print()
+    print("Usage:")
+    print("  python scripts/styling/data_processor.py examples           # Run examples")
+    print("  python scripts/styling/data_processor.py create-sample-data # Create sample dataset")
+    print("  python scripts/styling/data_processor.py create-config      # Create custom config")
+    print("  python scripts/styling/data_processor.py features           # Show features")
+    print()
+    print("Direct pipeline usage:")
+    print("  python scripts/styling/data_processor.py --config configs/styling/formal.yaml")
+    print("  python scripts/styling/data_processor.py --data-source custom --data-path ./data.jsonl")
+    print()
+    print("Key Features:")
+    print("  ✅ Style transfer with custom instructions")
+    print("  ✅ Multiple data source support")
+    print("  ✅ YAML configuration files")
+    print("  ✅ CLI argument overrides")
+    print("  ✅ Automatic data splitting")
+    print("  ✅ HuggingFace dataset export")
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/styling/inference.py b/scripts/styling/inference.py
new file mode 100644
index 0000000..08beb8f
--- /dev/null
+++ b/scripts/styling/inference.py
@@ -0,0 +1,223 @@
+#!/usr/bin/env python3
+"""
+Styling Inference Script
+Provides a command-line interface to run the styling inference pipeline
+"""
+
+import sys
+import os
+import subprocess
+import argparse
+from pathlib import Path
+
+def run_inference_with_config(config_path: str, **cli_overrides):
+    """Run the styling inference pipeline as a subprocess.
+
+    ``cli_overrides`` may carry model_path, text, input_file, max_tokens,
+    temperature, instruction, output_file and streaming; None values and
+    unknown keys are ignored. Returns True on exit status 0, else False.
+    """
+    print(f"🚀 Starting styling inference with config: {config_path}")
+    print()
+
+    # Value-carrying overrides and the pipeline flag each one maps to.
+    value_flags = {
+        "model_path": "--model-path", "text": "--text",
+        "input_file": "--input-file", "max_tokens": "--max-tokens",
+        "temperature": "--temperature", "instruction": "--instruction",
+        "output_file": "--output-file",
+    }
+
+    # sys.executable keeps the child on the interpreter running this script.
+    cmd = [sys.executable, "pipelines/styling/inference.py", "--config", config_path]
+    for key, value in cli_overrides.items():
+        if key in value_flags and value is not None:
+            cmd.extend([value_flags[key], str(value)])
+        elif key == "streaming" and value:
+            # Boolean switch: main() passes False when --streaming is absent,
+            # and False is not None, so test truthiness instead.
+            cmd.append("--streaming")
+
+    print(f"Running: {' '.join(cmd)}")
+    print()
+
+    try:
+        result = subprocess.run(cmd, check=True, capture_output=True, text=True)
+    except subprocess.CalledProcessError as e:
+        print(f"❌ Inference failed: {e}")
+        print(f"Error output: {e.stderr}")
+        return False
+    print("✅ Inference completed successfully!")
+    print(result.stdout)
+    return True
+
+def show_inference_features():
+    """Print a summary of the styling inference pipeline's capabilities."""
+    # Section heading -> bullet points listed beneath it.
+    sections = {
+        "1. **Model Support**:": (
+            "Trained LoRA models", "Base models from HuggingFace Hub",
+            "Automatic model loading and preparation"),
+        "2. **Inference Modes**:": (
+            "Single text inference", "Batch file processing",
+            "Interactive mode", "Streaming generation"),
+        "3. **Generation Control**:": (
+            "Configurable temperature and top-p", "Adjustable max tokens",
+            "Custom style instructions"),
+        "4. **Output Options**:": (
+            "Console output", "File output",
+            "Streaming real-time generation"),
+    }
+
+    print("=== Styling Inference Pipeline Features ===")
+    for heading, bullets in sections.items():
+        print()
+        print(heading)
+        for bullet in bullets:
+            print(f"   - {bullet}")
+
+def create_inference_example():
+    """Demonstrate single-text inference with the formal style config.
+
+    Prints progress to the console and delegates the actual run to
+    ``run_inference_with_config``.
+
+    Returns:
+        False when ``configs/styling/formal.yaml`` is missing; otherwise the
+        success flag returned by ``run_inference_with_config``.
+    """
+    formal_config = "configs/styling/formal.yaml"
+    sample = "Hey, what's up? I'm gonna go grab some food later."
+
+    print("=== Inference Example: Formal Style Transfer ===")
+    print()
+
+    # Nothing to demonstrate without the configuration file.
+    if not os.path.exists(formal_config):
+        print(f"❌ Configuration file not found: {formal_config}")
+        print("   Please run the data processor first to create the configuration")
+        return False
+
+    print("✅ Found configuration file!")
+    print(f"   Config: {formal_config}")
+    print()
+    print(f"📝 Example text: {sample}")
+    print()
+
+    ok = run_inference_with_config(
+        config_path=formal_config, text=sample,
+        instruction="Rewrite the following text in a formal style")
+    if ok:
+        print("🎉 Inference example completed!")
+    return ok
+
+def create_test_file():
+    """Write the sample sentences used by the batch inference demo.
+
+    Creates (or overwrites) ``test_texts.txt`` in the current working
+    directory, one sentence per line, and returns that file name.
+    """
+    samples = (
+        "Hey, what's up? How are you doing today?",
+        "I'm gonna go to the store later to get some stuff.",
+        "This is pretty cool, right?",
+        "Can you help me out with this?",
+        "Thanks a lot for your help!",
+    )
+    target = "test_texts.txt"
+    with open(target, 'w', encoding='utf-8') as handle:
+        handle.writelines(f"{sentence}\n" for sentence in samples)
+    print(f"✅ Created test file: {target}")
+    print(f"   Contains {len(samples)} sample texts")
+    return target
+
+def run_batch_inference_example():
+    """Demonstrate batch inference over a generated sample file.
+
+    The sample file is written via ``create_test_file`` before the
+    configuration check, so it exists even when the run is skipped.
+
+    Returns:
+        False when ``configs/styling/formal.yaml`` is missing; otherwise the
+        success flag returned by ``run_inference_with_config``.
+    """
+    formal_config = "configs/styling/formal.yaml"
+
+    print("=== Batch Inference Example ===")
+    print()
+    batch_input = create_test_file()
+
+    if not Path(formal_config).exists():
+        print(f"❌ Configuration file not found: {formal_config}")
+        return False
+
+    print("✅ Running batch inference...")
+    print()
+    ok = run_inference_with_config(
+        config_path=formal_config, input_file=batch_input,
+        output_file="styled_results.txt",
+        instruction="Rewrite the following text in a formal style")
+    if ok:
+        print("🎉 Batch inference completed!")
+        print("   Results saved to: styled_results.txt")
+    return ok
+
+def main():
+    """Parse command-line arguments and dispatch the requested command.
+
+    ``features`` prints the feature overview, ``example`` and ``batch`` run
+    the bundled demos and ``infer`` runs the pipeline with the supplied
+    options. Exits with status 1 when required arguments are missing or when
+    the example, batch or infer run reports failure.
+    """
+    parser = argparse.ArgumentParser(description="Styling Inference Script")
+
+    # Subcommands
+    parser.add_argument("command", choices=["infer", "example", "batch", "features"],
+                       help="Command to run")
+
+    # Inference arguments
+    parser.add_argument("--config", type=str, help="Path to YAML configuration file")
+    parser.add_argument("--model-path", type=str, help="Path to trained model")
+    parser.add_argument("--text", type=str, help="Single text to style transfer")
+    parser.add_argument("--input-file", type=str, help="File containing texts to process")
+    parser.add_argument("--max-tokens", type=int, help="Maximum new tokens to generate")
+    parser.add_argument("--temperature", type=float, help="Sampling temperature")
+    parser.add_argument("--instruction", type=str, help="Custom style instruction")
+    parser.add_argument("--output-file", type=str, help="Output file for results")
+    parser.add_argument("--streaming", action="store_true", help="Enable streaming generation")
+
+    args = parser.parse_args()
+
+    if args.command == "features":
+        show_inference_features()
+        return
+
+    # The demos report failure through their return value; propagate it as
+    # the exit status, the same way ``infer`` does.
+    if args.command == "example":
+        success = create_inference_example()
+    elif args.command == "batch":
+        success = run_batch_inference_example()
+    else:
+        if not args.config:
+            print("❌ --config is required for inference")
+            print("Usage: python scripts/styling/inference.py infer --config config.yaml [options]")
+            sys.exit(1)
+        # Check if we have input
+        if not args.text and not args.input_file:
+            print("❌ Either --text or --input-file is required")
+            print("Usage: python scripts/styling/inference.py infer --config config.yaml --text 'your text'")
+            sys.exit(1)
+        success = run_inference_with_config(
+            config_path=args.config, model_path=args.model_path,
+            text=args.text, input_file=args.input_file,
+            max_tokens=args.max_tokens, temperature=args.temperature,
+            instruction=args.instruction, output_file=args.output_file,
+            streaming=args.streaming)
+
+    if not success:
+        sys.exit(1)
+
+if __name__ == "__main__":  # run the CLI only when executed as a script
+    main()
diff --git a/scripts/styling/train.py b/scripts/styling/train.py
new file mode 100644
index 0000000..7742320
--- /dev/null
+++ b/scripts/styling/train.py
@@ -0,0 +1,168 @@
+#!/usr/bin/env python3
+"""
+Styling Training Script
+Provides a command-line interface to run the styling training pipeline
+"""
+
+import sys
+import os
+import subprocess
+import argparse
+from pathlib import Path
+
+def run_training_with_config(config_path: str, dataset_path: str = None, **cli_overrides):
+    """Run the styling training pipeline as a subprocess.
+
+    ``dataset_path`` is forwarded as ``--dataset`` when given. Recognised
+    ``cli_overrides`` are output_dir, epochs, batch_size, learning_rate and
+    max_steps; None values and unknown keys are ignored. Returns True when
+    the pipeline exits with status 0, False otherwise.
+    """
+    print(f"Starting styling training with config: {config_path}")
+    if dataset_path:
+        print(f"Training dataset: {dataset_path}")
+    else:
+        print("Training dataset: Will use output_dir from YAML config")
+    print()
+
+    # sys.executable keeps the child on the interpreter (and virtualenv)
+    # running this script instead of whichever "python" is first on PATH.
+    cmd = [sys.executable, "pipelines/styling/train.py", "--config", config_path]
+    if dataset_path:
+        cmd.extend(["--dataset", dataset_path])
+
+    override_flags = {
+        "output_dir": "--output-dir", "epochs": "--epochs",
+        "batch_size": "--batch-size", "learning_rate": "--learning-rate",
+        "max_steps": "--max-steps",
+    }
+    for key, value in cli_overrides.items():
+        if key in override_flags and value is not None:
+            cmd.extend([override_flags[key], str(value)])
+
+    print(f"Running: {' '.join(cmd)}")
+    print()
+
+    try:
+        result = subprocess.run(cmd, check=True, capture_output=True, text=True)
+    except subprocess.CalledProcessError as e:
+        print(f"Training failed: {e}")
+        print(f"Error output: {e.stderr}")
+        return False
+    print("Training completed successfully!")
+    print(result.stdout)
+    return True
+
+def show_training_features():
+    """Print a summary of the styling training pipeline's capabilities."""
+    # Section heading -> bullet points listed beneath it.
+    sections = {
+        "1. **Model Support**:": (
+            "Unsloth optimized models (4x faster)", "LoRA fine-tuning for efficiency",
+            "Support for Llama-3.1, Mistral, Phi-3, Gemma"),
+        "2. **Training Features**:": (
+            "SFTTrainer with instruction tuning", "Automatic mixed precision (FP16/BF16)",
+            "Gradient checkpointing for memory efficiency", "Configurable LoRA parameters"),
+        "3. **Configuration**:": (
+            "YAML configuration files", "CLI argument overrides",
+            "Automatic device detection"),
+        "4. **Output**:": (
+            "Saved LoRA models", "Training logs and checkpoints",
+            "Ready for inference"),
+    }
+
+    print("=== Styling Training Pipeline Features ===")
+    for heading, bullets in sections.items():
+        print()
+        print(heading)
+        for bullet in bullets:
+            print(f"   - {bullet}")
+
+def create_training_example():
+    """Demonstrate a short training run with the formal style config.
+
+    Returns:
+        False when ``configs/styling/formal.yaml`` is missing; otherwise the
+        success flag returned by ``run_training_with_config``.
+    """
+    formal_config = "configs/styling/formal.yaml"
+
+    print("=== Training Example: Formal Style Transfer ===")
+    print()
+
+    if not os.path.exists(formal_config):
+        print(f"Configuration file not found: {formal_config}")
+        print("   Please run the data processor first to create the configuration")
+        return False
+
+    print("Found required files!")
+    print(f"   Config: {formal_config}")
+    for note in (
+        "   Dataset: Will use output_dir from YAML config",
+        "   The training pipeline will automatically:",
+        "   - Load data from the output_dir specified in YAML",
+        "   - Convert JSONL files to HuggingFace dataset format",
+        "   - Apply formatting with EOS tokens",
+        "   - Train the model using SFTTrainer",
+        "",
+    ):
+        print(note)
+
+    ok = run_training_with_config(
+        config_path=formal_config, dataset_path=None,
+        epochs=1, batch_size=2, learning_rate=2e-4)
+    if ok:
+        print("Training example completed!")
+        print("   Model saved to: ./models/styling")
+        print("   Ready for inference!")
+    return ok
+
+def main():
+    """Parse command-line arguments and dispatch the requested command.
+
+    ``features`` prints the feature overview, ``example`` runs the bundled
+    demo and ``train`` runs the pipeline with the supplied options. Exits
+    with status 1 when ``--config`` is missing for ``train`` or when the
+    example or training run reports failure.
+    """
+    parser = argparse.ArgumentParser(description="Styling Training Script")
+
+    # Subcommands
+    parser.add_argument("command", choices=["train", "example", "features"],
+                       help="Command to run")
+
+    # Training arguments
+    parser.add_argument("--config", type=str, help="Path to YAML configuration file")
+    parser.add_argument("--dataset", type=str, help="Path to training dataset")
+    parser.add_argument("--output-dir", type=str, help="Output directory for model")
+    parser.add_argument("--epochs", type=int, help="Number of training epochs")
+    parser.add_argument("--batch-size", type=int, help="Training batch size")
+    parser.add_argument("--learning-rate", type=float, help="Learning rate")
+    parser.add_argument("--max-steps", type=int, help="Maximum training steps")
+
+    args = parser.parse_args()
+
+    if args.command == "features":
+        show_training_features()
+        return
+
+    if args.command == "example":
+        # Propagate a failed demo as a non-zero exit status, like ``train``.
+        success = create_training_example()
+    else:
+        if not args.config:
+            print("❌ --config is required for training")
+            print("Usage: python scripts/styling/train.py train --config config.yaml")
+            sys.exit(1)
+        # An empty --dataset value is treated the same as an omitted one.
+        success = run_training_with_config(
+            config_path=args.config, dataset_path=args.dataset or None,
+            output_dir=args.output_dir, epochs=args.epochs,
+            batch_size=args.batch_size, learning_rate=args.learning_rate,
+            max_steps=args.max_steps)
+
+    if not success:
+        sys.exit(1)
+
+if __name__ == "__main__":  # run the CLI only when executed as a script
+    main()
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..4743b82
--- /dev/null
+++ b/test.py
@@ -0,0 +1,251 @@
+#!/usr/bin/env python3
+"""
+Test script for the styling data processor
+"""
+
+import sys
+import os
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+
+from pipelines.styling.data_processor import StylingDataPipeline, create_custom_config, create_huggingface_config
+
+def test_styling_pipeline():
+    """Exercise the styling data processor end to end with a small custom dataset.
+
+    Writes three sample records to ./data/raw/styling/test_formal.jsonl, runs the
+    pipeline on them and prints the results. Errors from loading the YAML config
+    or from running the pipeline are caught and printed rather than raised.
+    """
+    print("Testing Styling Data Processor")
+    print("=" * 50)
+
+    # Initialize the pipeline
+    pipeline = StylingDataPipeline()
+
+    # Example 1: Load configuration from YAML (the result is only printed)
+    print("\n1. Loading configuration from YAML...")
+    try:
+        yaml_config = pipeline.load_config_from_yaml("./configs/styling/formal.yaml")
+        print(f"   ✅ YAML config loaded successfully!")
+        print(f"   Output directory: {yaml_config.output_dir}")
+        print(f"   Instruction: {yaml_config.instruction}")
+        print(f"   Input field: {yaml_config.input_field}")
+        print(f"   Output field: {yaml_config.output_field}")
+    except Exception as e:
+        print(f"   ❌ Error loading YAML config: {e}")
+        yaml_config = None  # not read again below
+
+    # Example 2: Create custom dataset configuration (printed only, never run)
+    print("\n2. Creating custom dataset configuration...")
+    custom_config = create_custom_config(
+        data_path="./data/raw/styling/formal_dataset.jsonl",
+        data_format="jsonl",
+        input_field="text",
+        output_field="styled_text",
+        instruction="Rewrite the following text in a formal style",
+        max_samples=1000,
+        min_length=10,
+        max_length=256,
+        clean_text=True,
+        lowercase=False,
+        output_format="alpaca"
+    )
+
+    print(f"   Input field: {custom_config.input_field} (maps to 'input')")
+    print(f"   Output field: {custom_config.output_field} (maps to 'output')")
+    print(f"   Instruction: {custom_config.instruction}")
+    print(f"   Max samples: {custom_config.max_samples}")
+
+    # Example 3: Run the pipeline on a sample file generated below
+    print("\n3. Testing pipeline with sample data...")
+    # Create a sample dataset for testing (casual input -> formal output pairs)
+    sample_data = [
+        {
+            "input": "Hey, what's up? How are you doing today?",
+            "output": "Hello, how are you doing today?"
+        },
+        {
+            "input": "This is really cool stuff!",
+            "output": "This is quite impressive material."
+        },
+        {
+            "input": "I'm gonna go to the store later.",
+            "output": "I will go to the store later."
+        }
+    ]
+
+    # Save sample data to test file (one JSON object per line)
+    import json  # local import: only needed to write the sample file
+    test_file = "./data/raw/styling/test_formal.jsonl"
+    os.makedirs(os.path.dirname(test_file), exist_ok=True)
+
+    with open(test_file, 'w', encoding='utf-8') as f:
+        for item in sample_data:
+            f.write(json.dumps(item, ensure_ascii=False) + '\n')
+
+    print(f"   Created test file: {test_file}")
+    # Test the pipeline with the sample data; any failure is reported below
+    try:
+        test_config = create_custom_config(
+            data_path=test_file,
+            data_format="jsonl",
+            input_field="input",
+            output_field="output",
+            instruction="Rewrite the following text in a formal style",
+            max_samples=10,
+            output_format="alpaca"
+        )
+
+        print("   Running pipeline...")
+        result = pipeline.run_pipeline(test_config, output_format="alpaca", save_splits=True, create_hf_dataset=True, save_hf_dataset=True)
+
+        print("   ✅ Pipeline completed successfully!")
+        print(f"   Total samples: {result['analysis']['overall']['total_samples']}")
+        print(f"   Split sizes: {result['analysis']['overall']['split_sizes']}")
+        print(f"   Output directory: {result['output_dir']}")
+
+        # Show HuggingFace dataset info if created
+        if 'hf_dataset' in result:
+            hf_dataset = result['hf_dataset']
+            print(f"   HuggingFace dataset created with {len(hf_dataset)} entries")
+            print(f"   Dataset features: {hf_dataset.features}")
+            # Show save path if saved to disk
+            if 'hf_dataset_path' in result:
+                print(f"   Dataset saved to: {result['hf_dataset_path']}")
+            # Show the first 200 characters of the first formatted example
+            if len(hf_dataset) > 0:
+                print(f"   Example formatted text:")
+                print(f"   {hf_dataset[0]['text'][:200]}...")
+
+        # Show sample processed data (up to two items of one split)
+        print("\n   Sample processed data:")
+        for split_name, split_data in result['data'].items():
+            if split_data:
+                print(f"   {split_name} split:")
+                for i, item in enumerate(split_data[:2]):  # Show first 2 items
+                    print(f"     Item {i+1}:")
+                    print(f"       Instruction: {item['instruction']}")
+                    print(f"       Input: {item['input'][:50]}...")
+                    print(f"       Output: {item['output'][:50]}...")
+                break  # stop after the first non-empty split
+
+    except Exception as e:  # broad on purpose: report any failure and continue
+        print(f"   ❌ Error running pipeline: {e}")
+
+    print("\n" + "=" * 50)
+    print("Test completed!")
+
+def test_hf_dataset_save_load():
+    """Round-trip a small HuggingFace dataset through disk and report the result.
+
+    Builds a dataset from two sample records, saves it under
+    ./data/processed/styling/test_hf_dataset, loads it back and prints what
+    was loaded. Save or load failures are printed, not raised.
+
+    Returns:
+        The in-memory dataset produced by ``convert_to_hf_dataset``.
+    """
+    print("\nTesting HuggingFace Dataset Save/Load")
+    print("=" * 50)
+
+    from pipelines.styling.data_processor import (
+        load_hf_dataset_from_disk,
+        save_hf_dataset_to_disk,
+    )
+
+    style_instruction = "Rewrite in formal style"
+    target_dir = "./data/processed/styling/test_hf_dataset"
+    records = [
+        {
+            "instruction": style_instruction,
+            "input": "Hey, what's up?",
+            "output": "Hello, how are you?",
+        },
+        {
+            "instruction": style_instruction,
+            "input": "This is really cool!",
+            "output": "This is quite impressive.",
+        },
+    ]
+
+    # Build the config first, then the pipeline that converts the records.
+    config = create_custom_config(data_path="dummy", instruction=style_instruction)
+    pipeline = StylingDataPipeline()
+    hf_dataset = pipeline.convert_to_hf_dataset(records, config)
+    print(f"Created HuggingFace dataset with {len(hf_dataset)} entries")
+
+    print(f"\nSaving dataset to: {target_dir}")
+    if not save_hf_dataset_to_disk(hf_dataset, target_dir):
+        print("❌ Failed to save dataset")
+        return hf_dataset
+    print("✅ Dataset saved successfully!")
+
+    print(f"\nLoading dataset from: {target_dir}")
+    reloaded = load_hf_dataset_from_disk(target_dir)
+    if reloaded is None:
+        print("❌ Failed to load dataset")
+        return hf_dataset
+
+    print("✅ Dataset loaded successfully!")
+    print(f"Loaded dataset has {len(reloaded)} entries")
+    print(f"Features: {reloaded.features}")
+
+    # Rows are read by index, one printed line per entry.
+    print("\nSample loaded data:")
+    for position in range(len(reloaded)):
+        print(f"  Entry {position+1}: {reloaded[position]['text'][:100]}...")
+
+    return hf_dataset
+
+def test_hf_dataset_conversion():
+    """Convert two sample records to a HuggingFace dataset and print them.
+
+    Prints the entry count, the dataset features and a 150-character preview
+    of each entry's ``text`` field.
+
+    Returns:
+        The dataset produced by ``convert_to_hf_dataset``.
+    """
+    print("\nTesting HuggingFace Dataset Conversion")
+    print("=" * 50)
+
+    pipeline = StylingDataPipeline()
+
+    # Both records carry the same instruction that the config is built with.
+    style_instruction = "Rewrite in formal style"
+    records = [
+        {
+            "instruction": style_instruction,
+            "input": "Hey, what's up?",
+            "output": "Hello, how are you?",
+        },
+        {
+            "instruction": style_instruction,
+            "input": "This is really cool!",
+            "output": "This is quite impressive.",
+        },
+    ]
+    config = create_custom_config(data_path="dummy", instruction=style_instruction)
+    hf_dataset = pipeline.convert_to_hf_dataset(records, config)
+    entry_count = len(hf_dataset)
+
+    print(f"HuggingFace dataset created with {entry_count} entries")
+    print(f"Dataset features: {hf_dataset.features}")
+
+    print("\nFormatted examples:")
+    for position in range(entry_count):
+        print(f"  Example {position+1}:")
+        print(f"    {hf_dataset[position]['text'][:150]}...")
+        print()
+
+    # Closing summary
+    print("Dataset ready for training!")
+    print(f"Number of training examples: {entry_count}")
+
+    return hf_dataset
+
+
+if __name__ == "__main__":  # only the pipeline smoke test runs by default
+    test_styling_pipeline()
+    # test_hf_dataset_save_load()   # optional: uncomment to also run
+    # test_hf_dataset_conversion()  # optional: uncomment to also run
diff --git a/test.readme b/test.readme
new file mode 100644
index 0000000..e69de29