diff --git a/experiments/model_training.ipynb b/experiments/model_training.ipynb index 73eb8e5..e9310af 100644 --- a/experiments/model_training.ipynb +++ b/experiments/model_training.ipynb @@ -1,2743 +1,761 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Model Training for Fraud Detection\n", - "\n", - "This notebook focuses on training and evaluating machine learning models for fraud detection using the preprocessed transaction data.\n", - "\n", - "## Enhanced Features (Addressing Code Review):\n", - "- **Parameter configurations**: Easy-to-modify settings for testing different hypotheses\n", - "- **Easy model switching**: Flexible architecture for testing different algorithms\n", - "- **Detailed confusion matrix analysis**: Comprehensive precision/recall analysis across models, parameters, and balancing techniques\n", - "- **Class balancing comparison**: SMOTE vs Downsampling vs Class Weighting with thorough analysis" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 🎛️ Enhanced Configuration Section\n", - "**Easy-to-modify parameters for testing different hypotheses and configurations**\n", - "\n", - "### Quick Start Guide:\n", - "1. **For Model Comparison**: Set multiple models to `True` in `MODELS_TO_TEST`\n", - "2. **For Parameter Tuning**: Modify `MODEL_PARAMS` ranges for specific models\n", - "3. **For Balancing Analysis**: Enable different techniques in `BALANCING_TECHNIQUES`\n", - "4. 
# ================================
# 🎛️ EXPERIMENT CONFIGURATION
# ================================
# Every knob a reader might tune lives in this cell. Later cells only read
# these dictionaries; they never redefine them.

from itertools import product  # kept in scope: later cells expand the parameter grids with it
from math import prod

# Model Selection (set to True to include in experiments)
MODELS_TO_TEST = {
    'logistic_regression': True,
    'random_forest': True,
    'gradient_boosting': True,
    'xgboost': True
}

# Class Balancing Techniques (set to True to include)
BALANCING_TECHNIQUES = {
    'smote': True,
    'random_downsample': True,
    'class_weight': True,
    'no_balancing': True  # Baseline
}

# Model Parameters: one grid per model; every combination is trained
MODEL_PARAMS = {
    'logistic_regression': {
        'max_iter': [1000, 2000],
        'C': [0.1, 1.0, 10.0]
    },
    'random_forest': {
        'n_estimators': [100, 200],
        'max_depth': [10, 20, None],
        'min_samples_split': [2, 5]
    },
    'gradient_boosting': {
        'n_estimators': [100, 200],
        'learning_rate': [0.1, 0.2],
        'max_depth': [3, 5]
    },
    'xgboost': {
        'n_estimators': [100, 200],
        'learning_rate': [0.1, 0.2],
        'max_depth': [3, 5]
    }
}

# Evaluation Settings
EVALUATION_CONFIG = {
    'test_size': 0.2,
    'random_state': 42,
    'cv_folds': 3,
    'scoring_metric': 'f1',  # Primary metric for model selection
    'plot_confusion_matrix': True,
    'plot_precision_recall': True,
    'plot_roc_curve': True
}

# SMOTE Parameters
SMOTE_CONFIG = {
    'sampling_strategy': 'auto',  # or specific ratio like 0.5
    'k_neighbors': 5
}

# Downsampling Parameters
DOWNSAMPLE_CONFIG = {
    'sampling_strategy': 'auto',  # shrink the majority class down to the minority-class size
    'replacement': False
}


def count_planned_experiments(models_to_test, model_params, balancing_techniques):
    """
    Count how many (model, parameter combination, balancing technique) runs are enabled.

    Args:
        models_to_test (dict): model name -> bool flag.
        model_params (dict): model name -> {parameter name: list of candidate values}.
        balancing_techniques (dict): technique name -> bool flag.

    Returns:
        int: Number of training runs the configuration describes. A model
        without a parameter grid counts as a single (default) combination.
    """
    # The number of enabled techniques does not depend on the model, so compute it once.
    n_balancing = sum(bool(flag) for flag in balancing_techniques.values())

    total = 0
    for model, enabled in models_to_test.items():
        if not enabled:
            continue
        grid = model_params.get(model) or {}
        # Size of the cartesian product without building it; prod() of nothing is 1.
        n_combinations = prod(len(values) for values in grid.values())
        total += n_combinations * n_balancing
    return total


print("✅ Configuration loaded successfully!")
print(f"Models to test: {[k for k, v in MODELS_TO_TEST.items() if v]}")
print(f"Balancing techniques: {[k for k, v in BALANCING_TECHNIQUES.items() if v]}")

# Calculate total experiments
total_experiments = count_planned_experiments(MODELS_TO_TEST, MODEL_PARAMS, BALANCING_TECHNIQUES)

print(f"\n🎯 Total experiments planned: {total_experiments}")
"metadata": {}, - "outputs": [], - "source": [ - "# Add the project root to the path so we can import from src\n", - "sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath('__file__'))))\n", - "from src import config\n", - "\n", - "print(f\"๐Ÿ“ Project paths configured:\")\n", - "print(f\" - Data directory: {config.DATA_DIR}\")\n", - "print(f\" - Models directory: {config.MODELS_DIR}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## ๐Ÿ—๏ธ Model & Balancing Framework\n", - "**Flexible architecture for easy model and technique switching**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Import ML libraries\n", - "from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score\n", - "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n", - "from sklearn.compose import ColumnTransformer\n", - "from sklearn.pipeline import Pipeline\n", - "from sklearn.metrics import (\n", - " accuracy_score, precision_score, recall_score, f1_score, \n", - " confusion_matrix, classification_report, roc_auc_score,\n", - " precision_recall_curve, roc_curve, auc\n", - ")\n", - "\n", - "# Import models\n", - "from sklearn.linear_model import LogisticRegression\n", - "from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier\n", - "try:\n", - " import xgboost as xgb\n", - " XGBOOST_AVAILABLE = True\n", - " print(\"โœ… XGBoost available\")\n", - "except ImportError:\n", - " XGBOOST_AVAILABLE = False\n", - " print(\"โš ๏ธ XGBoost not available - will skip XGBoost experiments\")\n", - " MODELS_TO_TEST['xgboost'] = False\n", - "\n", - "# Import balancing techniques\n", - "from imblearn.over_sampling import SMOTE\n", - "from imblearn.under_sampling import RandomUnderSampler\n", - "from sklearn.utils.class_weight import compute_class_weight\n", - "\n", - "print(\"๐Ÿค– ML libraries imported successfully!\")" - ] - }, - { - 
# ================================
# 🏭 MODEL FACTORY
# ================================

def get_model(model_name, params=None, class_weights=None):
    """
    Factory function to create a single model with the specified parameters.

    Only the requested estimator is constructed. Building every estimator up
    front with the same ``params`` would raise ``TypeError`` as soon as the
    grid contains a keyword that another estimator does not accept
    (for example ``C`` for a random forest).

    Args:
        model_name (str): One of 'logistic_regression', 'random_forest',
            'gradient_boosting', 'xgboost'.
        params (dict): Keyword arguments forwarded to the estimator.
        class_weights (dict): Class weights for imbalanced data. Applied to
            logistic regression and random forest only.

    Returns:
        Configured, unfitted estimator, or None when the name is unknown or
        XGBoost was requested but is not installed.
    """
    known_models = ('logistic_regression', 'random_forest', 'gradient_boosting', 'xgboost')
    if model_name not in known_models:
        return None

    params = dict(params or {})  # work on a copy; the caller's dict stays untouched
    seed = EVALUATION_CONFIG['random_state']

    if model_name == 'logistic_regression':
        return LogisticRegression(random_state=seed, class_weight=class_weights, **params)

    if model_name == 'random_forest':
        return RandomForestClassifier(random_state=seed, class_weight=class_weights, **params)

    # NOTE(review): neither estimator below receives `class_weights`, so the
    # 'class_weight' technique currently trains them exactly like the baseline.
    if model_name == 'gradient_boosting':
        return GradientBoostingClassifier(random_state=seed, **params)

    if not XGBOOST_AVAILABLE:
        return None
    return xgb.XGBClassifier(random_state=seed, eval_metric='logloss', **params)


# ================================
# ⚖️ BALANCING TECHNIQUES FACTORY
# ================================

def apply_balancing_technique(X_train, y_train, technique):
    """
    Apply the specified balancing technique to the training data.

    Args:
        X_train: Training features (anything exposing ``.shape``).
        y_train: Training labels.
        technique (str): 'smote', 'random_downsample', 'class_weight' or
            'no_balancing'.

    Returns:
        tuple: (X_balanced, y_balanced, class_weights, technique_info).
        ``class_weights`` is a dict only for 'class_weight', otherwise None.
        ``technique_info`` records the name, a description and the shapes
        before and after balancing.

    Raises:
        ValueError: If ``technique`` is not one of the supported names.
    """
    technique_info = {'name': technique, 'original_shape': X_train.shape}

    # Defaults shared by the techniques that leave the data untouched.
    X_balanced, y_balanced = X_train, y_train
    class_weights = None

    if technique == 'smote':
        smote = SMOTE(
            sampling_strategy=SMOTE_CONFIG['sampling_strategy'],
            k_neighbors=SMOTE_CONFIG['k_neighbors'],
            random_state=EVALUATION_CONFIG['random_state']
        )
        X_balanced, y_balanced = smote.fit_resample(X_train, y_train)
        technique_info['description'] = 'SMOTE oversampling'

    elif technique == 'random_downsample':
        downsampler = RandomUnderSampler(
            sampling_strategy=DOWNSAMPLE_CONFIG['sampling_strategy'],
            # Honour the configured flag; it was previously defined but never used.
            replacement=DOWNSAMPLE_CONFIG.get('replacement', False),
            random_state=EVALUATION_CONFIG['random_state']
        )
        X_balanced, y_balanced = downsampler.fit_resample(X_train, y_train)
        technique_info['description'] = 'Random undersampling'

    elif technique == 'class_weight':
        # Data stays as is; the imbalance is handled through per-class weights.
        classes = np.unique(y_train)
        weights = compute_class_weight('balanced', classes=classes, y=y_train)
        class_weights = dict(zip(classes, weights))
        technique_info['description'] = f'Class weighting: {class_weights}'

    elif technique == 'no_balancing':
        technique_info['description'] = 'No balancing (baseline)'

    else:
        raise ValueError(f"Unknown balancing technique: {technique}")

    technique_info['new_shape'] = X_balanced.shape
    return X_balanced, y_balanced, class_weights, technique_info
# ================================
# 📈 EVALUATION FRAMEWORK
# ================================

class ModelEvaluator:
    """
    Comprehensive evaluation framework for binary fraud detection models.

    Each call to ``evaluate_model`` appends one metrics dict to ``results`` and
    stores the confusion matrix in ``confusion_matrices`` under the key
    ``"<model_name>_<balancing_technique>"``.

    Args:
        fp_cost: Assumed cost in dollars of one false positive (default 10).
        fn_cost: Assumed cost in dollars of one missed fraud case (default 100).
    """

    def __init__(self, fp_cost=10, fn_cost=100):
        self.results = []
        self.confusion_matrices = {}
        self.fp_cost = fp_cost
        self.fn_cost = fn_cost

    def evaluate_model(self, model, X_test, y_test, model_name, balancing_technique, params=None):
        """
        Score a fitted model on held-out data and record the outcome.

        Args:
            model: Fitted estimator exposing ``predict`` (and optionally ``predict_proba``).
            X_test: Evaluation features.
            y_test: Evaluation labels; assumed to be encoded as 0 (legit) / 1 (fraud).
            model_name (str): Label used in reports and as part of the storage key.
            balancing_technique (str): Label of the balancing technique used for training.
            params (dict): Hyper-parameters of the model, stored for reference.

        Returns:
            dict: Accuracy, precision, recall, F1, ROC AUC (None when it cannot
            be computed) plus the confusion-matrix counts and derived rates.
        """
        # Make predictions
        y_pred = model.predict(X_test)
        y_pred_proba = model.predict_proba(X_test)[:, 1] if hasattr(model, 'predict_proba') else None

        # ROC AUC is undefined when the evaluation labels contain a single class.
        roc_auc = None
        if y_pred_proba is not None and len(np.unique(y_test)) > 1:
            roc_auc = roc_auc_score(y_test, y_pred_proba)

        # Calculate metrics
        metrics = {
            'model_name': model_name,
            'balancing_technique': balancing_technique,
            'parameters': params or {},
            'accuracy': accuracy_score(y_test, y_pred),
            'precision': precision_score(y_test, y_pred, zero_division=0),
            'recall': recall_score(y_test, y_pred, zero_division=0),
            'f1_score': f1_score(y_test, y_pred, zero_division=0),
            'roc_auc': roc_auc
        }

        # Pin the label order so the matrix is always 2x2, even when a class is
        # missing from both y_test and y_pred; otherwise ravel() cannot unpack.
        cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
        tn, fp, fn, tp = cm.ravel()

        # Detailed confusion matrix metrics
        metrics.update({
            'true_negatives': int(tn),
            'false_positives': int(fp),
            'false_negatives': int(fn),
            'true_positives': int(tp),
            'specificity': tn / (tn + fp) if (tn + fp) > 0 else 0,
            'sensitivity': tp / (tp + fn) if (tp + fn) > 0 else 0,
            'false_positive_rate': fp / (fp + tn) if (fp + tn) > 0 else 0,
            'false_negative_rate': fn / (fn + tp) if (fn + tp) > 0 else 0
        })

        # Store results
        self.results.append(metrics)

        # Store confusion matrix for detailed analysis
        key = f"{model_name}_{balancing_technique}"
        self.confusion_matrices[key] = {
            'matrix': cm,
            'model_name': model_name,
            'balancing_technique': balancing_technique,
            'metrics': metrics
        }

        return metrics

    def estimate_cost(self, metrics):
        """
        Translate error counts into the assumed business cost.

        Args:
            metrics (dict): Must contain 'false_positives' and 'false_negatives'.

        Returns:
            dict: 'fp_cost', 'fn_cost' and 'total_cost' in dollars.
        """
        fp_cost = metrics['false_positives'] * self.fp_cost
        fn_cost = metrics['false_negatives'] * self.fn_cost
        return {'fp_cost': fp_cost, 'fn_cost': fn_cost, 'total_cost': fp_cost + fn_cost}

    def plot_confusion_matrix_detailed(self, model_name, balancing_technique, figsize=(10, 8)):
        """
        Plot a 2x2 dashboard for one stored evaluation: raw and row-normalized
        confusion matrices, key metrics, and the share of each prediction outcome.

        Prints a notice and returns when no evaluation is stored for the pair.
        """
        key = f"{model_name}_{balancing_technique}"
        if key not in self.confusion_matrices:
            print(f"No results found for {key}")
            return

        data = self.confusion_matrices[key]
        cm = data['matrix']
        metrics = data['metrics']
        class_labels = ['Not Fraud', 'Fraud']

        fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=figsize)
        fig.suptitle(f'Detailed Analysis: {model_name.title()} with {balancing_technique.title()}',
                     fontsize=16, fontweight='bold')

        # 1. Raw confusion matrix
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax1,
                    xticklabels=class_labels, yticklabels=class_labels)
        ax1.set_title('Raw Counts')
        ax1.set_xlabel('Predicted')
        ax1.set_ylabel('Actual')

        # 2. Normalized confusion matrix; a class with no samples yields a row
        #    of zeros instead of a division-by-zero NaN row.
        row_totals = cm.sum(axis=1, keepdims=True)
        cm_norm = np.divide(cm, row_totals, out=np.zeros(cm.shape, dtype=float), where=row_totals != 0)
        sns.heatmap(cm_norm, annot=True, fmt='.3f', cmap='Oranges', ax=ax2,
                    xticklabels=class_labels, yticklabels=class_labels)
        ax2.set_title('Normalized by True Class')
        ax2.set_xlabel('Predicted')
        ax2.set_ylabel('Actual')

        # 3. Metrics visualization
        metric_names = ['Precision', 'Recall', 'F1-Score', 'Specificity']
        metric_values = [metrics['precision'], metrics['recall'],
                         metrics['f1_score'], metrics['specificity']]

        bars = ax3.bar(metric_names, metric_values, color=['skyblue', 'lightcoral', 'lightgreen', 'gold'])
        ax3.set_title('Key Metrics')
        ax3.set_ylabel('Score')
        ax3.set_ylim(0, 1)

        # Add value labels on bars
        for bar, value in zip(bars, metric_values):
            ax3.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.01,
                     f'{value:.3f}', ha='center', va='bottom')

        # 4. Error analysis
        tn, fp, fn, tp = cm.ravel()
        ax4.pie([tn, fp, fn, tp],
                labels=['True Neg', 'False Pos', 'False Neg', 'True Pos'],
                colors=['green', 'red', 'orange', 'blue'],
                autopct='%1.1f%%', startangle=90)
        ax4.set_title('Prediction Distribution')

        plt.tight_layout()
        plt.show()

        # Print detailed analysis
        self._print_confusion_matrix_analysis(metrics, model_name, balancing_technique)

    def _print_confusion_matrix_analysis(self, metrics, model_name, balancing_technique):
        """
        Print a textual breakdown of one evaluation: confusion-matrix counts,
        precision/recall interpretation, the trade-off category and the
        estimated business cost.
        """
        print(f"\n🔍 DETAILED ANALYSIS: {model_name.upper()} with {balancing_technique.upper()}")
        print("=" * 80)

        print(f"\n📊 CONFUSION MATRIX BREAKDOWN:")
        print(f" • True Negatives (TN): {metrics['true_negatives']:,} - Correctly identified non-fraud")
        print(f" • False Positives (FP): {metrics['false_positives']:,} - Incorrectly flagged as fraud")
        print(f" • False Negatives (FN): {metrics['false_negatives']:,} - Missed fraud cases")
        print(f" • True Positives (TP): {metrics['true_positives']:,} - Correctly identified fraud")

        print(f"\n🎯 PRECISION & RECALL ANALYSIS:")
        print(f" • Precision: {metrics['precision']:.4f}")
        print(f" → Of all fraud predictions, {metrics['precision']*100:.2f}% were actually fraud")
        print(f" → {metrics['false_positives']:,} legitimate transactions incorrectly flagged")

        print(f" • Recall (Sensitivity): {metrics['recall']:.4f}")
        print(f" → Detected {metrics['recall']*100:.2f}% of all actual fraud cases")
        print(f" → Missed {metrics['false_negatives']:,} fraud transactions")

        print(f" • Specificity: {metrics['specificity']:.4f}")
        print(f" → Correctly identified {metrics['specificity']*100:.2f}% of legitimate transactions")

        print(f"\n⚖️ TRADE-OFF ANALYSIS:")
        if metrics['precision'] > 0.8 and metrics['recall'] > 0.8:
            print(f" ✅ EXCELLENT: High precision AND high recall - optimal performance")
        elif metrics['precision'] > 0.8:
            print(f" 🎯 HIGH PRECISION: Low false alarms, but may miss some fraud")
            print(f" → Good for minimizing customer inconvenience")
        elif metrics['recall'] > 0.8:
            print(f" 🔍 HIGH RECALL: Catches most fraud, but more false alarms")
            print(f" → Good for maximizing fraud detection")
        else:
            print(f" ⚠️ BALANCED: Moderate precision and recall")

        print(f"\n💰 BUSINESS IMPACT:")
        costs = self.estimate_cost(metrics)
        print(f" • Estimated FP cost: ${costs['fp_cost']:,} ({metrics['false_positives']:,} × ${self.fp_cost:,})")
        print(f" • Estimated FN cost: ${costs['fn_cost']:,} ({metrics['false_negatives']:,} × ${self.fn_cost:,})")
        print(f" • Total estimated cost: ${costs['total_cost']:,}")
# ================================
# 🧪 EXPERIMENT RUNNER
# ================================

def run_comprehensive_experiments():
    """
    Run systematic experiments across all model and balancing combinations.

    Reads the notebook globals ``train_data``, ``evaluator``, ``MODELS_TO_TEST``,
    ``BALANCING_TECHNIQUES``, ``MODEL_PARAMS`` and ``EVALUATION_CONFIG``.

    Pipeline:
        1. Stratified train/validation split of ``train_data``.
        2. Fit the scaler / one-hot encoder on the training split only and
           apply that same fitted transformer to the validation split.
        3. For each enabled model and balancing technique, balance the encoded
           training matrix and train and evaluate every parameter combination.

    Returns:
        list[dict]: One record per trained model (ids, parameters, balancing
        info and metrics), or None when the target column is missing.
    """
    print("🚀 Starting Comprehensive Fraud Detection Experiments")
    print("=" * 60)

    # Prepare data
    if 'is_fraud' not in train_data.columns:
        print("❌ Error: Target variable 'is_fraud' not found")
        return

    # Split features and target
    X = train_data.drop('is_fraud', axis=1)
    y = train_data['is_fraud']

    # Split into train and validation sets
    X_train, X_val, y_train, y_val = train_test_split(
        X, y,
        test_size=EVALUATION_CONFIG['test_size'],
        random_state=EVALUATION_CONFIG['random_state'],
        stratify=y
    )

    print(f"📊 Data split completed:")
    print(f" • Training: {X_train.shape[0]:,} samples")
    print(f" • Validation: {X_val.shape[0]:,} samples")

    # Identify feature types. 'number' also catches int32/float32 and similar
    # columns, which ColumnTransformer would otherwise silently drop.
    categorical_cols = X_train.select_dtypes(include=['object', 'category']).columns.tolist()
    numerical_cols = X_train.select_dtypes(include=['number']).columns.tolist()

    print(f" • Categorical features: {len(categorical_cols)}")
    print(f" • Numerical features: {len(numerical_cols)}")

    # Create preprocessing pipeline
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), numerical_cols),
            ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
        ]
    )

    # Learn scaling statistics and category vocabularies from the training split
    # ONLY, then reuse the fitted transformer for validation. Fitting on the
    # validation split leaks information and yields a feature space that does
    # not line up with the one the models are trained on.
    print("\n🔄 Preprocessing validation data...")
    X_train_encoded = preprocessor.fit_transform(X_train)
    X_val_processed = preprocessor.transform(X_val)

    # Initialize results storage
    experiment_results = []
    experiment_count = 0

    # Calculate total experiments (one "experiment" = one model/technique pair)
    active_models = [k for k, v in MODELS_TO_TEST.items() if v]
    active_balancing = [k for k, v in BALANCING_TECHNIQUES.items() if v]
    total_experiments = len(active_models) * len(active_balancing)

    print(f"\n🎯 Running {total_experiments} experiments...")
    print(f" • Models: {active_models}")
    print(f" • Balancing techniques: {active_balancing}")

    # Run experiments
    for model_name in active_models:
        for balancing_technique in active_balancing:
            experiment_count += 1
            print(f"\n🔬 Experiment {experiment_count}/{total_experiments}: {model_name.upper()} + {balancing_technique.upper()}")
            print("-" * 50)

            try:
                # Resample the ENCODED matrix: SMOTE interpolates between rows
                # and cannot operate on raw string/categorical columns.
                X_train_processed, y_train_balanced, class_weights, technique_info = apply_balancing_technique(
                    X_train_encoded, y_train, balancing_technique
                )
            except Exception as e:
                print(f" ❌ Error in experiment: {type(e).__name__}: {e}")
                continue

            print(f" ⚖️ {technique_info['description']}")
            print(f" Original: {technique_info['original_shape']} → Balanced: {technique_info['new_shape']}")

            # Expand the parameter grid. An empty grid yields exactly one empty
            # combination, i.e. a single run with the estimator defaults.
            param_grid = MODEL_PARAMS.get(model_name) or {}
            param_names = list(param_grid.keys())
            param_combinations = list(product(*param_grid.values()))

            if param_grid:
                print(f" 🔧 Testing {len(param_combinations)} parameter combinations...")

            for param_combo in param_combinations:
                current_params = dict(zip(param_names, param_combo))

                if current_params:
                    param_str = ', '.join([f'{k}={v}' for k, v in current_params.items()])
                    eval_name = f"{model_name}_{param_str.replace(' ', '').replace(',', '_').replace('=', '')}"
                    print(f" 🎛️ Parameters: {param_str}")
                else:
                    param_str = 'default'
                    eval_name = model_name
                    print(f" 🏋️ Training {model_name} with default parameters...")

                # Isolate each combination so one bad setting does not abort
                # the remaining combinations of this model/technique pair.
                try:
                    model = get_model(model_name, params=current_params, class_weights=class_weights)

                    if model is None:
                        print(f" ❌ Model {model_name} not available")
                        break  # availability does not depend on the parameters

                    # Train model
                    model.fit(X_train_processed, y_train_balanced)

                    # Evaluate model
                    metrics = evaluator.evaluate_model(
                        model, X_val_processed, y_val,
                        eval_name,
                        balancing_technique,
                        params=current_params or None
                    )
                except Exception as e:
                    print(f" ❌ Error in experiment: {type(e).__name__}: {e}")
                    continue

                # Store results with parameter info
                experiment_results.append({
                    'experiment_id': experiment_count,
                    'model_name': model_name,
                    'balancing_technique': balancing_technique,
                    'parameters': current_params,
                    'param_string': param_str,
                    'technique_info': technique_info,
                    'metrics': metrics
                })

                # Print quick summary
                if current_params:
                    print(f" ✅ F1={metrics['f1_score']:.3f}, P={metrics['precision']:.3f}, R={metrics['recall']:.3f}")
                else:
                    print(f" ✅ Results: F1={metrics['f1_score']:.3f}, Precision={metrics['precision']:.3f}, Recall={metrics['recall']:.3f}")

    print(f"\n🎉 All experiments completed! ({experiment_count} total)")
    return experiment_results
# ================================
# 📊 RESULTS ANALYSIS FRAMEWORK
# ================================

def analyze_experiment_results(results):
    """
    Comprehensive analysis of all experiment results.

    Args:
        results (list[dict]): Records with 'model_name', 'balancing_technique',
            'metrics' and optionally 'param_string', as produced by the
            experiment runner.

    Returns:
        pandas.DataFrame with one row per record (model, balancing technique,
        parameters, scores and confusion-matrix counts), or None when
        ``results`` is empty.
    """
    if not results:
        print("❌ No results to analyze")
        return

    print("📊 COMPREHENSIVE RESULTS ANALYSIS")
    print("=" * 60)

    # Create results DataFrame (build the records first, construct the frame once)
    results_data = []
    for result in results:
        metrics = result['metrics']
        results_data.append({
            'Model': result['model_name'].replace('_', ' ').title(),
            'Balancing': result['balancing_technique'].replace('_', ' ').title(),
            # Without this column, rows that differ only in hyper-parameters
            # are indistinguishable in every table below.
            'Parameters': result.get('param_string', 'default'),
            'F1 Score': metrics['f1_score'],
            'Precision': metrics['precision'],
            'Recall': metrics['recall'],
            'Accuracy': metrics['accuracy'],
            'ROC AUC': metrics['roc_auc'] if metrics['roc_auc'] else 0,
            'True Positives': metrics['true_positives'],
            'False Positives': metrics['false_positives'],
            'False Negatives': metrics['false_negatives'],
            'True Negatives': metrics['true_negatives']
        })

    results_df = pd.DataFrame(results_data)

    # 1. Overall Performance Summary
    print("\n🏆 TOP PERFORMERS BY METRIC:")
    print("-" * 40)

    metrics_to_analyze = ['F1 Score', 'Precision', 'Recall', 'Accuracy']
    for metric in metrics_to_analyze:
        # idxmax() returns an index LABEL, so look it up with .loc, not .iloc.
        best_result = results_df.loc[results_df[metric].idxmax()]
        print(f" 🥇 Best {metric}: {best_result['Model']} + {best_result['Balancing']} "
              f"[{best_result['Parameters']}] ({best_result[metric]:.4f})")

    # 2. Model Comparison
    print("\n🤖 MODEL PERFORMANCE COMPARISON:")
    print("-" * 40)
    model_comparison = results_df.groupby('Model')[['F1 Score', 'Precision', 'Recall']].agg(['mean', 'std']).round(4)
    display(model_comparison)

    # 3. Balancing Technique Comparison
    print("\n⚖️ BALANCING TECHNIQUE COMPARISON:")
    print("-" * 40)
    balancing_comparison = results_df.groupby('Balancing')[['F1 Score', 'Precision', 'Recall']].agg(['mean', 'std']).round(4)
    display(balancing_comparison)

    # 4. Detailed Results Table
    print("\n📋 DETAILED RESULTS TABLE:")
    print("-" * 40)
    display_df = results_df[['Model', 'Balancing', 'Parameters', 'F1 Score', 'Precision', 'Recall', 'Accuracy']].round(4)
    display_df = display_df.sort_values('F1 Score', ascending=False)
    display(display_df)

    return results_df


def plot_comprehensive_comparison(results_df):
    """
    Create comprehensive visualization of all results.

    Draws a 2x3 grid: heatmaps of F1 / precision / recall per model and
    balancing technique, mean scores per model, mean scores per balancing
    technique, and a precision-vs-recall scatter.

    When a (Model, Balancing) pair has several rows (one per parameter
    combination), each heatmap cell shows the best score of that pair.

    Args:
        results_df (pandas.DataFrame): Needs the columns 'Model', 'Balancing',
            'F1 Score', 'Precision' and 'Recall'.
    """
    score_cols = ['F1 Score', 'Precision', 'Recall']

    fig, axes = plt.subplots(2, 3, figsize=(20, 12))
    fig.suptitle('Comprehensive Model & Balancing Technique Comparison', fontsize=16, fontweight='bold')

    # 1-3. Score heatmaps. pivot_table aggregates duplicate (Model, Balancing)
    # pairs; plain pivot() raises ValueError on them, which always happens once
    # more than one parameter combination has been evaluated.
    heatmap_specs = [
        ('F1 Score', 'YlOrRd', axes[0, 0]),
        ('Precision', 'Blues', axes[0, 1]),
        ('Recall', 'Greens', axes[0, 2]),
    ]
    for metric, cmap, ax in heatmap_specs:
        pivoted = results_df.pivot_table(index='Model', columns='Balancing', values=metric, aggfunc='max')
        sns.heatmap(pivoted, annot=True, fmt='.3f', cmap=cmap, ax=ax)
        ax.set_title(f'{metric} by Model & Balancing')

    # 4-5. Average performance per model and per balancing technique
    bar_specs = [
        ('Model', 'Average Performance by Model', axes[1, 0]),
        ('Balancing', 'Average Performance by Balancing Technique', axes[1, 1]),
    ]
    for group_col, title, ax in bar_specs:
        results_df.groupby(group_col)[score_cols].mean().plot(kind='bar', ax=ax)
        ax.set_title(title)
        ax.set_ylabel('Score')
        ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
        ax.tick_params(axis='x', rotation=45)

    # 6. Precision vs Recall Scatter
    scatter_ax = axes[1, 2]
    for balancing in results_df['Balancing'].unique():
        subset = results_df[results_df['Balancing'] == balancing]
        scatter_ax.scatter(subset['Precision'], subset['Recall'],
                           label=balancing, s=100, alpha=0.7)

    scatter_ax.set_xlabel('Precision')
    scatter_ax.set_ylabel('Recall')
    scatter_ax.set_title('Precision vs Recall by Balancing Technique')
    scatter_ax.legend()
    scatter_ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()
combinations\n", - " model_name, balancing = key.split('_', 1)\n", - " \n", - " print(f\"\\n๐Ÿ” PARAMETER IMPACT: {model_name.upper()} + {balancing.upper()}\")\n", - " print(\"-\" * 60)\n", - " \n", - " # Create comparison DataFrame\n", - " param_df = pd.DataFrame(param_results).sort_values('f1_score', ascending=False)\n", - " \n", - " print(\"๐Ÿ“Š Parameter Performance Comparison (sorted by F1 Score):\")\n", - " display_cols = ['param_string', 'f1_score', 'precision', 'recall', 'false_positives', 'false_negatives']\n", - " display(param_df[display_cols].round(4))\n", - " \n", - " # Analyze best vs worst\n", - " best = param_df.iloc[0]\n", - " worst = param_df.iloc[-1]\n", - " \n", - " print(f\"\\n๐Ÿ† BEST PARAMETERS: {best['param_string']}\")\n", - " print(f\" โ€ข F1: {best['f1_score']:.4f}, Precision: {best['precision']:.4f}, Recall: {best['recall']:.4f}\")\n", - " print(f\" โ€ข Errors: {best['false_positives']} FP, {best['false_negatives']} FN\")\n", - " \n", - " print(f\"\\n๐Ÿ“‰ WORST PARAMETERS: {worst['param_string']}\")\n", - " print(f\" โ€ข F1: {worst['f1_score']:.4f}, Precision: {worst['precision']:.4f}, Recall: {worst['recall']:.4f}\")\n", - " print(f\" โ€ข Errors: {worst['false_positives']} FP, {worst['false_negatives']} FN\")\n", - " \n", - " # Calculate improvement\n", - " f1_improvement = best['f1_score'] - worst['f1_score']\n", - " precision_improvement = best['precision'] - worst['precision']\n", - " recall_improvement = best['recall'] - worst['recall']\n", - " \n", - " print(f\"\\n๐Ÿ“ˆ PARAMETER TUNING IMPACT:\")\n", - " print(f\" โ€ข F1 Score improvement: {f1_improvement:.4f} ({f1_improvement/worst['f1_score']*100:.1f}% relative)\")\n", - " print(f\" โ€ข Precision change: {precision_improvement:+.4f}\")\n", - " print(f\" โ€ข Recall change: {recall_improvement:+.4f}\")\n", - " \n", - " # Confusion matrix comparison insight\n", - " fp_change = best['false_positives'] - worst['false_positives']\n", - " fn_change = best['false_negatives'] - 
worst['false_negatives']\n", - " \n", - " print(f\"\\n๐ŸŽฏ CONFUSION MATRIX CHANGES:\")\n", - " print(f\" โ€ข False Positives: {fp_change:+d} ({'reduced' if fp_change < 0 else 'increased'} customer inconvenience)\")\n", - " print(f\" โ€ข False Negatives: {fn_change:+d} ({'reduced' if fn_change < 0 else 'increased'} missed fraud)\")\n", - " \n", - " if fp_change < 0 and fn_change < 0:\n", - " print(f\" โœ… EXCELLENT: Parameter tuning reduced both types of errors!\")\n", - " elif fp_change < 0:\n", - " print(f\" ๐ŸŽฏ PRECISION FOCUSED: Reduced false alarms (better customer experience)\")\n", - " elif fn_change < 0:\n", - " print(f\" ๐Ÿ” RECALL FOCUSED: Reduced missed fraud (better fraud detection)\")\n", - " else:\n", - " print(f\" โš ๏ธ TRADE-OFF: Parameter tuning improved F1 through better balance\")\n", - " \n", - " else:\n", - " model_name, balancing = key.split('_', 1)\n", - " print(f\"\\nโš ๏ธ {model_name.upper()} + {balancing.upper()}: Only one parameter combination tested\")\n", - " \n", - "else:\n", - " print(\"โš ๏ธ No experiment results found. Please run the experiments first.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## ๐ŸŽฏ Detailed Confusion Matrix Analysis\n", - "**In-depth analysis of precision/recall trade-offs for each approach**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# ================================\n", - "# ๐ŸŽฏ CONFUSION MATRIX DEEP DIVE\n", - "# ================================\n", - "\n", - "def analyze_confusion_matrices():\n", - " \"\"\"\n", - " Detailed analysis of confusion matrices for all experiments\n", - " \"\"\"\n", - " print(\"๐ŸŽฏ DETAILED CONFUSION MATRIX ANALYSIS\")\n", - " print(\"=\" * 60)\n", - " \n", - " if not evaluator.confusion_matrices:\n", - " print(\"โŒ No confusion matrices found. 
Please run experiments first.\")\n", - " return\n", - " \n", - " # Analyze each model-balancing combination\n", - " for key, data in evaluator.confusion_matrices.items():\n", - " model_name = data['model_name']\n", - " balancing_technique = data['balancing_technique']\n", - " \n", - " print(f\"\\n๐Ÿ” Analyzing: {model_name.upper()} + {balancing_technique.upper()}\")\n", - " print(\"=\" * 50)\n", - " \n", - " # Plot detailed confusion matrix\n", - " evaluator.plot_confusion_matrix_detailed(model_name, balancing_technique)\n", - "\n", - "def compare_balancing_techniques_detailed():\n", - " \"\"\"\n", - " Detailed comparison of how different balancing techniques affect precision/recall\n", - " \"\"\"\n", - " print(\"\\nโš–๏ธ BALANCING TECHNIQUES: PRECISION/RECALL TRADE-OFF ANALYSIS\")\n", - " print(\"=\" * 70)\n", - " \n", - " if not all_results:\n", - " print(\"โŒ No results available for analysis\")\n", - " return\n", - " \n", - " # Group results by balancing technique\n", - " balancing_analysis = {}\n", - " \n", - " for result in all_results:\n", - " technique = result['balancing_technique']\n", - " metrics = result['metrics']\n", - " \n", - " if technique not in balancing_analysis:\n", - " balancing_analysis[technique] = {\n", - " 'results': [],\n", - " 'avg_precision': 0,\n", - " 'avg_recall': 0,\n", - " 'avg_f1': 0,\n", - " 'total_fp': 0,\n", - " 'total_fn': 0\n", - " }\n", - " \n", - " balancing_analysis[technique]['results'].append(metrics)\n", - " balancing_analysis[technique]['total_fp'] += metrics['false_positives']\n", - " balancing_analysis[technique]['total_fn'] += metrics['false_negatives']\n", - " \n", - " # Calculate averages and analyze\n", - " for technique, data in balancing_analysis.items():\n", - " results = data['results']\n", - " n_results = len(results)\n", - " \n", - " avg_precision = sum(r['precision'] for r in results) / n_results\n", - " avg_recall = sum(r['recall'] for r in results) / n_results\n", - " avg_f1 = sum(r['f1_score'] for r 
in results) / n_results\n", - " \n", - " data['avg_precision'] = avg_precision\n", - " data['avg_recall'] = avg_recall\n", - " data['avg_f1'] = avg_f1\n", - " \n", - " print(f\"\\n๐Ÿ”ฌ {technique.upper().replace('_', ' ')} ANALYSIS:\")\n", - " print(\"-\" * 40)\n", - " print(f\" ๐Ÿ“Š Average Metrics (across {n_results} models):\")\n", - " print(f\" โ€ข Precision: {avg_precision:.4f}\")\n", - " print(f\" โ€ข Recall: {avg_recall:.4f}\")\n", - " print(f\" โ€ข F1 Score: {avg_f1:.4f}\")\n", - " \n", - " print(f\" ๐ŸŽฏ Error Analysis:\")\n", - " print(f\" โ€ข Total False Positives: {data['total_fp']:,}\")\n", - " print(f\" โ€ข Total False Negatives: {data['total_fn']:,}\")\n", - " \n", - " # Technique-specific insights\n", - " if technique == 'smote':\n", - " print(f\" ๐Ÿ’ก SMOTE Insights:\")\n", - " print(f\" โ€ข Synthetic oversampling tends to improve recall\")\n", - " print(f\" โ€ข May introduce noise, potentially affecting precision\")\n", - " print(f\" โ€ข Good for learning minority class patterns\")\n", - " elif technique == 'random_downsample':\n", - " print(f\" ๐Ÿ’ก Downsampling Insights:\")\n", - " print(f\" โ€ข Reduces dataset size, faster training\")\n", - " print(f\" โ€ข May lose important majority class information\")\n", - " print(f\" โ€ข Can lead to overfitting on reduced data\")\n", - " elif technique == 'class_weight':\n", - " print(f\" ๐Ÿ’ก Class Weighting Insights:\")\n", - " print(f\" โ€ข Preserves all original data\")\n", - " print(f\" โ€ข Adjusts model's decision boundary\")\n", - " print(f\" โ€ข May be sensitive to weight selection\")\n", - " elif technique == 'no_balancing':\n", - " print(f\" ๐Ÿ’ก No Balancing Insights:\")\n", - " print(f\" โ€ข Baseline performance with imbalanced data\")\n", - " print(f\" โ€ข Typically biased toward majority class\")\n", - " print(f\" โ€ข May have high precision but low recall\")\n", - " \n", - " # Create comparison visualization\n", - " create_balancing_comparison_plot(balancing_analysis)\n", - "\n", - "def 
create_balancing_comparison_plot(balancing_analysis):\n", - " \"\"\"\n", - " Create detailed visualization comparing balancing techniques\n", - " \"\"\"\n", - " fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 12))\n", - " fig.suptitle('Balancing Techniques: Detailed Comparison', fontsize=16, fontweight='bold')\n", - " \n", - " techniques = list(balancing_analysis.keys())\n", - " precisions = [balancing_analysis[t]['avg_precision'] for t in techniques]\n", - " recalls = [balancing_analysis[t]['avg_recall'] for t in techniques]\n", - " f1_scores = [balancing_analysis[t]['avg_f1'] for t in techniques]\n", - " \n", - " # 1. Precision Comparison\n", - " bars1 = ax1.bar(techniques, precisions, color='skyblue', alpha=0.8)\n", - " ax1.set_title('Average Precision by Balancing Technique')\n", - " ax1.set_ylabel('Precision')\n", - " ax1.set_ylim(0, 1)\n", - " for bar, val in zip(bars1, precisions):\n", - " ax1.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01, \n", - " f'{val:.3f}', ha='center', va='bottom')\n", - " ax1.tick_params(axis='x', rotation=45)\n", - " \n", - " # 2. Recall Comparison\n", - " bars2 = ax2.bar(techniques, recalls, color='lightcoral', alpha=0.8)\n", - " ax2.set_title('Average Recall by Balancing Technique')\n", - " ax2.set_ylabel('Recall')\n", - " ax2.set_ylim(0, 1)\n", - " for bar, val in zip(bars2, recalls):\n", - " ax2.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01, \n", - " f'{val:.3f}', ha='center', va='bottom')\n", - " ax2.tick_params(axis='x', rotation=45)\n", - " \n", - " # 3. 
F1 Score Comparison\n", - " bars3 = ax3.bar(techniques, f1_scores, color='lightgreen', alpha=0.8)\n", - " ax3.set_title('Average F1 Score by Balancing Technique')\n", - " ax3.set_ylabel('F1 Score')\n", - " ax3.set_ylim(0, 1)\n", - " for bar, val in zip(bars3, f1_scores):\n", - " ax3.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01, \n", - " f'{val:.3f}', ha='center', va='bottom')\n", - " ax3.tick_params(axis='x', rotation=45)\n", - " \n", - " # 4. Precision vs Recall Trade-off\n", - " colors = ['blue', 'red', 'green', 'orange']\n", - " for i, technique in enumerate(techniques):\n", - " ax4.scatter(precisions[i], recalls[i], \n", - " s=200, alpha=0.7, color=colors[i % len(colors)], \n", - " label=technique.replace('_', ' ').title())\n", - " ax4.annotate(technique.replace('_', ' ').title(), \n", - " (precisions[i], recalls[i]), \n", - " xytext=(5, 5), textcoords='offset points')\n", - " \n", - " ax4.set_xlabel('Precision')\n", - " ax4.set_ylabel('Recall')\n", - " ax4.set_title('Precision vs Recall Trade-off')\n", - " ax4.grid(True, alpha=0.3)\n", - " ax4.legend()\n", - " \n", - " # Add diagonal line for F1 score reference\n", - " ax4.plot([0, 1], [0, 1], 'k--', alpha=0.3, label='Equal Precision/Recall')\n", - " \n", - " plt.tight_layout()\n", - " plt.show()\n", - "\n", - "# Run detailed analysis\n", - "if 'all_results' in locals() and all_results:\n", - " analyze_confusion_matrices()\n", - " compare_balancing_techniques_detailed()\n", - " \n", - " # CRITICAL: Add comprehensive confusion matrix variation analysis\n", - " print(\"\\n\" + \"=\" * 90)\n", - " print(\"๐ŸŽฏ COMPREHENSIVE CONFUSION MATRIX VARIATION ANALYSIS\")\n", - " print(\"=\" * 90)\n", - " print(\"\\nThis section analyzes how confusion matrices change across:\")\n", - " print(\"1. Different models (Logistic Regression, Random Forest, etc.)\")\n", - " print(\"2. Different parameter settings for each model\")\n", - " print(\"3. 
Different class balancing approaches (SMOTE, downsampling, etc.)\")\n", - " print(\"\\nFocus: Understanding precision/recall trade-offs and their business impact\")\n", - " \n", - " # Group results by model for comparison\n", - " model_comparison = defaultdict(list)\n", - " balancing_comparison = defaultdict(list)\n", - " parameter_comparison = defaultdict(list)\n", - " \n", - " for result in all_results:\n", - " metrics = result['metrics']\n", - " model_name = result['model_name']\n", - " balancing = result['balancing_technique']\n", - " param_str = result.get('param_string', 'default')\n", - " \n", - " # Group by model\n", - " model_comparison[model_name].append({\n", - " 'balancing': balancing,\n", - " 'params': param_str,\n", - " 'precision': metrics['precision'],\n", - " 'recall': metrics['recall'],\n", - " 'f1': metrics['f1_score'],\n", - " 'fp': metrics['false_positives'],\n", - " 'fn': metrics['false_negatives'],\n", - " 'tn': metrics['true_negatives'],\n", - " 'tp': metrics['true_positives']\n", - " })\n", - " \n", - " # Group by balancing technique\n", - " balancing_comparison[balancing].append({\n", - " 'model': model_name,\n", - " 'params': param_str,\n", - " 'precision': metrics['precision'],\n", - " 'recall': metrics['recall'],\n", - " 'f1': metrics['f1_score'],\n", - " 'fp': metrics['false_positives'],\n", - " 'fn': metrics['false_negatives']\n", - " })\n", - " \n", - " # Group by parameter variations (for models with multiple param settings)\n", - " if param_str != 'default':\n", - " key = f\"{model_name}_{balancing}\"\n", - " parameter_comparison[key].append({\n", - " 'params': param_str,\n", - " 'precision': metrics['precision'],\n", - " 'recall': metrics['recall'],\n", - " 'f1': metrics['f1_score'],\n", - " 'fp': metrics['false_positives'],\n", - " 'fn': metrics['false_negatives']\n", - " })\n", - " \n", - " # 1. MODEL COMPARISON ANALYSIS\n", - " print(f\"\\n๐Ÿค– 1. 
MODEL COMPARISON: How different algorithms affect confusion matrix\")\n", - " print(\"-\" * 70)\n", - " \n", - " for model_name, results in model_comparison.items():\n", - " if len(results) > 0:\n", - " avg_precision = np.mean([r['precision'] for r in results])\n", - " avg_recall = np.mean([r['recall'] for r in results])\n", - " avg_fp = np.mean([r['fp'] for r in results])\n", - " avg_fn = np.mean([r['fn'] for r in results])\n", - " \n", - " print(f\"\\n๐Ÿ“Š {model_name.upper()} (averaged across all configurations):\")\n", - " print(f\" โ€ข Average Precision: {avg_precision:.4f} โ†’ {avg_fp:.0f} false positives on average\")\n", - " print(f\" โ€ข Average Recall: {avg_recall:.4f} โ†’ {avg_fn:.0f} false negatives on average\")\n", - " \n", - " # Find best and worst configurations for this model\n", - " best_f1 = max(results, key=lambda x: x['f1'])\n", - " worst_f1 = min(results, key=lambda x: x['f1'])\n", - " \n", - " print(f\" โ€ข Best config: {best_f1['balancing']} + {best_f1['params']}\")\n", - " print(f\" โ†’ Precision: {best_f1['precision']:.4f}, Recall: {best_f1['recall']:.4f}\")\n", - " print(f\" โ†’ Confusion: {best_f1['tp']} TP, {best_f1['fp']} FP, {best_f1['fn']} FN, {best_f1['tn']} TN\")\n", - " \n", - " if len(results) > 1:\n", - " print(f\" โ€ข Worst config: {worst_f1['balancing']} + {worst_f1['params']}\")\n", - " print(f\" โ†’ Precision: {worst_f1['precision']:.4f}, Recall: {worst_f1['recall']:.4f}\")\n", - " print(f\" โ†’ Shows {model_name} sensitivity to configuration\")\n", - " \n", - " # 2. BALANCING TECHNIQUE COMPARISON\n", - " print(f\"\\nโš–๏ธ 2. 
BALANCING TECHNIQUE COMPARISON: How class balancing affects precision/recall\")\n", - " print(\"-\" * 80)\n", - " \n", - " for balancing, results in balancing_comparison.items():\n", - " if len(results) > 0:\n", - " avg_precision = np.mean([r['precision'] for r in results])\n", - " avg_recall = np.mean([r['recall'] for r in results])\n", - " avg_fp = np.mean([r['fp'] for r in results])\n", - " avg_fn = np.mean([r['fn'] for r in results])\n", - " \n", - " print(f\"\\n๐Ÿ“Š {balancing.upper().replace('_', ' ')} (averaged across all models):\")\n", - " print(f\" โ€ข Average Precision: {avg_precision:.4f} โ†’ {avg_fp:.0f} false positives on average\")\n", - " print(f\" โ€ข Average Recall: {avg_recall:.4f} โ†’ {avg_fn:.0f} false negatives on average\")\n", - " \n", - " # Explain the balancing technique's typical behavior\n", - " if balancing == 'smote':\n", - " print(f\" ๐Ÿ’ก SMOTE typically increases recall (catches more fraud) but may reduce precision\")\n", - " print(f\" โ†’ Synthetic samples help model learn minority class patterns\")\n", - " elif balancing == 'random_downsample':\n", - " print(f\" ๐Ÿ’ก Downsampling often improves precision but may hurt recall\")\n", - " print(f\" โ†’ Balanced classes but less training data\")\n", - " elif balancing == 'class_weight':\n", - " print(f\" ๐Ÿ’ก Class weighting balances precision/recall through loss function\")\n", - " print(f\" โ†’ Keeps all data but adjusts model's decision boundary\")\n", - " elif balancing == 'no_balancing':\n", - " print(f\" ๐Ÿ’ก No balancing typically shows high precision, low recall\")\n", - " print(f\" โ†’ Model biased toward majority class (non-fraud)\")\n", - " \n", - " # 3. PARAMETER VARIATION IMPACT\n", - " print(f\"\\n๐Ÿ”ง 3. 
PARAMETER VARIATION IMPACT: How hyperparameters change confusion matrix\")\n", - " print(\"-\" * 80)\n", - " \n", - " for key, results in parameter_comparison.items():\n", - " if len(results) > 1: # Only analyze if multiple parameter combinations\n", - " model_name, balancing = key.split('_', 1)\n", - " \n", - " print(f\"\\n๐Ÿ“Š {model_name.upper()} + {balancing.upper()}:\")\n", - " \n", - " # Sort by F1 score\n", - " sorted_results = sorted(results, key=lambda x: x['f1'], reverse=True)\n", - " best = sorted_results[0]\n", - " worst = sorted_results[-1]\n", - " \n", - " print(f\" โ€ข Best parameters ({best['params']}):\")\n", - " print(f\" โ†’ Precision: {best['precision']:.4f}, Recall: {best['recall']:.4f}\")\n", - " print(f\" โ†’ Errors: {best['fp']} false positives, {best['fn']} false negatives\")\n", - " \n", - " print(f\" โ€ข Worst parameters ({worst['params']}):\")\n", - " print(f\" โ†’ Precision: {worst['precision']:.4f}, Recall: {worst['recall']:.4f}\")\n", - " print(f\" โ†’ Errors: {worst['fp']} false positives, {worst['fn']} false negatives\")\n", - " \n", - " # Calculate the impact of parameter tuning\n", - " precision_change = best['precision'] - worst['precision']\n", - " recall_change = best['recall'] - worst['recall']\n", - " fp_change = best['fp'] - worst['fp']\n", - " fn_change = best['fn'] - worst['fn']\n", - " \n", - " print(f\" ๐Ÿ“ˆ Parameter tuning impact:\")\n", - " print(f\" โ†’ Precision change: {precision_change:+.4f}\")\n", - " print(f\" โ†’ Recall change: {recall_change:+.4f}\")\n", - " print(f\" โ†’ False positive change: {fp_change:+d} ({'better' if fp_change <= 0 else 'worse'})\")\n", - " print(f\" โ†’ False negative change: {fn_change:+d} ({'better' if fn_change <= 0 else 'worse'})\")\n", - " \n", - " # Business interpretation\n", - " if fp_change < 0 and fn_change < 0:\n", - " print(f\" โœ… WIN-WIN: Parameter tuning reduced both error types!\")\n", - " elif fp_change < 0:\n", - " print(f\" ๐ŸŽฏ PRECISION GAIN: Fewer false alarms 
(better customer experience)\")\n", - " elif fn_change < 0:\n", - " print(f\" ๐Ÿ” RECALL GAIN: Fewer missed frauds (better fraud detection)\")\n", - " else:\n", - " print(f\" โš–๏ธ TRADE-OFF: Overall F1 improved despite individual metric changes\")\n", - " \n", - " # 4. SUMMARY INSIGHTS\n", - " print(f\"\\n๐ŸŽฏ 4. KEY INSIGHTS: Confusion Matrix Variations Across All Dimensions\")\n", - " print(\"-\" * 70)\n", - " \n", - " # Find overall best and worst performers\n", - " all_metrics = [r['metrics'] for r in all_results]\n", - " best_overall = max(all_results, key=lambda x: x['metrics']['f1_score'])\n", - " worst_overall = min(all_results, key=lambda x: x['metrics']['f1_score'])\n", - " \n", - " print(f\"\\n๐Ÿ† BEST OVERALL CONFIGURATION:\")\n", - " print(f\" โ€ข {best_overall['model_name']} + {best_overall['balancing_technique']} + {best_overall.get('param_string', 'default')}\")\n", - " print(f\" โ€ข Confusion Matrix: {best_overall['metrics']['true_positives']} TP, {best_overall['metrics']['false_positives']} FP, {best_overall['metrics']['false_negatives']} FN, {best_overall['metrics']['true_negatives']} TN\")\n", - " print(f\" โ€ข Precision: {best_overall['metrics']['precision']:.4f}, Recall: {best_overall['metrics']['recall']:.4f}\")\n", - " \n", - " print(f\"\\n๐Ÿ“‰ WORST OVERALL CONFIGURATION:\")\n", - " print(f\" โ€ข {worst_overall['model_name']} + {worst_overall['balancing_technique']} + {worst_overall.get('param_string', 'default')}\")\n", - " print(f\" โ€ข Confusion Matrix: {worst_overall['metrics']['true_positives']} TP, {worst_overall['metrics']['false_positives']} FP, {worst_overall['metrics']['false_negatives']} FN, {worst_overall['metrics']['true_negatives']} TN\")\n", - " print(f\" โ€ข Precision: {worst_overall['metrics']['precision']:.4f}, Recall: {worst_overall['metrics']['recall']:.4f}\")\n", - " \n", - " # Calculate total improvement potential\n", - " precision_improvement = best_overall['metrics']['precision'] - 
worst_overall['metrics']['precision']\n", - " recall_improvement = best_overall['metrics']['recall'] - worst_overall['metrics']['recall']\n", - " fp_improvement = worst_overall['metrics']['false_positives'] - best_overall['metrics']['false_positives']\n", - " fn_improvement = worst_overall['metrics']['false_negatives'] - best_overall['metrics']['false_negatives']\n", - " \n", - " print(f\"\\n๐Ÿ“Š TOTAL IMPROVEMENT POTENTIAL (Best vs Worst):\")\n", - " print(f\" โ€ข Precision improvement: {precision_improvement:.4f}\")\n", - " print(f\" โ€ข Recall improvement: {recall_improvement:.4f}\")\n", - " print(f\" โ€ข False positives reduced by: {fp_improvement}\")\n", - " print(f\" โ€ข False negatives reduced by: {fn_improvement}\")\n", - " print(f\" โ€ข This demonstrates the critical importance of proper model selection,\")\n", - " print(f\" parameter tuning, and balancing technique choice!\")\n", - " \n", - "else:\n", - " print(\"โš ๏ธ No experiment results found. Please run the experiments first.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## ๐Ÿ† Best Model Selection & Final Evaluation\n", - "**Select the best performing model and conduct final evaluation**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# ================================\n", - "# ๐Ÿ† BEST MODEL SELECTION\n", - "# ================================\n", - "\n", - "def select_best_model(results):\n", - " \"\"\"\n", - " Select the best model based on F1 score and business considerations\n", - " \"\"\"\n", - " if not results:\n", - " print(\"โŒ No results available for model selection\")\n", - " return None\n", - " \n", - " print(\"๐Ÿ† BEST MODEL SELECTION\")\n", - " print(\"=\" * 40)\n", - " \n", - " # Find best model by F1 score\n", - " best_result = max(results, key=lambda x: x['metrics']['f1_score'])\n", - " best_metrics = best_result['metrics']\n", - " \n", - " print(f\"\\n๐Ÿฅ‡ BEST PERFORMING 
MODEL:\")\n", - " print(f\" โ€ข Model: {best_result['model_name'].replace('_', ' ').title()}\")\n", - " print(f\" โ€ข Balancing: {best_result['balancing_technique'].replace('_', ' ').title()}\")\n", - " print(f\" โ€ข F1 Score: {best_metrics['f1_score']:.4f}\")\n", - " print(f\" โ€ข Precision: {best_metrics['precision']:.4f}\")\n", - " print(f\" โ€ข Recall: {best_metrics['recall']:.4f}\")\n", - " print(f\" โ€ข Accuracy: {best_metrics['accuracy']:.4f}\")\n", - " \n", - " # Business impact analysis\n", - " fp_cost = best_metrics['false_positives'] * 10\n", - " fn_cost = best_metrics['false_negatives'] * 100\n", - " total_cost = fp_cost + fn_cost\n", - " \n", - " print(f\"\\n๐Ÿ’ฐ BUSINESS IMPACT:\")\n", - " print(f\" โ€ข False Positive Cost: ${fp_cost:,}\")\n", - " print(f\" โ€ข False Negative Cost: ${fn_cost:,}\")\n", - " print(f\" โ€ข Total Estimated Cost: ${total_cost:,}\")\n", - " \n", - " # Alternative recommendations\n", - " print(f\"\\n๐ŸŽฏ ALTERNATIVE CONSIDERATIONS:\")\n", - " \n", - " # Best precision model\n", - " best_precision = max(results, key=lambda x: x['metrics']['precision'])\n", - " if best_precision != best_result:\n", - " print(f\" โ€ข Best Precision: {best_precision['model_name'].title()} + {best_precision['balancing_technique'].title()} ({best_precision['metrics']['precision']:.4f})\")\n", - " print(f\" โ†’ Use if minimizing false alarms is critical\")\n", - " \n", - " # Best recall model\n", - " best_recall = max(results, key=lambda x: x['metrics']['recall'])\n", - " if best_recall != best_result:\n", - " print(f\" โ€ข Best Recall: {best_recall['model_name'].title()} + {best_recall['balancing_technique'].title()} ({best_recall['metrics']['recall']:.4f})\")\n", - " print(f\" โ†’ Use if catching all fraud is critical\")\n", - " \n", - " return best_result\n", - "\n", - "def save_best_model(best_result):\n", - " \"\"\"\n", - " Save the best model and its metadata\n", - " \"\"\"\n", - " if not best_result:\n", - " print(\"โŒ No best model to 
save\")\n", - " return\n", - " \n", - " print(f\"\\n๐Ÿ’พ SAVING BEST MODEL\")\n", - " print(\"=\" * 30)\n", - " \n", - " # Create model metadata\n", - " metadata = {\n", - " 'model_type': best_result['model_name'],\n", - " 'balancing_technique': best_result['balancing_technique'],\n", - " 'metrics': best_result['metrics'],\n", - " 'technique_info': best_result['technique_info'],\n", - " 'experiment_timestamp': pd.Timestamp.now().isoformat(),\n", - " 'configuration': {\n", - " 'models_tested': MODELS_TO_TEST,\n", - " 'balancing_tested': BALANCING_TECHNIQUES,\n", - " 'evaluation_config': EVALUATION_CONFIG\n", - " }\n", - " }\n", - " \n", - " # Save metadata\n", - " os.makedirs(config.MODELS_DIR, exist_ok=True)\n", - " \n", - " with open(config.MODEL_METADATA_PATH, 'w') as f:\n", - " json.dump(metadata, f, indent=4, default=str)\n", - " \n", - " print(f\"โœ… Model metadata saved to {config.MODEL_METADATA_PATH}\")\n", - " \n", - " # Save experiment results\n", - " results_path = config.MODELS_DIR / 'experiment_results.json'\n", - " with open(results_path, 'w') as f:\n", - " json.dump(all_results, f, indent=4, default=str)\n", - " \n", - " print(f\"โœ… Experiment results saved to {results_path}\")\n", - " \n", - " return metadata\n", - "\n", - "# Select and save best model\n", - "if 'all_results' in locals() and all_results:\n", - " best_model_result = select_best_model(all_results)\n", - " model_metadata = save_best_model(best_model_result)\n", - "else:\n", - " print(\"โš ๏ธ No experiment results found. 
Please run the experiments first.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## ๐Ÿ“‹ Executive Summary & Recommendations\n", - "**Key findings and actionable insights from the comprehensive analysis**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# ================================\n", - "# ๐Ÿ“‹ EXECUTIVE SUMMARY\n", - "# ================================\n", - "\n", - "def generate_executive_summary():\n", - " \"\"\"\n", - " Generate comprehensive executive summary of all experiments\n", - " \"\"\"\n", - " print(\"๐Ÿ“‹ EXECUTIVE SUMMARY: FRAUD DETECTION MODEL EXPERIMENTS\")\n", - " print(\"=\" * 70)\n", - " \n", - " if not all_results:\n", - " print(\"โŒ No results available for summary\")\n", - " return\n", - " \n", - " # Calculate summary statistics\n", - " total_experiments = len(all_results)\n", - " models_tested = len(set(r['model_name'] for r in all_results))\n", - " techniques_tested = len(set(r['balancing_technique'] for r in all_results))\n", - " \n", - " # Performance statistics\n", - " f1_scores = [r['metrics']['f1_score'] for r in all_results]\n", - " precisions = [r['metrics']['precision'] for r in all_results]\n", - " recalls = [r['metrics']['recall'] for r in all_results]\n", - " \n", - " print(f\"\\n๐Ÿ”ฌ EXPERIMENT OVERVIEW:\")\n", - " print(f\" โ€ข Total Experiments: {total_experiments}\")\n", - " print(f\" โ€ข Models Tested: {models_tested}\")\n", - " print(f\" โ€ข Balancing Techniques: {techniques_tested}\")\n", - " \n", - " print(f\"\\n๐Ÿ“Š PERFORMANCE SUMMARY:\")\n", - " print(f\" โ€ข F1 Score Range: {min(f1_scores):.4f} - {max(f1_scores):.4f}\")\n", - " print(f\" โ€ข Average F1 Score: {np.mean(f1_scores):.4f} ยฑ {np.std(f1_scores):.4f}\")\n", - " print(f\" โ€ข Precision Range: {min(precisions):.4f} - {max(precisions):.4f}\")\n", - " print(f\" โ€ข Recall Range: {min(recalls):.4f} - {max(recalls):.4f}\")\n", - " \n", - " # Best 
performers\n", - " best_f1 = max(all_results, key=lambda x: x['metrics']['f1_score'])\n", - " best_precision = max(all_results, key=lambda x: x['metrics']['precision'])\n", - " best_recall = max(all_results, key=lambda x: x['metrics']['recall'])\n", - " \n", - " print(f\"\\n๐Ÿ† TOP PERFORMERS:\")\n", - " print(f\" โ€ข Best F1: {best_f1['model_name'].title()} + {best_f1['balancing_technique'].title()} ({best_f1['metrics']['f1_score']:.4f})\")\n", - " print(f\" โ€ข Best Precision: {best_precision['model_name'].title()} + {best_precision['balancing_technique'].title()} ({best_precision['metrics']['precision']:.4f})\")\n", - " print(f\" โ€ข Best Recall: {best_recall['model_name'].title()} + {best_recall['balancing_technique'].title()} ({best_recall['metrics']['recall']:.4f})\")\n", - " \n", - " # Key insights\n", - " print(f\"\\n๐Ÿ’ก KEY INSIGHTS:\")\n", - " \n", - " # Model insights\n", - " model_performance = {}\n", - " for result in all_results:\n", - " model = result['model_name']\n", - " if model not in model_performance:\n", - " model_performance[model] = []\n", - " model_performance[model].append(result['metrics']['f1_score'])\n", - " \n", - " best_avg_model = max(model_performance.keys(), key=lambda x: np.mean(model_performance[x]))\n", - " print(f\" โ€ข Best Average Model: {best_avg_model.title()} (avg F1: {np.mean(model_performance[best_avg_model]):.4f})\")\n", - " \n", - " # Balancing insights\n", - " balancing_performance = {}\n", - " for result in all_results:\n", - " technique = result['balancing_technique']\n", - " if technique not in balancing_performance:\n", - " balancing_performance[technique] = []\n", - " balancing_performance[technique].append(result['metrics']['f1_score'])\n", - " \n", - " best_avg_balancing = max(balancing_performance.keys(), key=lambda x: np.mean(balancing_performance[x]))\n", - " print(f\" โ€ข Best Average Balancing: {best_avg_balancing.title()} (avg F1: {np.mean(balancing_performance[best_avg_balancing]):.4f})\")\n", - " \n", 
- " # Business recommendations\n", - " print(f\"\\n๐ŸŽฏ BUSINESS RECOMMENDATIONS:\")\n", - " \n", - " if best_f1['metrics']['precision'] > 0.8 and best_f1['metrics']['recall'] > 0.8:\n", - " print(f\" โœ… RECOMMENDED: Deploy {best_f1['model_name'].title()} with {best_f1['balancing_technique'].title()}\")\n", - " print(f\" โ†’ Excellent balance of precision and recall\")\n", - " print(f\" โ†’ Low false alarms AND high fraud detection\")\n", - " elif best_f1['metrics']['precision'] > 0.9:\n", - " print(f\" ๐ŸŽฏ CONSERVATIVE APPROACH: High precision model recommended\")\n", - " print(f\" โ†’ Minimizes customer inconvenience from false alarms\")\n", - " print(f\" โ†’ Consider for customer-facing applications\")\n", - " elif best_f1['metrics']['recall'] > 0.9:\n", - " print(f\" ๐Ÿ” AGGRESSIVE APPROACH: High recall model recommended\")\n", - " print(f\" โ†’ Maximizes fraud detection\")\n", - " print(f\" โ†’ Consider for high-risk scenarios\")\n", - " else:\n", - " print(f\" โš–๏ธ BALANCED APPROACH: Consider business priorities\")\n", - " print(f\" โ†’ Evaluate cost of false positives vs false negatives\")\n", - " \n", - " print(f\"\\n๐Ÿ”„ NEXT STEPS:\")\n", - " print(f\" 1. Deploy best model to staging environment\")\n", - " print(f\" 2. Conduct A/B testing with current system\")\n", - " print(f\" 3. Monitor performance on live data\")\n", - " print(f\" 4. Collect feedback and retrain as needed\")\n", - " print(f\" 5. Consider ensemble methods for further improvement\")\n", - "\n", - "# Generate executive summary\n", - "if 'all_results' in locals() and all_results:\n", - " generate_executive_summary()\n", - "else:\n", - " print(\"โš ๏ธ No experiment results found. 
Please run the experiments first.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 🎓 Experiment Conclusions\n", - "\n", - "This enhanced notebook provides a comprehensive framework for fraud detection model experimentation with:\n", - "\n", - "### ✅ **What We Accomplished:**\n", - "1. **🔧 Flexible Configuration**: Easy parameter modification for different hypotheses\n", - "2. **🔄 Model Switching**: Systematic testing of multiple algorithms\n", - "3. **⚖️ Balancing Comparison**: SMOTE vs Downsampling vs Class Weighting analysis\n", - "4. **🎯 Detailed Analysis**: In-depth confusion matrix and precision/recall insights\n", - "5. **📊 Comprehensive Evaluation**: Systematic comparison framework\n", - "\n", - "### 🔍 **Key Learnings:**\n", - "- **Precision vs Recall Trade-offs**: Different balancing techniques affect this balance differently\n", - "- **Model Sensitivity**: Some models are more sensitive to class imbalance than others\n", - "- **Business Impact**: Cost analysis helps guide model selection beyond just accuracy\n", - "- **Technique Effectiveness**: Each balancing approach has specific strengths and weaknesses\n", - "\n", - "### 🚀 **Future Enhancements:**\n", - "- Add ensemble methods (voting, stacking)\n", - "- Implement advanced sampling techniques (ADASYN, BorderlineSMOTE)\n", - "- Include feature selection experiments\n", - "- Add hyperparameter optimization with Bayesian methods\n", - "- Implement cross-validation for more robust evaluation\n", - "\n", - "### 📈 **Usage Instructions:**\n", - "1. **Modify Configuration**: Update the configuration section to test different hypotheses\n", - "2. **Run Experiments**: Execute all cells to run comprehensive experiments\n", - "3. **Analyze Results**: Review detailed analysis and confusion matrix insights\n", - "4. **Select Best Model**: Use business considerations to choose optimal model\n", - "5. 
**Deploy & Monitor**: Implement selected model with continuous monitoring\n", - "\n", - "This framework enables data scientists to systematically explore different approaches and make informed decisions based on comprehensive analysis rather than single metrics." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3. Class Imbalance Analysis" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Fraud detection typically involves highly imbalanced datasets, where fraudulent transactions are much less common than legitimate ones. Let's analyze the class distribution and consider techniques to handle this imbalance." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Check class distribution, ordered by class label (not by frequency) so that\n", - "# printed labels and bar positions always refer to the right class\n", - "class_counts = y_train.value_counts().sort_index()\n", - "class_percentages = class_counts / len(y_train) * 100\n", - "\n", - "print('Class distribution in training data:')\n", - "for label, count in class_counts.items():\n", - "    print(f'Class {label}: {count} samples ({class_percentages[label]:.2f}%)')\n", - "\n", - "# Visualize class distribution\n", - "plt.figure(figsize=(10, 6))\n", - "sns.countplot(x=y_train, order=class_counts.index)\n", - "plt.title('Class Distribution in Training Data')\n", - "plt.xlabel('Class (0 = Not Fraud, 1 = Fraud)')\n", - "plt.ylabel('Count')\n", - "\n", - "# Add count labels; the x position is the bar's rank in the label-sorted order\n", - "for position, (label, count) in enumerate(class_counts.items()):\n", - "    plt.text(position, count + 100, f'{count:,}\\n({class_percentages[label]:.2f}%)',\n", - "             ha='center', va='bottom', fontsize=12)\n", - "\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Handling Class Imbalance with SMOTE" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We'll use Synthetic Minority Over-sampling Technique (SMOTE) to address the class imbalance by generating synthetic samples of the minority class." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Import SMOTE\n", - "from imblearn.over_sampling import SMOTE\n", - "\n", - "# Create preprocessing pipeline for categorical and numerical features\n", - "preprocessor = ColumnTransformer(\n", - " transformers=[\n", - " ('num', StandardScaler(), numerical_cols),\n", - " ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)\n", - " ])\n", - "\n", - "# Apply preprocessing to training data\n", - "print('Preprocessing training data...')\n", - "X_train_processed = preprocessor.fit_transform(X_train)\n", - "\n", - "# Apply SMOTE to the preprocessed data\n", - "print('Applying SMOTE to handle class imbalance...')\n", - "smote = SMOTE(random_state=42)\n", - "X_train_resampled, y_train_resampled = smote.fit_resample(X_train_processed, y_train)\n", - "\n", - "print(f'Original training data shape: {X_train_processed.shape}')\n", - "print(f'Resampled training data shape: {X_train_resampled.shape}')\n", - "\n", - "# Check class distribution after SMOTE\n", - "resampled_class_counts = pd.Series(y_train_resampled).value_counts()\n", - "resampled_class_percentages = resampled_class_counts / len(y_train_resampled) * 100\n", - "\n", - "print('\\nClass distribution after SMOTE:')\n", - "for i, (count, percentage) in enumerate(zip(resampled_class_counts, resampled_class_percentages)):\n", - " print(f'Class {i}: {count} samples ({percentage:.2f}%)')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 4. Model Training" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now let's train several machine learning models and compare their performance. We'll start with a simple model and then try more complex ones." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Import models and evaluation metrics\n", - "from sklearn.linear_model import LogisticRegression\n", - "from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier\n", - "from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report\n", - "\n", - "# Function to evaluate model performance\n", - "def evaluate_model(model, X_test, y_test, model_name):\n", - "    \"\"\"Evaluate a fitted classifier on held-out data.\n", - "\n", - "    Prints accuracy, precision, recall and F1, prints and plots the confusion\n", - "    matrix, prints the classification report, and returns the metrics as a dict\n", - "    with keys 'accuracy', 'precision', 'recall', 'f1' and 'confusion_matrix'.\n", - "    \"\"\"\n", - "    # Make predictions\n", - "    y_pred = model.predict(X_test)\n", - "\n", - "    # Calculate metrics\n", - "    accuracy = accuracy_score(y_test, y_pred)\n", - "    precision = precision_score(y_test, y_pred)\n", - "    recall = recall_score(y_test, y_pred)\n", - "    f1 = f1_score(y_test, y_pred)\n", - "\n", - "    # Print metrics\n", - "    print(f'\\n{model_name} Performance:')\n", - "    print(f'Accuracy: {accuracy:.4f}')\n", - "    print(f'Precision: {precision:.4f}')\n", - "    print(f'Recall: {recall:.4f}')\n", - "    print(f'F1 Score: {f1:.4f}')\n", - "\n", - "    # Print confusion matrix\n", - "    cm = confusion_matrix(y_test, y_pred)\n", - "    print('\\nConfusion Matrix:')\n", - "    print(cm)\n", - "\n", - "    # Plot confusion matrix\n", - "    plt.figure(figsize=(8, 6))\n", - "    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False)\n", - "    plt.xlabel('Predicted')\n", - "    plt.ylabel('True')\n", - "    plt.title(f'Confusion Matrix - {model_name}')\n", - "    plt.show()\n", - "\n", - "    # Print classification report\n", - "    print('\\nClassification Report:')\n", - "    print(classification_report(y_test, y_pred))\n", - "\n", - "    return {'accuracy': accuracy, 'precision': precision, 'recall': recall, 'f1': f1, 'confusion_matrix': cm}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4.1 Logistic Regression" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Train Logistic Regression model\n", - "print('Training Logistic Regression model...')\n", - "lr_model = LogisticRegression(random_state=42, max_iter=1000, class_weight='balanced')\n", - "lr_model.fit(X_train_resampled, y_train_resampled)\n", - "\n", - "# Preprocess validation data\n", - "X_val_processed = preprocessor.transform(X_val)\n", - "\n", - "# Evaluate model\n", - "lr_metrics = evaluate_model(lr_model, X_val_processed, y_val, 'Logistic Regression')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4.2 Random Forest" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": 
[ - "# Train Random Forest model\n", - "print('Training Random Forest model...')\n", - "rf_model = RandomForestClassifier(n_estimators=100, random_state=42, class_weight='balanced')\n", - "rf_model.fit(X_train_resampled, y_train_resampled)\n", - "\n", - "# Evaluate model\n", - "rf_metrics = evaluate_model(rf_model, X_val_processed, y_val, 'Random Forest')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4.3 Gradient Boosting" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Train Gradient Boosting model\n", - "print('Training Gradient Boosting model...')\n", - "gb_model = GradientBoostingClassifier(n_estimators=100, random_state=42)\n", - "gb_model.fit(X_train_resampled, y_train_resampled)\n", - "\n", - "# Evaluate model\n", - "gb_metrics = evaluate_model(gb_model, X_val_processed, y_val, 'Gradient Boosting')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 5. Model Comparison" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's compare the performance of the different models to select the best one." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Create a DataFrame to compare model performance\n", - "models = ['Logistic Regression', 'Random Forest', 'Gradient Boosting']\n", - "metrics = ['accuracy', 'precision', 'recall', 'f1']\n", - "\n", - "comparison_data = []\n", - "for metric in metrics:\n", - " comparison_data.append([\n", - " lr_metrics[metric],\n", - " rf_metrics[metric],\n", - " gb_metrics[metric]\n", - " ])\n", - "\n", - "comparison_df = pd.DataFrame(comparison_data, columns=models, index=metrics)\n", - "\n", - "# Display the comparison table\n", - "print('Model Performance Comparison:')\n", - "comparison_df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Visualize model comparison\n", - "plt.figure(figsize=(12, 8))\n", - "comparison_df.plot(kind='bar', figsize=(12, 8))\n", - "plt.title('Model Performance Comparison')\n", - "plt.xlabel('Metric')\n", - "plt.ylabel('Score')\n", - "plt.xticks(rotation=0)\n", - "plt.legend(title='Model')\n", - "plt.grid(axis='y')\n", - "\n", - "# Add value labels\n", - "for i, metric in enumerate(metrics):\n", - " for j, model in enumerate(models):\n", - " value = comparison_df.iloc[i, j]\n", - " plt.text(i + (j - 1) * 0.3, value + 0.01, f'{value:.4f}', ha='center', va='bottom', fontsize=9)\n", - "\n", - "plt.tight_layout()\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 6. Feature Importance" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's analyze which features are most important for the best performing model (Random Forest in this case)." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Get feature names after one-hot encoding\n", - "# For numerical features, the names remain the same\n", - "# For categorical features, we need to get the one-hot encoded feature names\n", - "\n", - "# Get the one-hot encoder from the preprocessor\n", - "ohe = preprocessor.named_transformers_['cat']\n", - "\n", - "# Get the one-hot encoded feature names\n", - "categorical_features = []\n", - "for i, category in enumerate(categorical_cols):\n", - " values = ohe.categories_[i]\n", - " for value in values:\n", - " categorical_features.append(f'{category}_{value}')\n", - "\n", - "# Combine with numerical feature names\n", - "feature_names = numerical_cols + categorical_features\n", - "\n", - "# Get feature importances from the Random Forest model\n", - "importances = rf_model.feature_importances_\n", - "\n", - "# Create a DataFrame for visualization\n", - "feature_importance = pd.DataFrame({\n", - " 'Feature': feature_names,\n", - " 'Importance': importances\n", - "}).sort_values('Importance', ascending=False)\n", - "\n", - "# Display the top 20 most important features\n", - "print('Top 20 Most Important Features:')\n", - "feature_importance.head(20)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Visualize feature importance\n", - "plt.figure(figsize=(12, 10))\n", - "sns.barplot(x='Importance', y='Feature', data=feature_importance.head(20))\n", - "plt.title('Top 20 Feature Importance')\n", - "plt.xlabel('Importance')\n", - "plt.ylabel('Feature')\n", - "plt.tight_layout()\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 7. Save the Best Model" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's save the best performing model (Random Forest) for later use." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Create a full pipeline with preprocessing and the best model\n", - "best_model = Pipeline(steps=[\n", - " ('preprocessor', preprocessor),\n", - " ('classifier', rf_model)\n", - "])\n", - "\n", - "# Save the model\n", - "import os\n", - "os.makedirs(config.MODELS_DIR, exist_ok=True)\n", - "joblib.dump(best_model, config.MODEL_PATH)\n", - "print(f'Model saved to {config.MODEL_PATH}')\n", - "\n", - "# Save model metadata\n", - "import json\n", - "metadata = {\n", - " 'model_type': 'RandomForestClassifier',\n", - " 'metrics': {\n", - " 'accuracy': float(rf_metrics['accuracy']),\n", - " 'precision': float(rf_metrics['precision']),\n", - " 'recall': float(rf_metrics['recall']),\n", - " 'f1': float(rf_metrics['f1'])\n", - " },\n", - " 'feature_importance': feature_importance.head(20).to_dict(orient='records'),\n", - " 'features': X_train.columns.tolist()\n", - "}\n", - "\n", - "with open(config.MODEL_METADATA_PATH, 'w') as f:\n", - " json.dump(metadata, f, indent=4)\n", - "\n", - "print(f'Model metadata saved to {config.MODEL_METADATA_PATH}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 8. Summary" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In this notebook, we trained and evaluated several machine learning models for fraud detection:\n", - "\n", - "1. **Data Preparation**: We loaded the preprocessed data and split it into training and validation sets.\n", - "\n", - "2. **Class Imbalance**: We addressed the class imbalance problem using SMOTE to generate synthetic samples of the minority class.\n", - "\n", - "3. **Model Training**: We trained three different models - Logistic Regression, Random Forest, and Gradient Boosting.\n", - "\n", - "4. 
**Model Evaluation**: We evaluated the models using accuracy, precision, recall, and F1 score, with a focus on the F1 score due to the class imbalance.\n", - "\n", - "5. **Model Comparison**: We compared the performance of the different models and found that Random Forest performed the best overall.\n", - "\n", - "6. **Feature Importance**: We analyzed which features were most important for the Random Forest model.\n", - "\n", - "7. **Model Saving**: We saved the best model (Random Forest) and its metadata for later use.\n", - "\n", - "The Random Forest model achieved good performance in detecting fraudulent transactions, with a balance between precision and recall as reflected in the F1 score. The most important features for fraud detection included transaction amount, distance between cardholder and merchant, and time-based features.\n", - "\n", - "Next steps could include:\n", - "- Fine-tuning the model hyperparameters using grid search or random search\n", - "- Trying more advanced models like XGBoost or neural networks\n", - "- Implementing the model in a production environment for real-time fraud detection" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Model Training for Fraud Detection" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook focuses on training and evaluating machine learning models for fraud detection using the preprocessed transaction data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import necessary libraries\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import os\n", + "import sys\n", + "import joblib\n", + "\n", + "# Set plot style\n", + "plt.style.use('seaborn-v0_8-whitegrid')\n", + "sns.set(font_scale=1.2)\n", + "\n", + "# Configure plot size\n", + "plt.rcParams['figure.figsize'] = (12, 8)\n", + "\n", + "# Display all columns\n", + "pd.set_option('display.max_columns', None)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Add the project root to the path so we can import from src\n", + "sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath('__file__'))))\n", + "from src import config" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Load the Preprocessed Data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's load the preprocessed training and test data that we created in the feature engineering notebook." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load preprocessed training data\n", + "try:\n", + " train_data = pd.read_csv(config.PROCESSED_TRAIN_DATA_PATH)\n", + " print(f'Loaded preprocessed training data from {config.PROCESSED_TRAIN_DATA_PATH}')\n", + "except FileNotFoundError:\n", + " print(f'Preprocessed training data not found at {config.PROCESSED_TRAIN_DATA_PATH}')\n", + " print('Please run the feature_engineering.ipynb notebook first to create the preprocessed data.')\n", + " # If preprocessed data doesn't exist, we'll load and preprocess the raw data here\n", + " # This is just a fallback and would normally be handled by the feature engineering notebook\n", + " train_data = pd.read_csv(config.TRAIN_DATA_PATH)\n", + " print(f'Loaded raw training data from {config.TRAIN_DATA_PATH} instead.')\n", + "\n", + "# Load preprocessed test data\n", + "try:\n", + " test_data = pd.read_csv(config.PROCESSED_TEST_DATA_PATH)\n", + " print(f'Loaded preprocessed test data from {config.PROCESSED_TEST_DATA_PATH}')\n", + "except FileNotFoundError:\n", + " print(f'Preprocessed test data not found at {config.PROCESSED_TEST_DATA_PATH}')\n", + " # If preprocessed data doesn't exist, we'll load the raw data\n", + " test_data = pd.read_csv(config.TEST_DATA_PATH)\n", + " print(f'Loaded raw test data from {config.TEST_DATA_PATH} instead.')\n", + "\n", + "print(f'\\nTraining data shape: {train_data.shape}')\n", + "print(f'Test data shape: {test_data.shape}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Display the first few rows of the training data\n", + "train_data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2.
Data Preparation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's prepare the data for model training by splitting it into features and target variables, and then into training and validation sets." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import necessary libraries for model training\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n", + "from sklearn.compose import ColumnTransformer\n", + "from sklearn.pipeline import Pipeline\n", + "\n", + "# Check if the target variable exists in the data\n", + "if 'is_fraud' in train_data.columns:\n", + " # Split features and target\n", + " X = train_data.drop('is_fraud', axis=1)\n", + " y = train_data['is_fraud']\n", + " \n", + " # Split into training and validation sets\n", + " X_train, X_val, y_train, y_val = train_test_split(\n", + " X, y, test_size=params['test_size'], random_state=params['random_state'], stratify=y)\n", + " \n", + " print(f'Training features shape: {X_train.shape}')\n", + " print(f'Validation features shape: {X_val.shape}')\n", + " print(f'Training target shape: {y_train.shape}')\n", + " print(f'Validation target shape: {y_val.shape}')\n", + "else:\n", + " print(\"Target variable 'is_fraud' not found in the data. Please check the data preprocessing step.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Identify categorical and numerical features\n", + "categorical_cols = X_train.select_dtypes(include=['object', 'category']).columns.tolist()\n", + "numerical_cols = X_train.select_dtypes(include=['int64', 'float64']).columns.tolist()\n", + "\n", + "print(f'Categorical features: {categorical_cols}')\n", + "print(f'Numerical features: {numerical_cols}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. 
Class Imbalance Analysis" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Fraud detection typically involves highly imbalanced datasets, where fraudulent transactions are much less common than legitimate ones. Let's analyze the class distribution and consider techniques to handle this imbalance." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check class distribution\n", + "class_counts = y_train.value_counts()\n", + "class_percentages = class_counts / len(y_train) * 100\n", + "\n", + "print('Class distribution in training data:')\n", + "for i, (count, percentage) in enumerate(zip(class_counts, class_percentages)):\n", + " print(f'Class {i}: {count} samples ({percentage:.2f}%)')\n", + "\n", + "# Visualize class distribution\n", + "plt.figure(figsize=(10, 6))\n", + "sns.countplot(x=y_train)\n", + "plt.title('Class Distribution in Training Data')\n", + "plt.xlabel('Class (0 = Not Fraud, 1 = Fraud)')\n", + "plt.ylabel('Count')\n", + "\n", + "# Add count labels\n", + "for i, count in enumerate(class_counts):\n", + " plt.text(i, count + 100, f'{count:,}\\n({class_percentages[i]:.2f}%)', \n", + " ha='center', va='bottom', fontsize=12)\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Handling Class Imbalance with SMOTE" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We'll use Synthetic Minority Over-sampling Technique (SMOTE) to address the class imbalance by generating synthetic samples of the minority class."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import SMOTE\n", + "from imblearn.over_sampling import SMOTE\n", + "from sklearn.utils import resample\n", + "\n", + "# Create preprocessing pipeline for categorical and numerical features\n", + "preprocessor = ColumnTransformer(\n", + " transformers=[\n", + " ('num', StandardScaler(), numerical_cols),\n", + " ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)\n", + " ])\n", + "\n", + "# Apply preprocessing to training data\n", + "print('Preprocessing training data...')\n", + "X_train_processed = preprocessor.fit_transform(X_train)\n", + "\n", + "# Apply selected class balancing technique\n", + "balancing = params.get('balancing', 'smote')\n", + "if balancing == 'smote':\n", + " print('Applying SMOTE to handle class imbalance...')\n", + " smote = SMOTE(random_state=params['smote']['random_state'])\n", + " X_train_resampled, y_train_resampled = smote.fit_resample(X_train_processed, y_train)\n", + "elif balancing == 'downsample':\n", + " print('Applying downsampling to handle class imbalance...')\n", + " # Concatenate features and target for downsampling\n", + " Xy = pd.DataFrame(X_train_processed.todense() if hasattr(X_train_processed, 'todense') else X_train_processed)\n", + " Xy['target'] = y_train.values\n", + " # Separate majority and minority classes\n", + " majority = Xy[Xy['target'] == 0]\n", + " minority = Xy[Xy['target'] == 1]\n", + " # Downsample majority class\n", + " majority_downsampled = resample(majority,\n", + " replace=False,\n", + " n_samples=len(minority),\n", + " random_state=params['random_state'])\n", + " Xy_downsampled = pd.concat([majority_downsampled, minority])\n", + " X_train_resampled = Xy_downsampled.drop('target', axis=1).values\n", + " y_train_resampled = Xy_downsampled['target'].values\n", + "elif balancing == 'none':\n", + " print('No class balancing applied.')\n", + " X_train_resampled, 
y_train_resampled = X_train_processed, y_train\n", + "else:\n", + " raise ValueError(f'Unknown balancing method: {balancing}')\n", + "\n", + "print(f'Original training data shape: {X_train_processed.shape}')\n", + "print(f'Resampled training data shape: {X_train_resampled.shape}')\n", + "\n", + "# Check class distribution after balancing\n", + "resampled_class_counts = pd.Series(y_train_resampled).value_counts()\n", + "resampled_class_percentages = resampled_class_counts / len(y_train_resampled) * 100\n", + "\n", + "print('\\nClass distribution after balancing:')\n", + "for i, (count, percentage) in enumerate(zip(resampled_class_counts, resampled_class_percentages)):\n", + " print(f'Class {i}: {count} samples ({percentage:.2f}%)')\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Model Training" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's train several machine learning models and compare their performance. We'll start with a simple model and then try more complex ones." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import models and evaluation metrics\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier\n", + "from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report\n", + "\n", + "# Function to evaluate model performance\n", + "def evaluate_model(model, X_test, y_test, model_name):\n", + " # Make predictions\n", + " y_pred = model.predict(X_test)\n", + " \n", + " # Calculate metrics\n", + " accuracy = accuracy_score(y_test, y_pred)\n", + " precision = precision_score(y_test, y_pred)\n", + " recall = recall_score(y_test, y_pred)\n", + " f1 = f1_score(y_test, y_pred)\n", + " \n", + " # Print metrics\n", + " print(f'\\n{model_name} Performance:')\n", + " print(f'Accuracy: {accuracy:.4f}')\n", + " print(f'Precision: {precision:.4f}')\n", + " print(f'Recall: {recall:.4f}')\n", + " print(f'F1 Score: {f1:.4f}')\n", + " \n", + " # Print confusion matrix\n", + " cm = confusion_matrix(y_test, y_pred)\n", + " print('\\nConfusion Matrix:')\n", + " print(cm)\n", + " \n", + " # Plot confusion matrix\n", + " plt.figure(figsize=(8, 6))\n", + " sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False)\n", + " plt.xlabel('Predicted')\n", + " plt.ylabel('True')\n", + " plt.title(f'Confusion Matrix - {model_name}')\n", + " plt.show()\n", + " \n", + " # Print classification report\n", + " print('\\nClassification Report:')\n", + " print(classification_report(y_test, y_pred))\n", + " \n", + " return {'accuracy': accuracy, 'precision': precision, 'recall': recall, 'f1': f1, 'confusion_matrix': cm}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4.1 Logistic Regression" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Train
and evaluate all models in params['models']\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier\n", + "from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report\n", + "\n", + "model_classes = {\n", + " 'LogisticRegression': LogisticRegression,\n", + " 'RandomForestClassifier': RandomForestClassifier,\n", + " 'GradientBoostingClassifier': GradientBoostingClassifier\n", + "}\n", + "\n", + "results = {}\n", + "X_val_processed = preprocessor.transform(X_val)\n", + "\n", + "for model_name, model_params in params['models'].items():\n", + " print(f'Training {model_name}...')\n", + " model_cls = model_classes[model_name]\n", + " model = model_cls(**model_params)\n", + " model.fit(X_train_resampled, y_train_resampled)\n", + " metrics = evaluate_model(model, X_val_processed, y_val, model_name)\n", + " results[model_name] = {\n", + " 'model': model,\n", + " 'metrics': metrics,\n", + " 'balancing_method': params['balancing']\n", + " }\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4.2 Random Forest" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Train Random Forest model\n", + "print('Training Random Forest model...')\n", + "rf_model = RandomForestClassifier(**params['models']['RandomForestClassifier'])\n", + "rf_model.fit(X_train_resampled, y_train_resampled)\n", + "\n", + "# Evaluate model\n", + "rf_metrics = evaluate_model(rf_model, X_val_processed, y_val, 'Random Forest')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4.3 Gradient Boosting" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Train Gradient Boosting model\n", + "print('Training Gradient Boosting model...')\n", + "gb_model = 
GradientBoostingClassifier(**params['models']['GradientBoostingClassifier'])\n", + "gb_model.fit(X_train_resampled, y_train_resampled)\n", + "\n", + "# Evaluate model\n", + "gb_metrics = evaluate_model(gb_model, X_val_processed, y_val, 'Gradient Boosting')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Model Comparison" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's compare the performance of the different models to select the best one." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create a DataFrame to compare model performance\n", + "models = ['Logistic Regression', 'Random Forest', 'Gradient Boosting']\n", + "metrics = ['accuracy', 'precision', 'recall', 'f1']\n", + "\n", + "comparison_data = []\n", + "for metric in metrics:\n", + " comparison_data.append([\n", + " results['LogisticRegression']['metrics'][metric],\n", + " rf_metrics[metric],\n", + " gb_metrics[metric]\n", + " ])\n", + "\n", + "comparison_df = pd.DataFrame(comparison_data, columns=models, index=metrics)\n", + "\n", + "# Display the comparison table\n", + "print('Model Performance Comparison:')\n", + "comparison_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Visualize model comparison\n", + "plt.figure(figsize=(12, 8))\n", + "comparison_df.plot(kind='bar', figsize=(12, 8))\n", + "plt.title('Model Performance Comparison')\n", + "plt.xlabel('Metric')\n", + "plt.ylabel('Score')\n", + "plt.xticks(rotation=0)\n", + "plt.legend(title='Model')\n", + "plt.grid(axis='y')\n", + "\n", + "# Add value labels\n", + "for i, metric in enumerate(metrics):\n", + " for j, model in enumerate(models):\n", + " value = comparison_df.iloc[i, j]\n", + " plt.text(i + (j - 1) * 0.3, value + 0.01, f'{value:.4f}', ha='center', va='bottom', fontsize=9)\n", + "\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", +
"metadata": {}, + "source": [ + "## 6. Feature Importance" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's analyze which features are most important for the best performing model (Random Forest in this case)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get feature names after one-hot encoding\n", + "# For numerical features, the names remain the same\n", + "# For categorical features, we need to get the one-hot encoded feature names\n", + "\n", + "# Get the one-hot encoder from the preprocessor\n", + "ohe = preprocessor.named_transformers_['cat']\n", + "\n", + "# Get the one-hot encoded feature names\n", + "categorical_features = []\n", + "for i, category in enumerate(categorical_cols):\n", + " values = ohe.categories_[i]\n", + " for value in values:\n", + " categorical_features.append(f'{category}_{value}')\n", + "\n", + "# Combine with numerical feature names\n", + "feature_names = numerical_cols + categorical_features\n", + "\n", + "# Get feature importances from the Random Forest model\n", + "importances = rf_model.feature_importances_\n", + "\n", + "# Create a DataFrame for visualization\n", + "feature_importance = pd.DataFrame({\n", + " 'Feature': feature_names,\n", + " 'Importance': importances\n", + "}).sort_values('Importance', ascending=False)\n", + "\n", + "# Display the top 20 most important features\n", + "print('Top 20 Most Important Features:')\n", + "feature_importance.head(20)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Visualize feature importance\n", + "plt.figure(figsize=(12, 10))\n", + "sns.barplot(x='Importance', y='Feature', data=feature_importance.head(20))\n", + "plt.title('Top 20 Feature Importance')\n", + "plt.xlabel('Importance')\n", + "plt.ylabel('Feature')\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, 
+ "source": [ + "## 7. Save the Best Model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's save the best performing model (Random Forest) for later use." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create a full pipeline with preprocessing and the best model\n", + "best_model = Pipeline(steps=[\n", + " ('preprocessor', preprocessor),\n", + " ('classifier', rf_model)\n", + "])\n", + "\n", + "# Save the model\n", + "import os\n", + "os.makedirs(config.MODELS_DIR, exist_ok=True)\n", + "joblib.dump(best_model, config.MODEL_PATH)\n", + "print(f'Model saved to {config.MODEL_PATH}')\n", + "\n", + "# Save model metadata\n", + "import json\n", + "metadata = {\n", + " 'model_type': 'RandomForestClassifier',\n", + " 'metrics': {\n", + " 'accuracy': float(rf_metrics['accuracy']),\n", + " 'precision': float(rf_metrics['precision']),\n", + " 'recall': float(rf_metrics['recall']),\n", + " 'f1': float(rf_metrics['f1'])\n", + " },\n", + " 'feature_importance': feature_importance.head(20).to_dict(orient='records'),\n", + " 'features': X_train.columns.tolist()\n", + "}\n", + "\n", + "with open(config.MODEL_METADATA_PATH, 'w') as f:\n", + " json.dump(metadata, f, indent=4)\n", + "\n", + "print(f'Model metadata saved to {config.MODEL_METADATA_PATH}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 8. Summary" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this notebook, we trained and evaluated several machine learning models for fraud detection:\n", + "\n", + "1. **Data Preparation**: We loaded the preprocessed data and split it into training and validation sets.\n", + "\n", + "2. **Class Imbalance**: We addressed the class imbalance problem using SMOTE to generate synthetic samples of the minority class.\n", + "\n", + "3. 
**Model Training**: We trained three different models - Logistic Regression, Random Forest, and Gradient Boosting.\n", + "\n", + "4. **Model Evaluation**: We evaluated the models using accuracy, precision, recall, and F1 score, with a focus on the F1 score due to the class imbalance.\n", + "\n", + "5. **Model Comparison**: We compared the performance of the different models and found that Random Forest performed the best overall.\n", + "\n", + "6. **Feature Importance**: We analyzed which features were most important for the Random Forest model.\n", + "\n", + "7. **Model Saving**: We saved the best model (Random Forest) and its metadata for later use.\n", + "\n", + "The Random Forest model achieved good performance in detecting fraudulent transactions, with a balance between precision and recall as reflected in the F1 score. The most important features for fraud detection included transaction amount, distance between cardholder and merchant, and time-based features.\n", + "\n", + "Next steps could include:\n", + "- Fine-tuning the model hyperparameters using grid search or random search\n", + "- Trying more advanced models like XGBoost or neural networks\n", + "- Implementing the model in a production environment for real-time fraud detection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# --- PARAMETER CONFIGURATION CELL ---\n", + "params = {\n", + " 'random_state': 42,\n", + " 'test_size': 0.2,\n", + " 'balancing': 'smote', # Options: 'smote', 'downsample', 'none'\n", + " 'smote': {\n", + " 'enabled': True,\n", + " 'random_state': 42\n", + " },\n", + " 'downsample': {\n", + " 'enabled': False\n", + " },\n", + " 'models': {\n", + " 'LogisticRegression': {\n", + " 'class_weight': 'balanced',\n", + " 'max_iter': 1000,\n", + " 'random_state': 42\n", + " },\n", + " 'RandomForestClassifier': {\n", + " 'n_estimators': 100,\n", + " 'class_weight': 'balanced',\n", + " 'random_state': 42\n", + " },\n", + 
" 'GradientBoostingClassifier': {\n", + " 'n_estimators': 100,\n", + " 'random_state': 42\n", + " }\n", + " }\n", + "}\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Understanding the Confusion Matrix, Precision, and Recall in Fraud Detection\n", + "\n", + "- **Confusion Matrix**: Shows the counts of true positives (fraud correctly detected), false positives (legitimate transactions flagged as fraud), true negatives (legitimate transactions correctly identified), and false negatives (fraud missed).\n", + "- **Precision**: Of all transactions flagged as fraud, how many were actually fraud? High precision means few false alarms.\n", + "- **Recall**: Of all actual frauds, how many did we catch? High recall means few missed frauds.\n", + "- **F1 Score**: Harmonic mean of precision and recall. Useful when classes are imbalanced.\n", + "\n", + "In fraud detection, recall is often prioritized (catch as many frauds as possible), but high precision is also important to avoid annoying users with false alarms.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Summary table for model and balancing method performance\n", + "import pandas as pd\n", + "summary_rows = []\n", + "for model_name, result in results.items():\n", + " metrics = result['metrics']\n", + " summary_rows.append({\n", + " 'Model': model_name,\n", + " 'Balancing': result.get('balancing_method', params.get('balancing', 'smote')),\n", + " 'Precision': metrics['precision'],\n", + " 'Recall': metrics['recall'],\n", + " 'F1': metrics['f1']\n", + " })\n", + "summary_df = pd.DataFrame(summary_rows)\n", + "print('Model and Balancing Method Performance Summary:')\n", + "display(summary_df)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## How to Interpret the Results\n", + "\n", + "- Compare precision, recall, and F1 across models and balancing methods.\n", + "- Look for the best 
trade-off: high recall (catching fraud) with acceptable precision (not too many false alarms).\n", + "- See how the confusion matrix changes: does a method increase recall but lower precision, or vice versa?\n", + "- Use these insights to choose the best model and balancing strategy for your business needs.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 + } + \ No newline at end of file