{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# Fraud detection: random forest training\n",
        "\n",
        "Fits a preprocessing + `RandomForestClassifier` pipeline on `train_processed.csv`, prints a classification report for `test_processed.csv`, and saves the fitted pipeline to `fraud_model.pkl`."
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "import pandas as pd\n",
        "import numpy as np\n",
        "from sklearn.ensemble import RandomForestClassifier\n",
        "from sklearn.metrics import classification_report\n",
        "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
        "from sklearn.compose import ColumnTransformer\n",
        "from sklearn.pipeline import Pipeline\n",
        "import joblib"
      ],
      "metadata": {
        "id": "a1U4QR1AOKAK"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Configuration: every value a reader might tune lives in this cell.\n",
        "TRAIN_PATH = 'train_processed.csv'\n",
        "TEST_PATH = 'test_processed.csv'\n",
        "MODEL_PATH = 'fraud_model.pkl'\n",
        "SEED = 42\n",
        "\n",
        "TARGET = 'is_fraud'\n",
        "# Columns removed from both splits before modelling. Defined once so the\n",
        "# train and test feature frames cannot drift apart.\n",
        "DROP_COLUMNS = [TARGET, 'trans_date_trans_time', 'trans_num', 'dob', 'unix_time']"
      ],
      "metadata": {},
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Load processed data\n",
        "train_df = pd.read_csv(TRAIN_PATH)\n",
        "test_df = pd.read_csv(TEST_PATH)"
      ],
      "metadata": {
        "id": "mOjH5CqhOlTp"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Prepare features and target (identical column handling for both splits)\n",
        "X_train = train_df.drop(columns=DROP_COLUMNS)\n",
        "y_train = train_df[TARGET]\n",
        "\n",
        "X_test = test_df.drop(columns=DROP_COLUMNS)\n",
        "y_test = test_df[TARGET]\n"
      ],
      "metadata": {
        "id": "VW8cpQ6FNvC9"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Define preprocessing\n",
        "numeric_features = ['amt', 'city_pop', 'hour', 'day_of_week', 'month', 'age', 'distance']\n",
        "categorical_features = ['category', 'gender', 'job', 'merchant']\n",
        "\n",
        "# Fail before the (slow) fit if either split lacks a model input; otherwise a\n",
        "# column missing only from the test split would surface at predict time.\n",
        "required_features = numeric_features + categorical_features\n",
        "for split_name, frame in (('train', X_train), ('test', X_test)):\n",
        "    missing = [col for col in required_features if col not in frame.columns]\n",
        "    assert not missing, f'{split_name} split is missing feature columns: {missing}'\n",
        "\n",
        "numeric_transformer = StandardScaler()\n",
        "# Categories unseen during fit are encoded as all-zero rows instead of raising.\n",
        "categorical_transformer = OneHotEncoder(handle_unknown='ignore')\n",
        "\n",
        "# Columns not named in either list are discarded (ColumnTransformer's default\n",
        "# remainder='drop'), so extra columns in the CSVs do not reach the model.\n",
        "preprocessor = ColumnTransformer(\n",
        "    transformers=[\n",
        "        ('num', numeric_transformer, numeric_features),\n",
        "        ('cat', categorical_transformer, categorical_features)\n",
        "    ])"
      ],
      "metadata": {
        "id": "GkGMEEYqOc_p"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Create pipeline: preprocessing and classifier are fitted and saved together,\n",
        "# so the saved model accepts raw feature frames.\n",
        "model = Pipeline(steps=[\n",
        "    ('preprocessor', preprocessor),\n",
        "    ('classifier', RandomForestClassifier(\n",
        "        class_weight='balanced',  # reweight classes inversely to their frequency\n",
        "        random_state=SEED,\n",
        "        n_jobs=-1,  # use all CPU cores for fitting and prediction\n",
        "    ))\n",
        "])\n"
      ],
      "metadata": {
        "id": "s2VyUaV6OZw9"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Train model\n",
        "model.fit(X_train, y_train)"
      ],
      "metadata": {
        "id": "6dXK4nJnOW40"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Evaluate on test data\n",
        "y_pred = model.predict(X_test)\n",
        "print(\"Test Set Performance:\")\n",
        "print(classification_report(y_test, y_pred))"
      ],
      "metadata": {
        "id": "PNV9fZhkOVFM"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Save model (the whole fitted pipeline, including the preprocessor)\n",
        "joblib.dump(model, MODEL_PATH)\n",
        "print(f\"Model saved to {MODEL_PATH}\")"
      ],
      "metadata": {
        "id": "4j2mlzRINc_N"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}