141 lines
3.7 KiB
Plaintext
141 lines
3.7 KiB
Plaintext
{
|
|
"nbformat": 4,
|
|
"nbformat_minor": 0,
|
|
"metadata": {
|
|
"colab": {
|
|
"provenance": []
|
|
},
|
|
"kernelspec": {
|
|
"name": "python3",
|
|
"display_name": "Python 3"
|
|
},
|
|
"language_info": {
|
|
"name": "python"
|
|
}
|
|
},
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n",
|
|
"from sklearn.ensemble import RandomForestClassifier\n",
|
|
"from sklearn.metrics import classification_report\n",
|
|
"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
|
|
"from sklearn.compose import ColumnTransformer\n",
|
|
"from sklearn.pipeline import Pipeline\n",
|
|
"import joblib"
|
|
],
|
|
"metadata": {
|
|
"id": "a1U4QR1AOKAK"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# Read the processed train/test CSVs (paths are relative to the working directory)\n",
|
|
"train_df, test_df = (\n",
|
|
"    pd.read_csv(csv_path)\n",
|
|
"    for csv_path in ('train_processed.csv', 'test_processed.csv')\n",
|
|
")"
|
|
],
|
|
"metadata": {
|
|
"id": "mOjH5CqhOlTp"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# Prepare features and target.\n",
|
|
"# A single shared list of non-feature columns keeps the train and test\n",
|
|
"# frames aligned; the label is in that list so it never reaches X.\n",
|
|
"NON_FEATURE_COLUMNS = ['is_fraud', 'trans_date_trans_time', 'trans_num', 'dob', 'unix_time']\n",
|
|
"TARGET_COLUMN = 'is_fraud'\n",
|
|
"\n",
|
|
"X_train = train_df.drop(columns=NON_FEATURE_COLUMNS)\n",
|
|
"y_train = train_df[TARGET_COLUMN]\n",
|
|
"\n",
|
|
"X_test = test_df.drop(columns=NON_FEATURE_COLUMNS)\n",
|
|
"y_test = test_df[TARGET_COLUMN]\n"
|
|
],
|
|
"metadata": {
|
|
"id": "VW8cpQ6FNvC9"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# Column groups consumed by the preprocessing step\n",
|
|
"numeric_features = [\n",
|
|
"    'amt', 'city_pop', 'hour', 'day_of_week', 'month', 'age', 'distance',\n",
|
|
"]\n",
|
|
"categorical_features = ['category', 'gender', 'job', 'merchant']\n",
|
|
"\n",
|
|
"# Standardize numeric columns; one-hot encode categoricals, ignoring\n",
|
|
"# categories that were not seen during fit instead of raising\n",
|
|
"numeric_transformer = StandardScaler()\n",
|
|
"categorical_transformer = OneHotEncoder(handle_unknown='ignore')\n",
|
|
"\n",
|
|
"# Columns not listed above are dropped (ColumnTransformer's default remainder)\n",
|
|
"preprocessor = ColumnTransformer([\n",
|
|
"    ('num', numeric_transformer, numeric_features),\n",
|
|
"    ('cat', categorical_transformer, categorical_features),\n",
|
|
"])"
|
|
],
|
|
"metadata": {
|
|
"id": "GkGMEEYqOc_p"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# Create pipeline: preprocessing followed by a random forest.\n",
|
|
"# class_weight='balanced' reweights classes inversely to their frequency.\n",
|
|
"# n_jobs=-1 fits and predicts on all available cores; random_state still\n",
|
|
"# fixes the forest that gets built.\n",
|
|
"model = Pipeline(steps=[\n",
|
|
"    ('preprocessor', preprocessor),\n",
|
|
"    ('classifier', RandomForestClassifier(\n",
|
|
"        class_weight='balanced',\n",
|
|
"        random_state=42,\n",
|
|
"        n_jobs=-1,\n",
|
|
"    ))\n",
|
|
"])\n"
|
|
],
|
|
"metadata": {
|
|
"id": "s2VyUaV6OZw9"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# Fit the preprocessing steps and the classifier on the training split\n",
|
|
"model.fit(X=X_train, y=y_train)"
|
|
],
|
|
"metadata": {
|
|
"id": "6dXK4nJnOW40"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# Predict on the test split and print the per-class metrics report\n",
|
|
"y_pred = model.predict(X_test)\n",
|
|
"test_report = classification_report(y_true=y_test, y_pred=y_pred)\n",
|
|
"print(\"Test Set Performance:\")\n",
|
|
"print(test_report)"
|
|
],
|
|
"metadata": {
|
|
"id": "PNV9fZhkOVFM"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# Persist the fitted pipeline (preprocessing + classifier) with joblib\n",
|
|
"joblib.dump(value=model, filename='fraud_model.pkl')\n",
|
|
"print(\"Model saved to fraud_model.pkl\")"
|
|
],
|
|
"metadata": {
|
|
"id": "4j2mlzRINc_N"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
}
|
|
]
|
|
} |