{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "2c5baf8e",
   "metadata": {},
   "source": [
    "# 📊 Exploratory Data Analysis: Fraud Detection Dataset\n",
    "\n",
    "Loads `fraudTest.csv` and looks at class balance, summary statistics, numeric feature correlations, transaction amounts, and the hour of day of transactions."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2f3e6a97",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "\n",
    "# The path is relative, so it is resolved against the kernel's working directory.\n",
    "df = pd.read_csv(\"fraudTest.csv\")\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2bcadae6",
   "metadata": {},
   "source": [
    "## 🧾 Basic Overview of the Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "820cb0e9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Frame size, column dtypes, per-column null counts and the number of fully duplicated rows.\n",
    "print(\"Shape:\", df.shape)\n",
    "print(\"\\nData Types:\\n\", df.dtypes)\n",
    "print(\"\\nMissing Values:\\n\", df.isnull().sum())\n",
    "print(\"\\nDuplicate Rows:\", df.duplicated().sum())"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "caa22db9",
   "metadata": {},
   "source": [
    "## ⚖️ Class Balance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7fb75259",
   "metadata": {},
   "outputs": [],
   "source": [
    "sns.countplot(data=df, x=\"is_fraud\")\n",
    "plt.title(\"Fraud vs Non-Fraud Transactions\")\n",
    "plt.show()\n",
    "\n",
    "# The mean equals the fraud share only if is_fraud is coded 0/1 (assumed here; TODO confirm).\n",
    "fraud_ratio = df[\"is_fraud\"].mean()\n",
    "print(f\"Fraudulent transactions: {fraud_ratio:.4%}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "658e9cd2",
   "metadata": {},
   "source": [
    "## 📊 Statistical Summary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "202e2612",
   "metadata": {},
   "outputs": [],
   "source": [
    "# include='all' summarises non-numeric columns as well as numeric ones.\n",
    "df.describe(include='all')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "12d24a95",
   "metadata": {},
   "source": [
    "## 🔗 Correlation Matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3c02acf0",
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure(figsize=(12, 8))\n",
    "# numeric_only=True restricts the correlation to the numeric columns of df.\n",
    "sns.heatmap(df.corr(numeric_only=True), annot=True, fmt=\".2f\", cmap=\"coolwarm\")\n",
    "plt.title(\"Feature Correlation Matrix\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fce8183a",
   "metadata": {},
   "source": [
    "## 💵 Transaction Amount Distribution by Fraud"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ea72b131",
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure(figsize=(10, 6))\n",
    "sns.boxplot(data=df, x='is_fraud', y='amt')\n",
    "# Log-scale the amount axis (presumably because amounts are heavily right-skewed; verify against the data).\n",
    "plt.yscale('log')\n",
    "plt.title(\"Transaction Amount by Fraud Status\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a7d7d378",
   "metadata": {},
   "source": [
    "## 🕒 Transaction Timing (Hourly)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5f26f36f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Derive the hour in a separate frame instead of adding columns to df, so the\n",
    "# earlier cells (dtypes, describe, correlation matrix) give the same output when re-run.\n",
    "df_hourly = df.assign(hour=pd.to_datetime(df[\"trans_date_trans_time\"]).dt.hour)\n",
    "\n",
    "plt.figure(figsize=(12, 6))\n",
    "# discrete=True gives every integer hour its own unit-width bar centred on that hour.\n",
    "sns.histplot(data=df_hourly, x=\"hour\", hue=\"is_fraud\", multiple=\"stack\", discrete=True)\n",
    "plt.title(\"Transaction Hour Distribution\")\n",
    "plt.xlabel(\"Hour of day\")\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {},
 "nbformat": 4,
 "nbformat_minor": 5
}