Files

2555 lines
320 KiB
Plaintext
Raw Permalink Normal View History

2023-03-22 21:09:45 +00:00
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "code",
"execution_count": 92,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "R9zvSxjt24af",
"outputId": "de9749be-551f-4613-e586-b70389d852e3"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Dimensions: 178 x 13\n",
"\n",
"Header: ['alcohol', 'malic acid', 'ash', 'ash alcalinity', 'magnesium', 'total phenols', 'flavanoids', 'nonflavanoid phenols', 'proanthocyanins', 'color intensity', 'hue', 'OD280/OD315 of diluted wines', 'proline']\n",
"1st row [1.423e+01 1.710e+00 2.430e+00 1.560e+01 1.270e+02 2.800e+00 3.060e+00\n",
" 2.800e-01 2.290e+00 5.640e+00 1.040e+00 3.920e+00 1.065e+03]\n"
]
}
],
"source": [
"from mlxtend.data import wine_data\n",
"X, y = wine_data()\n",
"\n",
"print('Dimensions: %s x %s' % (X.shape[0], X.shape[1]))\n",
"print('\\nHeader: %s' % ['alcohol', 'malic acid', 'ash', 'ash alcalinity',\n",
" 'magnesium', 'total phenols', 'flavanoids',\n",
" 'nonflavanoid phenols', 'proanthocyanins',\n",
" 'color intensity', 'hue', 'OD280/OD315 of diluted wines',\n",
" 'proline'])\n",
"print('1st row', X[0])"
]
},
{
"cell_type": "code",
"source": [
"import numpy as np\n",
"print('Classes: %s' % np.unique(y))\n",
"print('Class distribution: %s' % np.bincount(y))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "kj6nE9hz3AMs",
"outputId": "ae7a436c-6bcd-43de-c171-c34034ef4320"
},
"execution_count": 93,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Classes: [0 1 2]\n",
"Class distribution: [59 71 48]\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"X.shape"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "lQ_JL1i4Eo0C",
"outputId": "2971e185-5ee1-4785-e6ff-05d042406441"
},
"execution_count": 94,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(178, 13)"
]
},
"metadata": {},
"execution_count": 94
}
]
},
{
"cell_type": "code",
"source": [
"columns = ['alcohol', 'malic acid', 'ash', 'ash alcalinity',\n",
" 'magnesium', 'total phenols', 'flavanoids',\n",
" 'nonflavanoid phenols', 'proanthocyanins',\n",
" 'color intensity', 'hue', 'OD280/OD315 of diluted wines',\n",
" 'proline']"
],
"metadata": {
"id": "LIyPC2bsEyEq"
},
"execution_count": 95,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from sklearn.model_selection import train_test_split\n",
"X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3, random_state=42)"
],
"metadata": {
"id": "KUIpscN0I3T4"
},
"execution_count": 96,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from sklearn import tree\n",
"\n",
"# Seed the estimator so the fitted tree (and every metric computed from it)\n",
"# is reproducible; 42 matches the random_state used for train_test_split.\n",
"clf = tree.DecisionTreeClassifier(random_state=42)"
],
"metadata": {
"id": "bmMYttf2M7GY"
},
"execution_count": 97,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Fit the classifier to the training data\n",
"clf.fit(X_train, y_train)\n",
"\n",
"# Predict the class labels for the test data\n",
"y_pred = clf.predict(X_test)\n",
"\n",
"# Calculate the accuracy of the classifier\n",
"accuracy = clf.score(X_test, y_test)\n",
"\n",
"# Print the results\n",
"print(\"Predicted class labels:\", y_pred)\n",
"print(\"Accuracy:\", accuracy)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "6mGo2Xb3NX3X",
"outputId": "4a9d3837-1c80-41ed-fbfb-ad28c2d0c45b"
},
"execution_count": 98,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Predicted class labels: [0 0 2 0 1 0 1 2 1 2 1 0 0 1 0 1 1 1 0 1 0 1 1 2 2 2 1 1 1 0 0 1 2 0 0 0 2\n",
" 2 1 2 1 1 1 1 2 0 1 1 2 0 1 0 0 2]\n",
"Accuracy: 0.9444444444444444\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"plt.figure(figsize=(12,8))\n",
"# Label split nodes with the feature names held in `columns` instead of the\n",
"# default X[i] placeholders, and colour each node by its majority class.\n",
"tree.plot_tree(clf,\n",
"               feature_names=columns,\n",
"               class_names=[str(label) for label in clf.classes_],\n",
"               filled=True)\n",
"plt.show()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 466
},
"id": "UeqGcdxwN7CE",
"outputId": "bf7ad83c-399e-4127-c273-a83cb9296460"
},
"execution_count": 99,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 864x576 with 1 Axes>"
],
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAqsAAAHBCAYAAABOnPJQAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAAsTAAALEwEAmpwYAACqOklEQVR4nOzdd1gU19cH8O8FkbYLS1EBAVFiV0Sixi5Yo7GhWKMi9pJgTDQW7LHFoDFRMT9jS8SSYiFGMWLsRqMmAnajgoCiCEgHaef9A9nXlUUpi1s4n+e5zyMzd2bOLNd7D7MzdwQRgTHGGGOMMU2kp+4AGGOMMcYYKw4nq4wxxhhjTGNxssoYY4wxxjQWJ6uMMcYYY0xjcbLKGGOMMcY0FierjDHGGGNMY3GyyhhjjDHGNBYnq4wxxhhjTGNxssoYY4wxxjQWJ6uMMcYYY0xjcbLKGGOMMcY0FierjDHGGGNMY3GyyhhjjDHGNBYnq4wxxhhjTGNxssoYY4wxxjRWFXUHwBjTDcbGxo+zsrJqqDsOplmMjIyeZGZm2qg7DsaY9hJEpO4YGGM6QAhB3J+wVwkhQERC3XEwxrQX3wbAGGOMMcY0FierjDHGGGNMY3GyyhhjjDHGNBYnq4wxxhhjTGNxssoYY4wxxjQWJ6uMMZ2zaNEiVKlSBRKJBFFRUSXaZtasWTA1NYUQAmlpaRUcIWOMsZLiZJUxppP69++PtLQ0ODo6AgByc3Ph5+cHBwcHmJmZwdPTE3FxcfL6X375Ja5fv66ucAEA586dg6urKywsLGBhYYFu3bq9NqacnBz4+vrC1tYWMpkMHh4eCvVnzpyJ+vXrQyqVok6dOli1atXbOA3GGFMpTlYZY5XCV199hSNHjuDvv//GkydPYG5ujhEjRqhs/48fPy73PurVq4eDBw8iMTERT58+Re/evTFw4MBi669btw5HjhzBxYsXER8fj5YtW2LIkCHy9UZGRti3bx+SkpJw+PBhBAQEYOvWreWOkzHG3iZOVhljWufBgwewtLTE8ePHAQCZmZlo1KgRvvnmm2K32b9/P6ZNmwY7OzsYGxtj8eLFCAkJQWRkZJnjyMzMxO7du9GzZ0/UrVu3zPspVK1aNTg4OBROpA89PT3cv38f+fn5SutHRkaia9eucHBwQJUqVeDt7Y2bN28iLy8PAPDFF1+gcePG0NfXR4MGDTBgwACcPXu23HEyxtjbxMkqY0zr1KpVCwEBARg1ahQSEhIwffp0ODk5wdfXt9ht8vPz8fIbtgoTwPDw8FIdm4hw6tQpjB07FnZ2dti0aRO8vLwQExMjr7Ny5UrIZLJiy+sSxuTkZMhkMhgZGWHatGnw8/ODnp7yrnrs2LG4dOkSIiMjkZ2dja1bt6JHjx7Q19dXGvfp06fRuHHjUp0vY4ypWxV1B8AYY2UxdOhQHDlyBB4eHnj69CnCwsIgRPFv9ezduzfWrl2Lzp07w8LCAgsWLIAQAikpKSU+5oYNG+Dv7w9TU1OMGDEC4eHhcHBwKFJv9uzZmD17dpnOy9zcHElJSUhLS8OOHTtQs2bNYuvWqVMHTZo0Qe3ataGvrw97e3v8+eefSuv6+fkhIyMDkyZNKlNcjDGmLnxllTGmtaZMmYKrV69i9OjRqF69+mvrzpkzB126dEH79u1Rr149uLq6QiKRwMrKqsTHi4iIQFxcHFxdXdGsWTPY2tqW9xSKJZFIMHHiRPj4+CA6OlppnSlTpuDx48d48uQJsrKysGjRInTq1KlIAr5ixQr89NNPOHr0KExNTSssZsYYqwicrDLGtNLz588xfvx4+Pj4YOPGjbh58+Zr6xsaGsLf3x8PHjzAo0eP0KtXL2RnZ+O9994r8TH9/f0RFRWFNm3aYPHixahZsyY+/vhjXLhwQaHe8uXLIZFIii1nzpwp0fGICJmZmcXeV3vlyhV4e3ujevXqqFKlCkaPHo3MzEz8+++/8jorV67E//73Pxw/fh
z29vYlPlfGGNMUnKwyxrTS559/jmrVqmHLli2YM2cOhg0bhufPnxdbPzY2Fg8ePAAR4b///sPYsWPx6aefwtLSslTHtbKywtSpU3HhwgWcPXsWMpkMw4YNwzvvvCOvM3fuXKSlpRVbOnTooHTf+/fvx40bN5Cfn4/k5GRMnz4dEokErq6uSuu3bt0aO3bsQEJCAvLy8rBjxw5kZGSgQYMGAIBVq1YhICAAJ06cQK1atUp1nowxpik4WWWMaZ3g4GDs2rULP/74I4QQmDlzJqysrDBr1qxit4mKioKHhwdMTU3RtWtX9O7dG8uWLStXHHXr1sUXX3yB+/fvY9u2beXaF1Aw/VWfPn0glUrxzjvv4N69ezh69CikUikAYOfOnQoPSH311VeoVq0aGjduDAsLC3z99df4+eefYWNjA6DgRQePHz9G06ZN5Vd1e/bsWe44GWPsbRIvPx3LGGNlJYQgTelPli5dihUrVsDAwADh4eHyFwO8zpw5cxAQEIDnz58jISGB7+1UkRfTcBX/5BtjjL0BJ6uMMZXQpGSVaQ5OVhlj5cW3ATDGGGOMMY3FySpjjDHGGNNYnKwyxnRWVFQUJBIJ0tPTS1S/Z8+e2LRpUwVHxRhjrDQ4WWWM6SxHR0ekpaWV+GGp4OBgTJgwQeVx3LhxA23btoWJiQkaNmyIkJCQ19ZPTEyEj48PLC0tYW5ujnbt2snXnTx5EkIIhXlbFy5cKF+fk5MDX19f2NraQiaTwcPDA9evX1f5OTHG2NvCySpjjFWgnJwc9O3bF71798azZ8+waNEiDBw4EI8fPy52mwEDBsDY2Bj37t1DYmIi1q5dq7DeyspKYd7WxYsXy9etW7cOR44cwcWLFxEfH4+WLVtiyJAhFXV6jDFW4ThZZYxptfDwcLRq1QpSqRTvv/8+Pv74Y3h5eQEAIiMjIYRAWloaAGD06NGYOHEivLy8IJVK0bhxY1y8eFG+L3d3d6xfv16l8Z0+fRrJycmYNWsWDA0NMWTIEDRt2hQ//fST0vrHjh3D/fv38e2338LCwgL6+vpo2bJliY8XGRmJrl27wsHBAVWqVIG3tzdu3ryJvLw8VZ0SY4y9VZysMsa0Vk5ODvr164d+/fohMTERfn5++PHHH1+7ze7du+Hr64ukpCT06dMHkyZNKtGxCt9WVVxZuXKl0u3Cw8PRpEkT6Ovry5e5urri2rVrSuufP38eDRo0wLhx42BlZQUXFxfs27dPoU5SUhJsbW3h6OiIsWPHIj4+Xr5u7NixuHTpEiIjI5GdnY2tW7eiR48eCsdnjDFtwskqY0xrnT9/HikpKZg9ezYMDAzQoUMH9OnT57Xb9O3bFx07doS+vj5GjRqF8PDwEl11bN++PZKSkoots2fPVrpdWloazM3NFZbJZDKkpqYqrR8dHY2QkBC0bt0ajx8/xpo1azBixAhcvXoVANCgQQOEhYUhJiYGFy5cQGJiosLX/HXq1EGTJk1Qu3ZtmJiYYO/evVi3bt0bz48xxjQVJ6uMMa316NEj2NnZKVw1dHBweO02ha8iBQATExPk5eUhMzOzwmKUSCRISUlRWJacnCx/heqrTExMYG9vj0mTJsHAwABdu3ZFly5dEBwcLI+/cePG0NfXh52dHQICAnD8+HH51dUpU6bg8ePHePLkCbKysrBo0SJ06tSpSAyMMaYtOFlljGktOzs7xMbGKlwZjY6OrpBjnTlzRuEJ/FfL8uXLlW7n4uKCa9euIT8/X74sNDQUTZo0Kba+EEVf+FTc28H09PQU1l+5cgXe3t6oXr06qlSpgtGjRyMzMxP//vtvqc6XMcY0BSerjDGt1aZNG5iamuKrr75CTk4Ozp07h4MHD1bIsTp06KDwBP6rZe7cuUq369ixI8zMzLBq1So8f/4cv/zyC8LDwzF48GCl9T09PZGeno7NmzcjLy8PJ0+exPHjx9GrVy8AwIkTJxAZGQkiQlxcHKZOnYr27dujWrVqAIDWrVtjx4
4dSEhIQF5eHnbs2IGMjAw0aNCgQj4XxhiraJysMsa0loGBAYKCgvDrr7/CwsICS5YswdChQ2FoaKju0OQKY
},
"metadata": {
"needs_background": "light"
}
}
]
},
{
"cell_type": "markdown",
"source": [
"# Confusion Matrix"
],
"metadata": {
"id": "6ApROW4KO5Qw"
}
},
{
"cell_type": "code",
"source": [
"from sklearn.metrics import confusion_matrix\n",
"import seaborn as sns\n",
"from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score\n",
"\n",
"plt.figure(figsize=(12,8))\n",
"# confusion_matrix(y_true, y_pred): rows are the actual classes,\n",
"# columns are the predicted classes.\n",
"cm = confusion_matrix(y_test,y_pred)\n",
"\n",
"# Plot the confusion matrix. The heatmap draws matrix rows along the y-axis\n",
"# and matrix columns along the x-axis, so y is 'Actual' and x is 'Prediction'.\n",
"sns.heatmap(cm,\n",
"            annot=True,\n",
"            fmt='g',\n",
"            xticklabels=['0', '1' , '2'],\n",
"            yticklabels=['0', '1' , '2'])\n",
"plt.ylabel('Actual',fontsize=13)\n",
"plt.xlabel('Prediction',fontsize=13)\n",
"plt.title('Confusion Matrix',fontsize=17)\n",
"plt.show()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 520
},
"id": "bCc29IiJOLKI",
"outputId": "b5754110-096a-4a5a-8f42-3a72af060178"
},
"execution_count": 100,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 864x576 with 2 Axes>"
],
"image/png": "iVBORw0KGgoAAAANSUhEUgAAApwAAAH3CAYAAAAWkI+hAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAAsTAAALEwEAmpwYAAAt0UlEQVR4nO3debxdZXno8d+TEAYDCIgMGSQgFMUBvAIOaC+IBuQiobRlUDFWMGiVipfiiNLSC9paoCJeMZUAIgSojCIggYKAFSRwESGhDCZKJkDmUZKc5/6xV+LmcKZ9zl7nPTn79/WzPnuvdw3vs2M+8fF51/uuyEwkSZKkuowpHYAkSZJGNxNOSZIk1cqEU5IkSbUy4ZQkSVKtTDglSZJUKxNOSZIk1cqEU9KgRcT7ImJuRDwfERkRO9XQxw0RcUO777umq/68/6F0HJI0ECac0hosIjaJiH+KiF9HxDMR8UJE3BsRp0bEdjX3vRFwEY1/R/4OOBT4XZ19DreIOKtK7F6sfm/342tFxMPVOdcOso/9I+LrQw5WkkawtUoHIGlwImJH4CrgNcAFwPeB5cAOwEHAp4C1awxhZ2Aj4PjMvLTGfqbWeO+BSCCAA4GZ3Y7tBWwG/HEI998f+ChwfIvXrQesGEK/kjRsTDilNVBEbAhcDowDdsnMu7od/wpwQs1hbFZ9PllnJ5n5Up33H4Au4EoaSWH3hPNQ4Db+9GdRq4gYA6ydmS9m5ovD0acktYND6tKaaQbwOuDvuyebAJn5Qmb+7+a2iNgtIq6LiGer7bqIeFe3cz5eDQ/vERHfiIhl1TD9nIjYuum8G4Bzq93rq2tuqI6dFRELu8fUdO8pTW07RcQV1bD0ixGxOCIuiohJzX11f4YzItaNiBMjYmFEvFR9nhgR63Q7b2FEXBsRu0TEL6rf8lBEvOzPZgB+BLwnIrZquveGwH7VsVeIiKMj4qaIeDQi/lg96vD3ERHNvw2YDoyt/mwyIrI6NqXaPzYijoiIe2lUUveujq9+hjMaromIpyLidd3iuDQinouIP2vxN0tS21jhlNZM+9NIPs4fyMkR8efAHGAJf6p8HkEjWdwzM3/R7ZJvAS8CJwKbAn9PI8F8d3X8BGAe8OnqnPnAw638gIh4LXAt8ARwMvAHYAKNYeqJwKJergvgYuCDNJK9X1ZxfRl4C/Chbpe8DvgJcE51/kHASRFxT2b+bIDhXgE8RaPKuerP7y9pVJjPB3pKYP838FPgxzSGvj9A4891Y+Cr1Tkn0Pg//rvRSDx7cjDwahqPTDwOLOx+QmZmRPwN8Bvg7Ih4X9X2CWAa8JnMvG+Av1WS2i8z3dzc1rCNRuLx6xbOn0sjsdu8qW1LGknUr5raPk7jmcVfAGOb2o+q2t/U1PbRqm33bn2dBSzsIYZV955S7U+r9nfpJ/YbgBua9vetrvtGt/O+VbXv09S2sGrbu6ltHRrJ8X8M4M/tLGBF9X0mMK/p2H8CP23q59pu176qh/v9AHgWWKenPrqdO6WK/XlgUg/HE/iHbm0HV+1HA1sDTwNXl/776ubm5uaQurRm2pBGMtGviNgCeDtwTmaurkJm5lIaFb9dIqL7M4jfz8yVTfs/rz63GXzIr/Bk9blf96Hwfuxbff5rt/Z/6XZ8lQWZefWqncz8I3ALrf+WHwFvjIi3R8RkYHd6GU6v+nkeVs9k3zgiNqWRPI8Htm+h38szs8dqbw99ng/MplE5vZhGZfUTLfQlSbUw4ZTWTE8DGwzw3CnV5709HJtXfW7drb378kZPVJ+bDLDPgbiRxnD0scBjEXF1RBwZEa/p57opwKOZ+VhzY2Y+SmNYvr/fAo3f0+pvuam610eBj9CoVF7a28kRsU9E3AK8QKMi/SiNYX1ozO4fqAdbjPNvq9h2Ao7MzCUtXi9JbWfCKa2Z5gPbt1gZbMXKXtqjl/Zm2Uv72Jed1HAIjerrP9NY5ucU4N6IePNAAx2AofyW1T
IzaTzHegiN2ekXZ+YLPd444t00nhtdQSMB/F80nuH8YnVKK//29thHH95BY6ksaDzTKknFmXBKa6bLgHVprA3Zn4XV5xt6OPbG6nNBG2Ja5Ql6ruBN6enkzLwjM/8pM/8n8D+qa4/u4/4Lgdd2r4RWQ9ab0t7f0t2PgM1prHXa63A68NfAS8D7M/PfM/PKzLyWnpeQ6i1Bb1lEbALMAu4E/h04pkp+JakoE05pzfR9GrO4T+qpGlgtG3QyQGYuozFp6NDmZzWrZzsPpTFp6JE2xvYA8OqIeFtTX+vTbRZ29Vxj9yrjfBoVvY36uP9Pqs/uM8OP6Xa87TJzftXv12hMGupNF41EcnVVNyLWBY7s4dznaCyLtH4bQvwejermoTQmej0I/DAixrfh3pI0aC6LJK2BMvOpiJhGY0Hy2yNiNnArjTcNvYHG0j+b8aek7GgaSxDdEhGrFi8/gkaVtNU1KfszG/gmcElEfJvG0kGfoDEzfHLTedOBIyPiEhpJ6lo0ZllvUN2jN1cCVwNfqdbrvBV4J40k64rMvKq9P+flMvOUAZx2OfB54NqIOIfGb5pOY6mp7m6vPr8Tjddjrqwm/7QkIj5Mo+J9TGbeXbV9DLgZOInGm6ckqQgTTmkNlZl3VNXNz9NYgPyvaVTUFtB4x/mpTefeGBHvo/H6xGOr5l8BH8nM/2pzXE9UyfApNJ7NXExjnc1ngDObTv05jddj/iWwBY3lf+4B9s/My/q4f0bEAcDXgQ/TeKZyKfANWn89ZC0y8+cRcSjwFRq//REayx/dBFzT7fQf0UiYD6CRlAYDXF91lSrx/i6NiVgnN8VxS0R8E/hqRFzaPFtfkoZTNJ6DlyRJkurhM5ySJEmqlQmnJEmSamXCKUmSpFqZcEqSJKlWJpySJEmq1RqxLNILP/yyU+k1Krz16O4r4khrngVPLSsdgtQWK15a3NIrbuuw/A+/rSXHGbfpNsV/WzMrnJIkSarVGlHhlCRJGpW6VpaOYFiYcEqSJJWSXaUjGBYOqUuSJKlWVjglSZJK6bLCKUmSJA2ZFU5JkqRCskOe4TThlCRJKsUhdUmSJGnorHBKkiSV0iFD6lY4JUmSVCsrnJIkSaV0yJuGrHBKkiSpVlY4JUmSSumQZzhNOCVJkkpxWSRJkiRp6Ew4JUmSCsnsqmXrT0RMjojrI2JeRNwTEZ+r2jeJiDkRcX/1uXEv10+vzrk/Iqb3158JpyRJUudZARydmTsA7wQ+ExE7AF8CrsvM7YDrqv2XiYhNgOOAdwC7Asf1lpiuYsIpSZJUSldXPVs/MnNpZt5RfX8GmA9MBKYBZ1ennQ3s38PlewFzMvPxzHwCmAPs3Vd/ThqSJEkqZQTMUo+IKcDbgFuBzTNzaXVoGbB5D5dMBB5q2l9UtfXKCqckSdIoExEzImJu0zajl/PWBy4CjsrMp5uPZWYC2Y54rHBKkiSVUtObhjJzJjCzr3MiYhyNZPPczLy4an44IrbMzKURsSXwSA+XLgZ2b9qfBNzQV19WOCVJkjpMRARwBjA/M09uOnQ5sGrW+XTgsh4u/xkwNSI2riYLTa3aemWFU5IkqZRyz3DuBhwK/CYi7qzavgJ8E7gwIg4DfgccCBAROwOfyszDM/PxiPgn4LbquuMz8/G+OjPhlCRJKqXQm4Yy82Ygejm8Zw/nzwUOb9qfBcwaaH8OqUuSJKlWVjglSZJKGQHLIg0HK5ySJEmqlRVOSZKkUgo9wzncTDglSZIKyaxnHc6RxiF1SZIk1coKpyRJUilOGpIkSZKGzgqnJElSKR0yacgKpyRJkmplhVOSJKmUDnmG04RTkiSplC6XRZIkSZKGzAqnJElSKR0ypG6FU5IkSbWywilJklRKhyyLZMIpSZJUikPqkiRJ0tBZ4ZQkSSqlQ4bUrXBKkiSpVlY4JUmSSumQCqcJpyRJUiGZvmlIkiRJGjIrnJIkSaV0yJC6FU5JkiTVygqnJElSKS78LkmSJA2dFU
5JkqRSOuQZThNOSZKkUhxSlyRJkobOCqckSVIpHTKkboVTkiRJtbLCKUmSVEqHPMNpwilJklSKQ+qSJEnS0
},
"metadata": {
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"source": [
"from sklearn.metrics import accuracy_score\n",
"accuracy_score(y_test,y_pred)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "KGoGXwRkPHBH",
"outputId": "10bfeed7-0d53-409e-ad12-723de04671f6"
},
"execution_count": 101,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.9444444444444444"
]
},
"metadata": {},
"execution_count": 101
}
]
},
{
"cell_type": "code",
"source": [
"from sklearn.metrics import classification_report\n",
"print(classification_report(y_test, y_pred))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TOzbcfXSQUGo",
"outputId": "24336c98-a019-44a4-c55c-cdccd3a4da24"
},
"execution_count": 102,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" precision recall f1-score support\n",
"\n",
" 0 0.94 0.89 0.92 19\n",
" 1 0.91 1.00 0.95 21\n",
" 2 1.00 0.93 0.96 14\n",
"\n",
" accuracy 0.94 54\n",
" macro avg 0.95 0.94 0.95 54\n",
"weighted avg 0.95 0.94 0.94 54\n",
"\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"### Wine Classification using the wine_data.xlsx"
],
"metadata": {
"id": "Lbhxu3u6i4at"
}
},
{
"cell_type": "markdown",
"source": [
"### Loading the data"
],
"metadata": {
"id": "wCtbWMDejBW9"
}
},
{
"cell_type": "code",
"source": [
"# pandas is not imported by any earlier cell, so import it here to keep the\n",
"# notebook runnable from a fresh kernel (Restart & Run All).\n",
"import pandas as pd\n",
"\n",
"wine = pd.read_excel('/content/wine_data.xlsx')"
],
"metadata": {
"id": "9Udcqv9QQ1f_"
},
"execution_count": 163,
"outputs": []
},
{
"cell_type": "code",
"source": [
"wine.head()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "bUDrNfnHjtBs",
"outputId": "d8d0ea56-ffd8-4c2c-ef49-22bc745161c4"
},
"execution_count": 164,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Preference Red_Wine White_Wine Recommendation\n",
"0 Red Light-Bodied NaN Pinot Noir\n",
"1 Red Full-Bodied NaN Shiraz or Zinfandel\n",
"2 White NaN Dry Sauvignon Blanc\n",
"3 White NaN Sweet Gewurztraminer\n",
"4 Red-Fruity NaN NaN Pinot Noir"
],
"text/html": [
"\n",
" <div id=\"df-87941f68-bb2e-4645-ac49-58b5086dae00\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Preference</th>\n",
" <th>Red_Wine</th>\n",
" <th>White_Wine</th>\n",
" <th>Recommendation</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Red</td>\n",
" <td>Light-Bodied</td>\n",
" <td>NaN</td>\n",
" <td>Pinot Noir</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Red</td>\n",
" <td>Full-Bodied</td>\n",
" <td>NaN</td>\n",
" <td>Shiraz or Zinfandel</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>White</td>\n",
" <td>NaN</td>\n",
" <td>Dry</td>\n",
" <td>Sauvignon Blanc</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>White</td>\n",
" <td>NaN</td>\n",
" <td>Sweet</td>\n",
" <td>Gewurztraminer</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Red-Fruity</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Pinot Noir</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-87941f68-bb2e-4645-ac49-58b5086dae00')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-87941f68-bb2e-4645-ac49-58b5086dae00 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-87941f68-bb2e-4645-ac49-58b5086dae00');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
]
},
"metadata": {},
"execution_count": 164
}
]
},
{
"cell_type": "code",
"source": [
"# Every row contains at least one NaN, so the default dropna=True discards\n",
"# all rows and returns an empty Series; keep NaN combinations in the count.\n",
"wine.value_counts(dropna=False)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Nxoyk2DUjvwD",
"outputId": "f63bb36f-d3a3-4696-e128-2a6f53da402b"
},
"execution_count": 165,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Series([], dtype: int64)"
]
},
"metadata": {},
"execution_count": 165
}
]
},
{
"cell_type": "code",
"source": [
"wine.info()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "XM0ZWZq_j1Dj",
"outputId": "c4f3eaa0-1a30-4806-fb3a-81329552fb82"
},
"execution_count": 166,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 40 entries, 0 to 39\n",
"Data columns (total 4 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Preference 40 non-null object\n",
" 1 Red_Wine 18 non-null object\n",
" 2 White_Wine 6 non-null object\n",
" 3 Recommendation 35 non-null object\n",
"dtypes: object(4)\n",
"memory usage: 1.4+ KB\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"wine['Preference'].value_counts()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "G4IfBAQtj2Oc",
"outputId": "51fbc5a9-2080-4637-8ee6-164b59146f40"
},
"execution_count": 167,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Red 6\n",
"White 6\n",
"Red-Fruity 5\n",
"Red-Earthy 5\n",
"White-Crisp 3\n",
"White-Creamy 3\n",
"Red-Spicy 3\n",
"Red-Rich 3\n",
"White-Floral 3\n",
"White-Citrus 3\n",
"Name: Preference, dtype: int64"
]
},
"metadata": {},
"execution_count": 167
}
]
},
{
"cell_type": "code",
"source": [
"wine['Red_Wine'].value_counts()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "vrGJsrb0j7Qk",
"outputId": "067da933-1552-49e0-aed2-347d1641e142"
},
"execution_count": 168,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Light-Bodied 5\n",
"Full-Bodied 5\n",
"Dry 4\n",
"Sweet 4\n",
"Name: Red_Wine, dtype: int64"
]
},
"metadata": {},
"execution_count": 168
}
]
},
{
"cell_type": "code",
"source": [
"wine['White_Wine'].value_counts()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "bR5Ie3bakC1E",
"outputId": "3c9a81dc-ad0c-4a38-e4d7-4073bd222b37"
},
"execution_count": 169,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Dry 3\n",
"Sweet 3\n",
"Name: White_Wine, dtype: int64"
]
},
"metadata": {},
"execution_count": 169
}
]
},
{
"cell_type": "code",
"source": [
"wine['Recommendation'].value_counts()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "iATAENA3kGN0",
"outputId": "92f4684d-27f4-4636-c766-caa0f13e575b"
},
"execution_count": 170,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Pinot Noir 8\n",
"Shiraz or Zinfandel 5\n",
"Sauvignon Blanc 5\n",
"Gewurztraminer 4\n",
"Cabernet Sauvignon 4\n",
"Chianti 3\n",
"Chardonnay 3\n",
"Riesling 3\n",
"Name: Recommendation, dtype: int64"
]
},
"metadata": {},
"execution_count": 170
}
]
},
{
"cell_type": "code",
"source": [
"wine.isnull().sum()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "hXVqoyd0kKiE",
"outputId": "25a594a3-90fc-48e8-eb96-44d46d1f4dbc"
},
"execution_count": 171,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Preference 0\n",
"Red_Wine 22\n",
"White_Wine 34\n",
"Recommendation 5\n",
"dtype: int64"
]
},
"metadata": {},
"execution_count": 171
}
]
},
{
"cell_type": "markdown",
"source": [
"## Handling Null Values"
],
"metadata": {
"id": "iaIBH_PQkU2l"
}
},
{
"cell_type": "code",
"source": [
"# Recommendation is the class label and the dataset is small, so missing\n",
"# labels are filled with the most frequent class (currently 'Pinot Noir').\n",
"# NOTE(review): imputing the target invents labels for 5 rows - consider\n",
"# dropping those rows instead; confirm which is intended.\n",
"# value_counts() already sorts by descending frequency, so the first index\n",
"# entry is the most frequent class.\n",
"most_frequent_recommendation = wine['Recommendation'].value_counts().index[0]\n",
"wine['Recommendation'] = wine['Recommendation'].fillna(most_frequent_recommendation)"
],
"metadata": {
"id": "8fcr-w1EkTGc"
},
"execution_count": 172,
"outputs": []
},
{
"cell_type": "code",
"source": [
"wine['Recommendation'].value_counts().sort_values(ascending=False).keys()[0]"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 36
},
"id": "ma_R478MlDk-",
"outputId": "0d5f3e9a-1eb0-4a09-99fb-8145b1147089"
},
"execution_count": 173,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'Pinot Noir'"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 173
}
]
},
{
"cell_type": "code",
"source": [
"wine['Recommendation'].isnull().sum()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "yX8GGdHrlFoF",
"outputId": "5579cf7d-2f7b-4489-d09f-f424afefd402"
},
"execution_count": 174,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0"
]
},
"metadata": {},
"execution_count": 174
}
]
},
{
"cell_type": "code",
"source": [
"# White_Wine\n",
"'''\n",
"1. White_Wine has 2 classes [Dry , Sweet]\n",
"2. Replace with 'None' as a New label\n",
"'''\n",
"\n",
"wine['White_Wine'] = wine['White_Wine'].fillna('None')\n",
"wine['White_Wine'].isnull().sum()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ZD3sh5V4lY-s",
"outputId": "621baedb-3daf-49a7-87e7-45bde6701212"
},
"execution_count": 175,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0"
]
},
"metadata": {},
"execution_count": 175
}
]
},
{
"cell_type": "code",
"source": [
"# Red_Wine\n",
"'''\n",
"1. 22 null values\n",
"2. replace with 'None'\n",
"'''\n",
"\n",
"wine['Red_Wine'] = wine['Red_Wine'].fillna('None')\n",
"wine['Red_Wine'].isnull().sum()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "gshPG42-nTgE",
"outputId": "b9a2554f-2c33-4cda-9f2d-895f1a290f2a"
},
"execution_count": 176,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0"
]
},
"metadata": {},
"execution_count": 176
}
]
},
{
"cell_type": "code",
"source": [
"wine.head()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "s7Ip6blqnlGU",
"outputId": "557ecc39-366c-4a2f-bfbc-db313ff9b10a"
},
"execution_count": 177,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Preference Red_Wine White_Wine Recommendation\n",
"0 Red Light-Bodied None Pinot Noir\n",
"1 Red Full-Bodied None Shiraz or Zinfandel\n",
"2 White None Dry Sauvignon Blanc\n",
"3 White None Sweet Gewurztraminer\n",
"4 Red-Fruity None None Pinot Noir"
],
"text/html": [
"\n",
" <div id=\"df-4d516507-4e1e-4fd2-824a-8b4b2343d008\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Preference</th>\n",
" <th>Red_Wine</th>\n",
" <th>White_Wine</th>\n",
" <th>Recommendation</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Red</td>\n",
" <td>Light-Bodied</td>\n",
" <td>None</td>\n",
" <td>Pinot Noir</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Red</td>\n",
" <td>Full-Bodied</td>\n",
" <td>None</td>\n",
" <td>Shiraz or Zinfandel</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>White</td>\n",
" <td>None</td>\n",
" <td>Dry</td>\n",
" <td>Sauvignon Blanc</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>White</td>\n",
" <td>None</td>\n",
" <td>Sweet</td>\n",
" <td>Gewurztraminer</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Red-Fruity</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>Pinot Noir</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-4d516507-4e1e-4fd2-824a-8b4b2343d008')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-4d516507-4e1e-4fd2-824a-8b4b2343d008 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-4d516507-4e1e-4fd2-824a-8b4b2343d008');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
]
},
"metadata": {},
"execution_count": 177
}
]
},
{
"cell_type": "markdown",
"source": [
"### One Hot Encoding"
],
"metadata": {
"id": "Eehc1X-npf7-"
}
},
{
"cell_type": "code",
"source": [
"from sklearn.preprocessing import OneHotEncoder\n",
"wine = pd.get_dummies(wine, columns=['Preference', 'Red_Wine','White_Wine'])"
],
"metadata": {
"id": "KsTVHcsvnnQb"
},
"execution_count": 178,
"outputs": []
},
{
"cell_type": "code",
"source": [
"wine.head()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 305
},
"id": "4niIcd36pyUt",
"outputId": "410a9df9-294d-403f-b315-10350964c8db"
},
"execution_count": 179,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Recommendation Preference_Red Preference_Red-Earthy \\\n",
"0 Pinot Noir 1 0 \n",
"1 Shiraz or Zinfandel 1 0 \n",
"2 Sauvignon Blanc 0 0 \n",
"3 Gewurztraminer 0 0 \n",
"4 Pinot Noir 0 0 \n",
"\n",
" Preference_Red-Fruity Preference_Red-Rich Preference_Red-Spicy \\\n",
"0 0 0 0 \n",
"1 0 0 0 \n",
"2 0 0 0 \n",
"3 0 0 0 \n",
"4 1 0 0 \n",
"\n",
" Preference_White Preference_White-Citrus Preference_White-Creamy \\\n",
"0 0 0 0 \n",
"1 0 0 0 \n",
"2 1 0 0 \n",
"3 1 0 0 \n",
"4 0 0 0 \n",
"\n",
" Preference_White-Crisp Preference_White-Floral Red_Wine_Dry \\\n",
"0 0 0 0 \n",
"1 0 0 0 \n",
"2 0 0 0 \n",
"3 0 0 0 \n",
"4 0 0 0 \n",
"\n",
" Red_Wine_Full-Bodied Red_Wine_Light-Bodied Red_Wine_None Red_Wine_Sweet \\\n",
"0 0 1 0 0 \n",
"1 1 0 0 0 \n",
"2 0 0 1 0 \n",
"3 0 0 1 0 \n",
"4 0 0 1 0 \n",
"\n",
" White_Wine_Dry White_Wine_None White_Wine_Sweet \n",
"0 0 1 0 \n",
"1 0 1 0 \n",
"2 1 0 0 \n",
"3 0 0 1 \n",
"4 0 1 0 "
],
"text/html": [
"\n",
" <div id=\"df-ff8ea101-881d-4f5f-b77f-bf849cbdf12a\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Recommendation</th>\n",
" <th>Preference_Red</th>\n",
" <th>Preference_Red-Earthy</th>\n",
" <th>Preference_Red-Fruity</th>\n",
" <th>Preference_Red-Rich</th>\n",
" <th>Preference_Red-Spicy</th>\n",
" <th>Preference_White</th>\n",
" <th>Preference_White-Citrus</th>\n",
" <th>Preference_White-Creamy</th>\n",
" <th>Preference_White-Crisp</th>\n",
" <th>Preference_White-Floral</th>\n",
" <th>Red_Wine_Dry</th>\n",
" <th>Red_Wine_Full-Bodied</th>\n",
" <th>Red_Wine_Light-Bodied</th>\n",
" <th>Red_Wine_None</th>\n",
" <th>Red_Wine_Sweet</th>\n",
" <th>White_Wine_Dry</th>\n",
" <th>White_Wine_None</th>\n",
" <th>White_Wine_Sweet</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Pinot Noir</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Shiraz or Zinfandel</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Sauvignon Blanc</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Gewurztraminer</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Pinot Noir</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-ff8ea101-881d-4f5f-b77f-bf849cbdf12a')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-ff8ea101-881d-4f5f-b77f-bf849cbdf12a button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-ff8ea101-881d-4f5f-b77f-bf849cbdf12a');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
]
},
"metadata": {},
"execution_count": 179
}
]
},
{
"cell_type": "code",
"source": [
"y = wine['Recommendation']\n",
"y"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "zIUiu-Wwp40s",
"outputId": "873fdc96-b9dc-49de-bfc9-28f4b58e5f6a"
},
"execution_count": 180,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0 Pinot Noir\n",
"1 Shiraz or Zinfandel\n",
"2 Sauvignon Blanc\n",
"3 Gewurztraminer\n",
"4 Pinot Noir\n",
"5 Chianti\n",
"6 Sauvignon Blanc\n",
"7 Chardonnay\n",
"8 Shiraz or Zinfandel\n",
"9 Cabernet Sauvignon\n",
"10 Gewurztraminer\n",
"11 Riesling\n",
"12 Pinot Noir\n",
"13 Chianti\n",
"14 Sauvignon Blanc\n",
"15 Chardonnay\n",
"16 Shiraz or Zinfandel\n",
"17 Cabernet Sauvignon\n",
"18 Gewurztraminer\n",
"19 Riesling\n",
"20 Pinot Noir\n",
"21 Shiraz or Zinfandel\n",
"22 Pinot Noir\n",
"23 Cabernet Sauvignon\n",
"24 Sauvignon Blanc\n",
"25 Pinot Noir\n",
"26 Pinot Noir\n",
"27 Chardonnay\n",
"28 Pinot Noir\n",
"29 Shiraz or Zinfandel\n",
"30 Pinot Noir\n",
"31 Cabernet Sauvignon\n",
"32 Pinot Noir\n",
"33 Gewurztraminer\n",
"34 Sauvignon Blanc\n",
"35 Riesling\n",
"36 Pinot Noir\n",
"37 Pinot Noir\n",
"38 Chianti\n",
"39 Pinot Noir\n",
"Name: Recommendation, dtype: object"
]
},
"metadata": {},
"execution_count": 180
}
]
},
{
"cell_type": "code",
"source": [
"wine.columns"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "w9U_0ZcYsHW0",
"outputId": "9507b818-c4cb-444b-8994-01daadf6d7ec"
},
"execution_count": 181,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Index(['Recommendation', 'Preference_Red', 'Preference_Red-Earthy',\n",
" 'Preference_Red-Fruity', 'Preference_Red-Rich', 'Preference_Red-Spicy',\n",
" 'Preference_White', 'Preference_White-Citrus',\n",
" 'Preference_White-Creamy', 'Preference_White-Crisp',\n",
" 'Preference_White-Floral', 'Red_Wine_Dry', 'Red_Wine_Full-Bodied',\n",
" 'Red_Wine_Light-Bodied', 'Red_Wine_None', 'Red_Wine_Sweet',\n",
" 'White_Wine_Dry', 'White_Wine_None', 'White_Wine_Sweet'],\n",
" dtype='object')"
]
},
"metadata": {},
"execution_count": 181
}
]
},
{
"cell_type": "code",
"source": [
"X = wine[['Preference_Red', 'Preference_Red-Earthy',\n",
" 'Preference_Red-Fruity', 'Preference_Red-Rich', 'Preference_Red-Spicy',\n",
" 'Preference_White', 'Preference_White-Citrus',\n",
" 'Preference_White-Creamy', 'Preference_White-Crisp',\n",
" 'Preference_White-Floral', 'Red_Wine_Dry', 'Red_Wine_Full-Bodied',\n",
" 'Red_Wine_Light-Bodied', 'Red_Wine_None', 'Red_Wine_Sweet',\n",
" 'White_Wine_Dry', 'White_Wine_None', 'White_Wine_Sweet']]"
],
"metadata": {
"id": "E8uyeERprdpL"
},
"execution_count": 182,
"outputs": []
},
{
"cell_type": "code",
"source": [
"X.shape"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "M7cCKVNusSVS",
"outputId": "0acb54a5-a1d0-453d-8b34-a3dadccdfa4f"
},
"execution_count": 183,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(40, 18)"
]
},
"metadata": {},
"execution_count": 183
}
]
},
{
"cell_type": "code",
"source": [
"y.shape"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "JX6KNEplsT2T",
"outputId": "8df2bde9-41c9-4092-b1e1-65ed81edacf6"
},
"execution_count": 184,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(40,)"
]
},
"metadata": {},
"execution_count": 184
}
]
},
{
"cell_type": "code",
"source": [
"from sklearn.preprocessing import LabelEncoder\n",
"le = LabelEncoder()\n",
"\n",
"y = le.fit_transform(y)\n",
"y"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "KwAXTRavsXJ3",
"outputId": "3f6e1f7b-6d7f-4d68-852d-eed24a13720a"
},
"execution_count": 185,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array([4, 7, 6, 3, 4, 2, 6, 1, 7, 0, 3, 5, 4, 2, 6, 1, 7, 0, 3, 5, 4, 7,\n",
" 4, 0, 6, 4, 4, 1, 4, 7, 4, 0, 4, 3, 6, 5, 4, 4, 2, 4])"
]
},
"metadata": {},
"execution_count": 185
}
]
},
{
"cell_type": "code",
"source": [
"X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3, random_state=42,stratify=y)"
],
"metadata": {
"id": "-SjHHFu5shkT"
},
"execution_count": 186,
"outputs": []
},
{
"cell_type": "code",
"source": [
"print(X_train.shape)\n",
"print(y_train.shape)\n",
"print(X_test.shape)\n",
"print(y_test.shape)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "POOvu3hcspbj",
"outputId": "a82dad4b-4f07-487b-fc62-0137459b0aeb"
},
"execution_count": 187,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"(28, 18)\n",
"(28,)\n",
"(12, 18)\n",
"(12,)\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"clf1 = tree.DecisionTreeClassifier()"
],
"metadata": {
"id": "foK7ookRsxPj"
},
"execution_count": 188,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Fit the classifier to the training data\n",
"clf1.fit(X_train, y_train)\n",
"\n",
"# Predict the class labels for the test data\n",
"y_pred = clf1.predict(X_test)\n",
"\n",
"# Calculate the accuracy of the classifier\n",
"accuracy = clf1.score(X_test, y_test)\n",
"\n",
"# Print the results\n",
"print(\"Predicted class labels:\", y_pred)\n",
"print(\"Accuracy:\", accuracy)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "fk3ZoFmrs9VL",
"outputId": "edbe4fe4-5746-4746-de6f-560e87d3a60b"
},
"execution_count": 189,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Predicted class labels: [1 4 4 4 0 0 0 1 0 4 4 4]\n",
"Accuracy: 0.4166666666666667\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"plt.figure(figsize=(30,20))\n",
"tree.plot_tree(clf1)\n",
"plt.show()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 880
},
"id": "6XSnZLsSs_Uz",
"outputId": "22680862-7af1-4dab-af22-d4cc6113fc6b"
},
"execution_count": 191,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 2160x1440 with 1 Axes>"
],
"image/png": "iVBORw0KGgoAAAANSUhEUgAABpgAAARNCAYAAABGy0PtAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAAsTAAALEwEAmpwYAAEAAElEQVR4nOzdeXgV5d3/8fcdgoDSAgr6uP3qY1FxxwVlCRBAkWpBUaxKq1UqWFwqUqWordrN9RGtVXHDHUurVkRFK1WjICpq3XCpdaFuVUQWwbLz/f0xIRpJIEHCZHm/rmsuSM59Zj7n5GRmMt+57ztFBJIkSZIkSZIkSVJVFeQdQJIkSZIkSZIkSXWLBSZJkiRJkiRJkiRViwUmSZIkSZIkSZIkVYsFJkmSJEmSJEmSJFWLBSZJkiRJkiRJkiRViwUmSZIkSZIkSZIkVYsFJkmSJEmSJEmSJFWLBSZJkiRJkiRJkiRViwUmSZIkSZIkSZIkVYsFJkmSJEmSJEmSJFWLBSZJkiRJkiRJkiRViwUmSZIkSZIkSZIkVYsFJkmSJEmSJEmSJFWLBSZJkiRJkiRJkiRViwUmSZIkSZIkSZIkVYsFJkmSJEmSJEmSJFWLBSZJkiRJkiRJkiRViwUmSZIkSZIkSZIkVYsFJkmSJEmSJEmSJFWLBSZJkiRJkiRJkiRViwUmSZIkSZIkSZIkVYsFJkmSJEmSJEmSJFWLBSZJkiRJkiRJkiRViwUmSZIkSZIkSZIkVYsFJkmSJEmSJEmSJFWLBSZJkiRJkiRJkiRViwUmSZIkSZIkSZIkVYsFJkmSJEmSJEmSJFWLBSZJkiRJkiRJkiRViwUmSZIkSZIkSZIkVYsFJkmSJEmSJEmSJFWLBSZJkiRJkiRJkiRViwUmSZIkSZIkSZIkVYsFJkmSJEmSJEmSJFWLBSZJkiRJkiRJkiRViwUmSZIkSZIkSZIkVUth3gEkSZIkSZKkmtCsWbOPFy1atFneOZSvpk2bfrJw4cL/yTuHJNU3KSLyziBJkiRJkiStcyml8NqXUkpERMo7hyTVNw6RJ0mSJEmSJEmSpGqxwCRJkiRJkiRJkqRqscAkSZIkSZIkSZKkarHAJEmSJEmSJOWopKSErbbaitGjRwNwxhlnsMUWWzBs2LBybXbeeWdat25d7rm9evWiffv2NZLrnnvuoXPnznTr1o0333xzlcfbtm1LcXExxcXF3HXXXTWSQZJUeyUnOpQkSZIkSVJ9lFKKunDtq6SkhPHjx3P55ZcD8J///Ic33niDe++9t+x7c+fOZYMNNqBz5868+OKL5Z7fvn37Vb5XkS+++IKNNtqoSpmWLVtGp06deOKJJ3jnnXc499xzVykiVXW7eUspEREp7xySVN/Yg0mSJEmSJElaTx599FGOPvpoAE4++WTuvvvuVdpsvvnmpFS+HtKyZUs23HDDam/v008/ZdSoUXTr1o1HH320ys/717/+Rbt27WjWrBk777wz77///iptFi5cSHFxMYcffjgff/xxtbNJkuq2wrwDSJIkSZIkSQ1Fz549mTBhAieffDILFizgsMMOo6SkZJ1v56GHHuKmm25iyZIlDBw4kEmTJtGkSRMARowYwbRp08q179evH8OHDy/7es6cObRo0aLs6xUrVqyyjalTp7LJJptw33338fOf/5yxY8eu89chSaq9LDBJkiRJkiRJ69Epp5xC27Ztef3112tsG2PHjuWzzz7jtNNO44ADDqCw8MvLgBdffPEan9+qVSvmzZtX9nVBwaoDIW2yySYA9O3bl1//+tfrILUkqS5xiDxJkiRJkiRpPRo+fDjXX389I0eOrLFt3Hbbbdx66628/PLL9OzZk9NPP51///vfQNaDqbi4uNwyatSocs/fbrvteOONN1i0aBGvvfYaW221VbnHFy9ezOLFiwGYNm3aKo9Lkuo/ezBJkiRJkiRJ68k111xDhw4dOP744/n444+59tpr2WGHHc
q1ueSSSxg3bhyffvop77//PnfffTcvvfQSP//5z3n77bfZb7/9+P3vf8++++672m1tscUWnHnmmZx55plMnjyZjz76iO985ztV6sFUWFjIyJEj6dGjB40bN+b6668H4Oabb6Zt27Zsu+22fP/736d58+YUFhZyzTXXrP2bIkmqk1JE5J1BkiRJkiRJWudSSlEXrn09/fTTDBkyhKFDhzJ06NBqPbdXr140adKEiRMn1lC6ui+lRESkvHNIUn1jgUmSJEmSJEn1Ul0pMKlmWWCSpJrhHEySJEmSJEmSJEmqFgtMkiRJkiRJUi0xbNgwFixYUOnjxx9//Fqv+5577qFz585069aNN998c5XHb7/9djp06MA+++zDjTfeCMAVV1zBvvvuS6dOncrmbnrllVcoLi6muLiY3XbbjUMPPXStM0mS6i6HyJMkSZIkSVK95BB5X1q2bBmdOnXiiSee4J133uHcc8/lrrvuKtemffv2PPXUUxQWFrLnnnvyyiuv8Pbbb/Pd736XiKBr167ceeedbL755mXPOffcc2nbti1HH330+n5JVeYQeZJUMwrzDiBJkiRJkiQ1NMuWLePII49kzpw57LrrrsyYMYPx48dTXFzM+PHjGT9+PPfddx9Lly7lww8/5O6772abbbahffv2vPjii9Xe3r/+9S/atWtHs2bN2HnnnXn//fdXabP99tuzYMECmjRpwre+9S0Avvvd7wJZkaZx48YUFJQfEOnee+/liSeeqP4bIEmq8xwiT5IkSZIkSVrPxo8fz7bbbssjjzzC3nvvXWGbjTbaiAkTJjBs2DBuu+22Stc1YsSIsiHrVi6jRo0q12bOnDm0aNGi7OsVK1assp5DDjmEPfbYg3bt2nHSSSeVe+yvf/0r22yzDZtttlnZ91566SX+93//l29/+9tVes2SpPrFHkySJEmSJEnSevbWW2+x1157AdChQ4dVhqsD2GOPPQDYeuut+cc//lHpulbOjbQ6rVq1Yt68eWVff70n0vz587nwwgvL5mYqLi6mf//+bLjhhjz//PP88Y9/5IEHHij3nD//+c8cccQRa9y2JKl+ssAkSZIkSZIkrWdt27blueee44gjjuD555+vsE1KX04btLq5pEaMGMG0adPKfa9fv34MHz687OvtttuON954g0WLFvHOO++w1VZblWtfUFDABhtsQLNmzcq2t2zZMmbMmMGJJ57Ivffey4YbbljuOffffz9nn3121V6wJKnescAkSZIkSZIkrWeHHHII48aNo1evXuywww40btx4rddVlR5MhYWFjBw5kh49etC4cWOuv/56AG6++Wbatm1LUVERRx11FJ06dSIiGDBgAN/+9rcZPHgws2fP5sgjjwTg6quvZqedduK5555jhx12YKONNlrr3JKkui2t7u4HSZIkSZIkqa5KKUVtvva1dOlSGjduzO23386rr77KBRdckHekeimlRESkNbeUJFWHPZgkSZIkSZKkHPTv35/PP/+cRo0aMXbs2LzjSJJULfZgkiRJkiRJUr1U23swaf2wB5Mk1YyCvANIkiRJkiRJ+mbat29fo+t//fXXKSoqolu3bhx00EHMnTsXgBdffJFOnTrRtWtXfvrTn9ZoBklS7WKBSZIkSZIkSdJqtWnThgceeIAnnniCfv36cc011wBwxRVX8H//939MnjyZOXPm8PLLL+ecVJK0vlhgkiRJkiRJkmrY9OnT6dixIz169OCEE04A4JJLLqG4uJg999yTCRMmAHDeeecxcOBADjzwQHr16sWYMWPo1asXvXv3Zvny5ZSUlLD//vvTr18/9tprL6ZNm1ZuO7NmzeKQQw6hZ8+eHH744SxatKjCbVdX69atadGiBQBNmjShoCC7rLjTTjsxd+5cIoIvvviCli1bruU7JEmqa5yDSZIkSZIkSfVSbZqD6dJLL6VVq1YMGjSIFStWUFBQwBdffMFGG23E3Llz6d27N9OmTeO8884DskLT4MGD2W
qrrTj33HMZPHgwxxxzDMuXL2fkyJFMnTqV9957j+OOO47HHnuM9u3b8+KLL3L66afTu3dvevfuzVVXXUWTJ
},
"metadata": {
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"source": [
"from sklearn.metrics import confusion_matrix\n",
"import seaborn as sns\n",
"from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score\n",
"plt.figure(figsize=(12,8))\n",
"cm = confusion_matrix(y_test,y_pred)\n",
" \n",
"#Plot the confusion matrix.\n",
"sns.heatmap(cm,\n",
" annot=True,\n",
" fmt='g',\n",
" xticklabels=['0', '1' , '2' , '3' , '4' , '5' , '6' , '7'],\n",
" yticklabels=['0', '1' , '2' , '3' , '4' , '5' , '6' , '7'])\n",
"plt.ylabel('Prediction',fontsize=13)\n",
"plt.xlabel('Actual',fontsize=13)\n",
"plt.title('Confusion Matrix',fontsize=17)\n",
"plt.show()\n",
" \n",
" "
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 520
},
"id": "Wjy1kMqqtHbj",
"outputId": "59a66ab6-a859-4617-91e5-468e8ec69684"
},
"execution_count": 192,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 864x576 with 2 Axes>"
],
"image/png": "iVBORw0KGgoAAAANSUhEUgAAApYAAAH3CAYAAAABsh9oAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAAsTAAALEwEAmpwYAAA4jElEQVR4nO3de5gcZZn//89nwiDIWQGTSaITDWJEvogGBNn1B7oQZAPhUgTZ5SDihq+gEnXBw/LVFUWFxSBZd5WsKIkIJuKBAFFAASFsgAQMGCaIhEQyM4kcDYZjMnP//ugi1cz0THeT7nlmut8vrrqmq7q66u776ive3k/VU44IAQAAAFuqJXUAAAAAaAwUlgAAAKgJCksAAADUBIUlAAAAaoLCEgAAADVBYQkAAICaoLAE8IrZfq/tpbaftR22316Hc9xi+5ZaH3eky/L976njAIBiFJbACGb7Nba/avte23+z/ZztB2zPsr1Hnc+9s6SfqfDvyKcknSjpz/U851CzfVlWwD2ffd++729l+y/ZPr95hec42vaXtjhYABgGtkodAIBXxvY+kn4l6bWS5km6RNJGSW+VdJyk/ytp6zqGMFnSzpLOjYhf1vE8h9Xx2JUISZZ0rKTZfd6bIml3SS9swfGPlnSCpHOr/Ny2kjZtwXkBoOYoLIERyPaOkhZIapW0X0Tc1+f9L0o6r85h7J79/Ws9TxIRL9bz+BXolbRQheKvb2F5oqQlynNRV7ZbJG0dEc9HxPNDcU4AqAZD4cDINF3S6yX9a9+iUpIi4rmI+EzxNtsH2f6t7Q3Z8lvbB/bZ5yPZsO4htr9he102vH6j7QlF+90i6cfZ6s3ZZ27J3rvM9uq+MRUdu71o29ttX5sNJz9vu8v2z2yPKz5X32ssbW9j++u2V9t+Mfv7dduv6rPfatu/sb2f7duz77LG9styU4HLJf2d7TcUHXtHSUdl7/Vj+7O2b7P9mO0XsksU/tW2i7+bpJMljcpyE7Yje689Wz/H9mm2H1ChM3p49v7mayxdcIPt9bZf3yeOX9p+xvabq/zOAFA1OpbAyHS0CkXGTyrZ2fZ7JN0oqVt5J/M0FYrC90XE7X0+8h+Snpf0dUm7SvpXFQrJd2fvnyepQ9LHs31WSPpLNV/A9m6SfiPpKUkzJT0uqU2F4eWxkjoH+Jwl/VzS+1Uo6hZncX1B0t6SjuzzkddLukbSj7L9j5P0Ldv3R8T1FYZ7raT1KnQtX8rfB1XoGP9EUqlC9TOSrpN0lQpD1oeqkNddJP1bts95Kvwf/INUKDBL+bCknVS41OFJSav77hARYfsUSX+QNMf2e7NtH5U0TdIZEfFghd8VAF65iGBhYRlhiwoFxr1V7L9UhQLudUXbxqhQLN1VtO0jKlxTeLukUUXbZ2Tb9yradkK27eA+57pM0uoSMbx07PZsfVq2vl+Z2G+RdEvR+tTsc9/os99/ZNuPKNq2Ott2eNG2V6lQBP+0grxdJmlT9nq2pI6i926SdF3ReX7T57OvLnG870vaIOlVpc7RZ9/2LPZnJY0r8X5I+vc+2z6cbf+spAmSnpb069S/VxYWluZZGAoHRqYdVSgayrI9WtI7Jf0oIjZ3FSNirQodvP1s971G8JKI6Cla/132942vPOR+/pr9ParvEHYZU7O/F/bZfkGf91+yKiJ+/dJKRLwg6Q5V/10ulzTJ9jttj5d0sAYYBs/O86y0+c7xXWzvqkKRvJ2kPas474KIKNm9LXHOn0i6UoVO6M9V6JR+tIpzAcAWobAERqanJe1Q4b7t2d8HSrzXkf2d0Gd732mDnsr+vqbCc1biVhWGkc+R9ITtX9v+pO3Xlvlcu6THIuKJ4o0R8ZgKw+nlvotU+D7VfpfbsmOdIOmfVeg8/nKgnW0fYfsOSc+p0GF+TIXheKlwN32lVlYZ5+lZbG+X9MmI6K7y8wDwilFYAiPTCkl7Vtnpq0bPANs9wPZiMcD2US/bqeB4Fb
qp56swfc5Fkh6w/bZKA63AlnyXzSIiVLjO9HgV7gb/eUQ8V/LA9rtVuK5zkwqF3j+qcI3l57Jdqvm3t+Q5BvEuFaagkgrXnALAkKGwBEamqyVto8LciuWszv6+pcR7k7K/q2oQ00ueUumOXHupnSPinoj4akT8f5LekX32s4Mcf7Wk3fp2NrOh5l1V2+/S1+WSXqfCXKEDDoNL+pCkFyX9Q0T8T0QsjIjfqPTUTAMV4lWz/RpJP5C0TNL/SDorK3IBYEhQWAIj0yUq3DX9rVLdvWw6npmSFBHrVLh558Tiaymzay9PVOHmnUdrGNtDknayvW/RubZXn7ues+sO+3YNV6jQodt5kONfk/3teyf2WX3er7mIWJGd9/+pcPPOQHpVKBg3d2ltbyPpkyX2fUaF6Ya2r0GI31WhW3miCjdcrZQ01/Z2NTg2AJTFdEPACBQR621PU2Hi7rttXynpThWevPMWFabU2V158fVZFab2ucP2S5N8n6ZC17PaOR3LuVLSNyX9wvbFKkzJ81EV7sQeX7TfyZI+afsXKhSjW6lwV/MO2TEGslDSryV9MZvv8k5JB6hQTF0bEb+q7dd5uYi4qILdFkj6tKTf2P6RCt/pZBWmcOrr7uzvf7rwWMie7Cacqtj+JxU62GdFxPJs20mSFkn6lgpPYgKAuqKwBEaoiLgn61Z+WoWJuj+kQodslQrP8J5VtO+ttt+rwmMDz8k23yXpnyPif2sc11NZ0XuRCtdOdqkwT+XfJP2waNffqfBYyA9KGq3CtDr3Szo6Iq4e5Phh+wOSviTpn1S45nGtpG+o+sci1kVE/M72iZK+qMJ3f1SFaYVuk3RDn90vV6Ew/oAKxadV4fykL8kK7P9S4YaomUVx3GH7m5L+zfYvi++OB4B6cOF6dAAAAGDLcI0lAAAAaoLCEgAAoEnZHmX797avLfHeq2zPs/2Q7Tttt5c7HoUlAABA8zpThRk5SjlV0lMRMVH5dfODorAEAABoQtmNf/8o6fsD7DJN0pzs9VWS3ldimriXobAEAABoTt+WdLYKc++WMlbSGkmKiE2S1it/sldJI2K6oY+2H8Ot65m53YtThwAAQEPY9GJXVY92rYeNjz9clxpn693edJqk6UWbZkfES/MYy/ZUSY9GxN22D67VeUdEYQkAAIDKZUXk7EF2OUjSUbaPUOFhGTvavjwiTijap0uFB1t02t5K0k6SnhjsvAyFAwAApNLbU5+ljIj4QkSMi4h2FZ56dlOfolIqPEXspcfxHpPtM2iHlY4lAABAKjHQ5Y1p2D5X0tKIWCDpUkk/sv2QpCdVKEAHRWEJAADQxCLiFkm3ZK+/VLT9eRUeF1wxCksAAIBUeodXx3JLcY0lAAAAaoKOJQAAQCIxzK6x3FIUlgAAAKkwFA4AAAD0R8cSAAAglQYbCqdjCQAAgJqgYwkAAJBKBU/JGUnoWAIAAKAm6FgCAACk0mDXWFJYAgAApMJ0QwAAAEB/dCwBAAASabQn79CxBAAAQE3QsQQAAEilwa6xpLAEAABIhaFwAAAAoD8KywqccsHp+vbSS3Xu9TNTh5LclMMO1v3Lb9UDHYt09llnpA4nKXKRIxc5cpEjFzlykSMXffT21GdJhMKyArdfdbNmnvy11GEk19LSolkXn6epR56gvfc5RMcdd7QmTdojdVhJkIscuciRixy5yJGLHLlofBSWFXjwrhV6Zv2G1GEkt/9++2rlytVateoRbdy4UfPnX62jjpySOqwkyEWOXOTIRY5c5MhFjlyUEL31WRKhsETF2saO1prO7s3rnV1r1dY2OmFE6ZCLHLnIkYscuciRixy5KKG3tz5LIkNyV7jtt0iaJmlstqlL0oKIWDEU5wcAAED91b1jaftzkn4iyZLuyhZLutL25wf53HTbS20v/ePfHq53mKhAd9c6jR/Xtnl93Ngx6u5elzCidMhFjlzkyEWOXOTIRY5clMBQeNVOlbRfRHwzIi7Plm9K2j97r6
SImB0RkyNi8p47vHEIwkQ5S5Yu08SJE9TePl6tra069thpuubaG1KHlQS5yJGLHLnIkYscuciRi8Y3FEPhv
},
"metadata": {
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"source": [
"print(classification_report(y_test, y_pred))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ONCikocltz97",
"outputId": "762a6219-0f63-4fb0-9a3d-978787857cf2"
},
"execution_count": 193,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" precision recall f1-score support\n",
"\n",
" 0 0.25 1.00 0.40 1\n",
" 1 0.00 0.00 0.00 1\n",
" 2 0.00 0.00 0.00 1\n",
" 3 0.00 0.00 0.00 1\n",
" 4 0.67 1.00 0.80 4\n",
" 5 0.00 0.00 0.00 1\n",
" 6 0.00 0.00 0.00 1\n",
" 7 0.00 0.00 0.00 2\n",
"\n",
" accuracy 0.42 12\n",
" macro avg 0.11 0.25 0.15 12\n",
"weighted avg 0.24 0.42 0.30 12\n",
"\n"
]
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.9/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
" _warn_prf(average, modifier, msg_start, len(result))\n",
"/usr/local/lib/python3.9/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
" _warn_prf(average, modifier, msg_start, len(result))\n",
"/usr/local/lib/python3.9/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
" _warn_prf(average, modifier, msg_start, len(result))\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"### K-Fold Cross Validation"
],
"metadata": {
"id": "6mTCBRDdvsI5"
}
},
{
"cell_type": "code",
"source": [
"X = wine.drop('Recommendation',axis=1)\n",
"y = wine['Recommendation']"
],
"metadata": {
"id": "ECTwhEZwwLkU"
},
"execution_count": 195,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from sklearn.model_selection import KFold\n",
"kf = KFold(n_splits=5, shuffle=True, random_state=42)\n",
"\n",
"# initialize an empty list to store the performance of each fold\n",
"scores = []\n",
"\n",
"# iterate over each fold\n",
"for train_index, test_index in kf.split(X):\n",
" # extract the training and testing sets\n",
" X_train, X_test = X.iloc[train_index], X.iloc[test_index]\n",
" y_train, y_test = y.iloc[train_index], y.iloc[test_index]\n",
" \n",
" # train the model\n",
" model = tree.DecisionTreeClassifier()\n",
" model.fit(X_train, y_train)\n",
"\n",
" # evaluate the performance on the test set and append the score to the list\n",
" score = model.score(X_test, y_test)\n",
" scores.append(score)"
],
"metadata": {
"id": "qjtOJfy-t_kj"
},
"execution_count": 196,
"outputs": []
},
{
"cell_type": "code",
"source": [
"scores"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "4TLrmi_rwX5j",
"outputId": "01b8496d-31d7-4aa9-8518-a2c24a54f0dd"
},
"execution_count": 197,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[0.375, 0.0, 0.25, 0.25, 0.25]"
]
},
"metadata": {},
"execution_count": 197
}
]
},
{
"cell_type": "code",
"source": [
"\n",
"# calculate the average score across all folds\n",
"mean_score = sum(scores) / len(scores)\n",
"\n",
"print('Average score across all folds:', mean_score)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "RaaNfImIwDej",
"outputId": "2595e52e-80d3-4d6a-89a7-7d6c955a914e"
},
"execution_count": 198,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Average score across all folds: 0.225\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"### Stratified K-Fold Cross Validation"
],
"metadata": {
"id": "05mPzYQozTA1"
}
},
{
"cell_type": "code",
"source": [
"from sklearn.model_selection import StratifiedKFold\n",
"\n",
"# X is your feature matrix, y is your target variable\n",
"skf = StratifiedKFold(n_splits=5)\n",
"skf_score = []\n",
"\n",
"for train_index, test_index in skf.split(X, y):\n",
" X_train, X_test = X.iloc[train_index], X.iloc[test_index]\n",
" y_train, y_test = y.iloc[train_index], y.iloc[test_index]\n",
"\n",
" # Train and evaluate your model using the training and test sets\n",
" model = tree.DecisionTreeClassifier()\n",
" model.fit(X_train, y_train)\n",
" score = model.score(X_test, y_test)\n",
" skf_score.append(score)\n",
" print(\"Test score: {}\".format(score))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Gvzhfx_owoUT",
"outputId": "dfca45db-9c49-488b-cc71-45e89c52f2ce"
},
"execution_count": 201,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Test score: 0.125\n",
"Test score: 0.375\n",
"Test score: 0.25\n",
"Test score: 0.25\n",
"Test score: 0.5\n"
]
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.9/dist-packages/sklearn/model_selection/_split.py:700: UserWarning: The least populated class in y has only 3 members, which is less than n_splits=5.\n",
" warnings.warn(\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"skf_score"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "3lYM1Kguzj6j",
"outputId": "95c02fca-e8d8-4d60-eb26-9976926186fe"
},
"execution_count": 202,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[0.125, 0.375, 0.25, 0.25, 0.5]"
]
},
"metadata": {},
"execution_count": 202
}
]
},
{
"cell_type": "code",
"source": [
"mean_score_skf = sum(skf_score) / len(skf_score)\n",
"\n",
"print('Average score across all folds(skf):', mean_score_skf)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "5v9a2zth0GEC",
"outputId": "24263aaa-8693-4cc9-eaf1-190bdd135627"
},
"execution_count": 203,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Average score across all folds(skf): 0.3\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"new_data = [['Red','Light-Bodied','None']]"
],
"metadata": {
"id": "xpP2OCHi1AF6"
},
"execution_count": 216,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from sklearn.preprocessing import OneHotEncoder\n",
"ohe = OneHotEncoder()\n",
"new_data_transformed = ohe.fit_transform(new_data).reshape(1,-1)"
],
"metadata": {
"id": "GOhB6sZf0OsR"
},
"execution_count": 219,
"outputs": []
},
{
"cell_type": "code",
"source": [
"new_data_transformed.shape"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "5w27VUrH039B",
"outputId": "c83493ba-ab65-4ed7-8467-dcbef5776ef3"
},
"execution_count": 220,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(1, 3)"
]
},
"metadata": {},
"execution_count": 220
}
]
},
{
"cell_type": "code",
"source": [
"new_data_transformed[0][0]"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "gmkyYi7r3aKa",
"outputId": "0e1def97-7c5d-4808-88c5-2c8a930ce711"
},
"execution_count": 226,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<1x3 sparse matrix of type '<class 'numpy.float64'>'\n",
"\twith 3 stored elements in Compressed Sparse Row format>"
]
},
"metadata": {},
"execution_count": 226
}
]
},
{
"cell_type": "markdown",
"source": [
"### Using Pipeline"
],
"metadata": {
"id": "50I5UMNa5MPJ"
}
},
{
"cell_type": "code",
"source": [
"wine = pd.read_excel('/content/wine_data.xlsx')"
],
"metadata": {
"id": "EcbyQ1vW5N4J"
},
"execution_count": 240,
"outputs": []
},
{
"cell_type": "code",
"source": [
"wine['Recommendation'] = wine['Recommendation'].fillna(wine['Recommendation'].value_counts().sort_values(ascending=False).keys()[0])\n",
"wine['White_Wine'] = wine['White_Wine'].fillna('None')\n",
"wine['Red_Wine'] = wine['Red_Wine'].fillna('None')"
],
"metadata": {
"id": "sEfcQb5a5epv"
},
"execution_count": 241,
"outputs": []
},
{
"cell_type": "code",
"source": [
"X = wine.drop('Recommendation',axis=1)\n",
"y = wine['Recommendation']"
],
"metadata": {
"id": "Lfk7Ha6p5-jK"
},
"execution_count": 242,
"outputs": []
},
{
"cell_type": "code",
"source": [
"le = LabelEncoder()\n",
"y = le.fit_transform(y)"
],
"metadata": {
"id": "q4S2u4Js8Muq"
},
"execution_count": 244,
"outputs": []
},
{
"cell_type": "code",
"source": [
"y"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "lSNmXh-g8UYx",
"outputId": "05a0768e-4f5e-4aac-a88a-6d8c4fe013d8"
},
"execution_count": 245,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array([4, 7, 6, 3, 4, 2, 6, 1, 7, 0, 3, 5, 4, 2, 6, 1, 7, 0, 3, 5, 4, 7,\n",
" 4, 0, 6, 4, 4, 1, 4, 7, 4, 0, 4, 3, 6, 5, 4, 4, 2, 4])"
]
},
"metadata": {},
"execution_count": 245
}
]
},
{
"cell_type": "code",
"source": [
"X_train, X_test, y_train, y_test = train_test_split(\n",
" X, y, test_size=0.2, random_state=42,stratify=y)"
],
"metadata": {
"id": "3fx7c9hY5wha"
},
"execution_count": 243,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from sklearn.pipeline import Pipeline\n",
"pipeline = Pipeline([\n",
" ('onehot', OneHotEncoder(handle_unknown='ignore')),\n",
" ('classifier', tree.DecisionTreeClassifier())\n",
"])"
],
"metadata": {
"id": "0G5B1atJ6uhy"
},
"execution_count": 247,
"outputs": []
},
{
"cell_type": "code",
"source": [
"pipeline.fit(X_train, y_train)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 126
},
"id": "wBakGDQX638K",
"outputId": "0ba601ab-befd-42ce-fbfa-b96df3c886fb"
},
"execution_count": 248,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Pipeline(steps=[('onehot', OneHotEncoder(handle_unknown='ignore')),\n",
" ('classifier', DecisionTreeClassifier())])"
],
"text/html": [
"<style>#sk-container-id-3 {color: black;background-color: white;}#sk-container-id-3 pre{padding: 0;}#sk-container-id-3 div.sk-toggleable {background-color: white;}#sk-container-id-3 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-3 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-3 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-3 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-3 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-3 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-3 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-3 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-3 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-3 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-3 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-3 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-3 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-3 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-3 div.sk-item {position: relative;z-index: 1;}#sk-container-id-3 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-3 div.sk-item::before, #sk-container-id-3 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-3 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-3 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-3 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-3 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-3 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-3 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-3 div.sk-label-container {text-align: center;}#sk-container-id-3 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-3 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-3\" class=\"sk-top-con
" (&#x27;classifier&#x27;, DecisionTreeClassifier())])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-7\" type=\"checkbox\" ><label for=\"sk-estimator-id-7\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[(&#x27;onehot&#x27;, OneHotEncoder(handle_unknown=&#x27;ignore&#x27;)),\n",
" (&#x27;classifier&#x27;, DecisionTreeClassifier())])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-8\" type=\"checkbox\" ><label for=\"sk-estimator-id-8\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OneHotEncoder</label><div class=\"sk-toggleable__content\"><pre>OneHotEncoder(handle_unknown=&#x27;ignore&#x27;)</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-9\" type=\"checkbox\" ><label for=\"sk-estimator-id-9\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">DecisionTreeClassifier</label><div class=\"sk-toggleable__content\"><pre>DecisionTreeClassifier()</pre></div></div></div></div></div></div></div>"
]
},
"metadata": {},
"execution_count": 248
}
]
},
{
"cell_type": "code",
"source": [
"y_pred = pipeline.predict(X_test)\n",
"\n",
"# Evaluate the accuracy of your model\n",
"accuracy = accuracy_score(y_test, y_pred)\n",
"print(f\"Accuracy: {accuracy}\")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "swEZCh2_65SR",
"outputId": "bac8fba1-51f6-4f7b-980a-41e8ab182e4a"
},
"execution_count": 249,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Accuracy: 0.375\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"from sklearn.model_selection import cross_val_score\n",
"cross_val_score(pipeline,X_train,y_train,cv=5,scoring='accuracy').mean()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "K8DDm4r1ADEp",
"outputId": "101bcb1b-170b-495f-e568-ec12b645e567"
},
"execution_count": 251,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.9/dist-packages/sklearn/model_selection/_split.py:700: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=5.\n",
" warnings.warn(\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.2857142857142857"
]
},
"metadata": {},
"execution_count": 251
}
]
},
{
"cell_type": "markdown",
"source": [
"### GridSearchCV"
],
"metadata": {
"id": "4FPMpMDHAm8g"
}
},
{
"cell_type": "code",
"source": [
"params = {'classifier__criterion':['gini','entropy'],'classifier__max_depth':[4,5,6,7,8,9,10,11,12,15,20,30,40,50,70,90,120,150]}"
],
"metadata": {
"id": "xd41Bw22BN7r"
},
"execution_count": 255,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from sklearn.model_selection import GridSearchCV\n",
"grid = GridSearchCV(pipeline,params,cv=5,scoring='accuracy')\n",
"grid.fit(X_train,y_train)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 208
},
"id": "XppiX9naAmaR",
"outputId": "729ad00f-86bf-4d17-9d30-18694fcc6d98"
},
"execution_count": 256,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.9/dist-packages/sklearn/model_selection/_split.py:700: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=5.\n",
" warnings.warn(\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"GridSearchCV(cv=5,\n",
" estimator=Pipeline(steps=[('onehot',\n",
" OneHotEncoder(handle_unknown='ignore')),\n",
" ('classifier',\n",
" DecisionTreeClassifier())]),\n",
" param_grid={'classifier__criterion': ['gini', 'entropy'],\n",
" 'classifier__max_depth': [4, 5, 6, 7, 8, 9, 10, 11, 12,\n",
" 15, 20, 30, 40, 50, 70, 90,\n",
" 120, 150]},\n",
" scoring='accuracy')"
],
"text/html": [
"<style>#sk-container-id-4 {color: black;background-color: white;}#sk-container-id-4 pre{padding: 0;}#sk-container-id-4 div.sk-toggleable {background-color: white;}#sk-container-id-4 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-4 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-4 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-4 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-4 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-4 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-4 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-4 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-4 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-4 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-4 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-4 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-4 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-4 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-4 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-4 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-4 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-4 div.sk-item {position: relative;z-index: 1;}#sk-container-id-4 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-4 div.sk-item::before, #sk-container-id-4 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-4 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-4 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-4 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-4 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-4 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-4 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-4 div.sk-label-container {text-align: center;}#sk-container-id-4 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-4 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-4\" class=\"sk-top-con
" estimator=Pipeline(steps=[(&#x27;onehot&#x27;,\n",
" OneHotEncoder(handle_unknown=&#x27;ignore&#x27;)),\n",
" (&#x27;classifier&#x27;,\n",
" DecisionTreeClassifier())]),\n",
" param_grid={&#x27;classifier__criterion&#x27;: [&#x27;gini&#x27;, &#x27;entropy&#x27;],\n",
" &#x27;classifier__max_depth&#x27;: [4, 5, 6, 7, 8, 9, 10, 11, 12,\n",
" 15, 20, 30, 40, 50, 70, 90,\n",
" 120, 150]},\n",
" scoring=&#x27;accuracy&#x27;)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-10\" type=\"checkbox\" ><label for=\"sk-estimator-id-10\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">GridSearchCV</label><div class=\"sk-toggleable__content\"><pre>GridSearchCV(cv=5,\n",
" estimator=Pipeline(steps=[(&#x27;onehot&#x27;,\n",
" OneHotEncoder(handle_unknown=&#x27;ignore&#x27;)),\n",
" (&#x27;classifier&#x27;,\n",
" DecisionTreeClassifier())]),\n",
" param_grid={&#x27;classifier__criterion&#x27;: [&#x27;gini&#x27;, &#x27;entropy&#x27;],\n",
" &#x27;classifier__max_depth&#x27;: [4, 5, 6, 7, 8, 9, 10, 11, 12,\n",
" 15, 20, 30, 40, 50, 70, 90,\n",
" 120, 150]},\n",
" scoring=&#x27;accuracy&#x27;)</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-11\" type=\"checkbox\" ><label for=\"sk-estimator-id-11\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">estimator: Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[(&#x27;onehot&#x27;, OneHotEncoder(handle_unknown=&#x27;ignore&#x27;)),\n",
" (&#x27;classifier&#x27;, DecisionTreeClassifier())])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-12\" type=\"checkbox\" ><label for=\"sk-estimator-id-12\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OneHotEncoder</label><div class=\"sk-toggleable__content\"><pre>OneHotEncoder(handle_unknown=&#x27;ignore&#x27;)</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-13\" type=\"checkbox\" ><label for=\"sk-estimator-id-13\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">DecisionTreeClassifier</label><div class=\"sk-toggleable__content\"><pre>DecisionTreeClassifier()</pre></div></div></div></div></div></div></div></div></div></div></div></div>"
]
},
"metadata": {},
"execution_count": 256
}
]
},
{
"cell_type": "code",
"source": [
"grid.best_score_"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Rmghi5mxBnma",
"outputId": "c2287da2-f266-4606-8197-51d389eea71e"
},
"execution_count": 258,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.35238095238095235"
]
},
"metadata": {},
"execution_count": 258
}
]
},
{
"cell_type": "code",
"source": [
"grid.best_params_"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "6oesag6TBs-A",
"outputId": "6dc525d8-8a24-4678-81fd-addab9e346c7"
},
"execution_count": 259,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'classifier__criterion': 'gini', 'classifier__max_depth': 4}"
]
},
"metadata": {},
"execution_count": 259
}
]
},
{
"cell_type": "markdown",
"source": [
"## Exporting the Pipeline"
],
"metadata": {
"id": "ncTd9LuTBxzD"
}
},
{
"cell_type": "code",
"source": [
"import pickle\n",
"pickle.dump(pipeline,open('/content/sample_data/pipe.pkl','wb'))"
],
"metadata": {
"id": "6lNtvIt7B5fa"
},
"execution_count": 260,
"outputs": []
},
{
"cell_type": "code",
"source": [
"pipe = pickle.load(open('/content/sample_data/pipe.pkl','rb'))"
],
"metadata": {
"id": "qREsT8mGCIvL"
},
"execution_count": 261,
"outputs": []
},
{
"cell_type": "code",
"source": [
"new_data = np.array(['Red','Light-Bodied','None'],dtype='object').reshape(1,3)"
],
"metadata": {
"id": "mrZsYNQF68zR"
},
"execution_count": 265,
"outputs": []
},
{
"cell_type": "code",
"source": [
"new_data"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "-m-6qe2GCk5o",
"outputId": "85d423c3-5a1e-4072-8f99-6947d8a2dd18"
},
"execution_count": 266,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array([['Red', 'Light-Bodied', 'None']], dtype=object)"
]
},
"metadata": {},
"execution_count": 266
}
]
},
{
"cell_type": "code",
"source": [
"pipe.predict(new_data)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "0y6C3YUjC41h",
"outputId": "01b5ba9b-ad77-4e84-c2c2-1eddced5e38b"
},
"execution_count": 267,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.9/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but OneHotEncoder was fitted with feature names\n",
" warnings.warn(\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array(['Pinot Noir'], dtype=object)"
]
},
"metadata": {},
"execution_count": 267
}
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "-yrFSUIi7Xgx"
},
"execution_count": null,
"outputs": []
}
]
}