2555 lines
320 KiB
Plaintext
2555 lines
320 KiB
Plaintext
|
|
{
|
||
|
|
"nbformat": 4,
|
||
|
|
"nbformat_minor": 0,
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"provenance": []
|
||
|
|
},
|
||
|
|
"kernelspec": {
|
||
|
|
"name": "python3",
|
||
|
|
"display_name": "Python 3"
|
||
|
|
},
|
||
|
|
"language_info": {
|
||
|
|
"name": "python"
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"cells": [
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 92,
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "R9zvSxjt24af",
|
||
|
|
"outputId": "de9749be-551f-4613-e586-b70389d852e3"
|
||
|
|
},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "stream",
|
||
|
|
"name": "stdout",
|
||
|
|
"text": [
|
||
|
|
"Dimensions: 178 x 13\n",
|
||
|
|
"\n",
|
||
|
|
"Header: ['alcohol', 'malic acid', 'ash', 'ash alcalinity', 'magnesium', 'total phenols', 'flavanoids', 'nonflavanoid phenols', 'proanthocyanins', 'color intensity', 'hue', 'OD280/OD315 of diluted wines', 'proline']\n",
|
||
|
|
"1st row [1.423e+01 1.710e+00 2.430e+00 1.560e+01 1.270e+02 2.800e+00 3.060e+00\n",
|
||
|
|
" 2.800e-01 2.290e+00 5.640e+00 1.040e+00 3.920e+00 1.065e+03]\n"
|
||
|
|
]
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
"from mlxtend.data import wine_data\n",
|
||
|
|
"X, y = wine_data()\n",
|
||
|
|
"\n",
|
||
|
|
"print('Dimensions: %s x %s' % (X.shape[0], X.shape[1]))\n",
|
||
|
|
"print('\\nHeader: %s' % ['alcohol', 'malic acid', 'ash', 'ash alcalinity',\n",
|
||
|
|
" 'magnesium', 'total phenols', 'flavanoids',\n",
|
||
|
|
" 'nonflavanoid phenols', 'proanthocyanins',\n",
|
||
|
|
" 'color intensity', 'hue', 'OD280/OD315 of diluted wines',\n",
|
||
|
|
" 'proline'])\n",
|
||
|
|
"print('1st row', X[0])"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"import numpy as np\n",
|
||
|
|
"print('Classes: %s' % np.unique(y))\n",
|
||
|
|
"print('Class distribution: %s' % np.bincount(y))"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "kj6nE9hz3AMs",
|
||
|
|
"outputId": "ae7a436c-6bcd-43de-c171-c34034ef4320"
|
||
|
|
},
|
||
|
|
"execution_count": 93,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "stream",
|
||
|
|
"name": "stdout",
|
||
|
|
"text": [
|
||
|
|
"Classes: [0 1 2]\n",
|
||
|
|
"Class distribution: [59 71 48]\n"
|
||
|
|
]
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"X.shape"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "lQ_JL1i4Eo0C",
|
||
|
|
"outputId": "2971e185-5ee1-4785-e6ff-05d042406441"
|
||
|
|
},
|
||
|
|
"execution_count": 94,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "execute_result",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"(178, 13)"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"execution_count": 94
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"columns = ['alcohol', 'malic acid', 'ash', 'ash alcalinity',\n",
|
||
|
|
" 'magnesium', 'total phenols', 'flavanoids',\n",
|
||
|
|
" 'nonflavanoid phenols', 'proanthocyanins',\n",
|
||
|
|
" 'color intensity', 'hue', 'OD280/OD315 of diluted wines',\n",
|
||
|
|
" 'proline']"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"id": "LIyPC2bsEyEq"
|
||
|
|
},
|
||
|
|
"execution_count": 95,
|
||
|
|
"outputs": []
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"from sklearn.model_selection import train_test_split\n",
|
||
|
|
"X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3, random_state=42)"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"id": "KUIpscN0I3T4"
|
||
|
|
},
|
||
|
|
"execution_count": 96,
|
||
|
|
"outputs": []
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"from sklearn import tree\n",
|
||
|
|
"clf = tree.DecisionTreeClassifier()"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"id": "bmMYttf2M7GY"
|
||
|
|
},
|
||
|
|
"execution_count": 97,
|
||
|
|
"outputs": []
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"# Fit the classifier to the training data\n",
|
||
|
|
"clf.fit(X_train, y_train)\n",
|
||
|
|
"\n",
|
||
|
|
"# Predict the class labels for the test data\n",
|
||
|
|
"y_pred = clf.predict(X_test)\n",
|
||
|
|
"\n",
|
||
|
|
"# Calculate the accuracy of the classifier\n",
|
||
|
|
"accuracy = clf.score(X_test, y_test)\n",
|
||
|
|
"\n",
|
||
|
|
"# Print the results\n",
|
||
|
|
"print(\"Predicted class labels:\", y_pred)\n",
|
||
|
|
"print(\"Accuracy:\", accuracy)"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "6mGo2Xb3NX3X",
|
||
|
|
"outputId": "4a9d3837-1c80-41ed-fbfb-ad28c2d0c45b"
|
||
|
|
},
|
||
|
|
"execution_count": 98,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "stream",
|
||
|
|
"name": "stdout",
|
||
|
|
"text": [
|
||
|
|
"Predicted class labels: [0 0 2 0 1 0 1 2 1 2 1 0 0 1 0 1 1 1 0 1 0 1 1 2 2 2 1 1 1 0 0 1 2 0 0 0 2\n",
|
||
|
|
" 2 1 2 1 1 1 1 2 0 1 1 2 0 1 0 0 2]\n",
|
||
|
|
"Accuracy: 0.9444444444444444\n"
|
||
|
|
]
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"import matplotlib.pyplot as plt\n",
|
||
|
|
"plt.figure(figsize=(12,8))\n",
|
||
|
|
"tree.plot_tree(clf)\n",
|
||
|
|
"plt.show()"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/",
|
||
|
|
"height": 466
|
||
|
|
},
|
||
|
|
"id": "UeqGcdxwN7CE",
|
||
|
|
"outputId": "bf7ad83c-399e-4127-c273-a83cb9296460"
|
||
|
|
},
|
||
|
|
"execution_count": 99,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "display_data",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"<Figure size 864x576 with 1 Axes>"
|
||
|
|
],
|
||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAqsAAAHBCAYAAABOnPJQAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAAsTAAALEwEAmpwYAACqOklEQVR4nOzdd1gU19cH8O8FkbYLS1EBAVFiV0Sixi5Yo7GhWKMi9pJgTDQW7LHFoDFRMT9jS8SSYiFGMWLsRqMmAnajgoCiCEgHaef9A9nXlUUpi1s4n+e5zyMzd2bOLNd7D7MzdwQRgTHGGGOMMU2kp+4AGGOMMcYYKw4nq4wxxhhjTGNxssoYY4wxxjQWJ6uMMcYYY0xjcbLKGGOMMcY0FierjDHGGGNMY3GyyhhjjDHGNBYnq4wxxhhjTGNxssoYY4wxxjQWJ6uMMcYYY0xjcbLKGGOMMcY0FierjDHGGGNMY3GyyhhjjDHGNBYnq4wxxhhjTGNxssoYY4wxxjRWFXUHwBjTDcbGxo+zsrJqqDsOplmMjIyeZGZm2qg7DsaY9hJEpO4YGGM6QAhB3J+wVwkhQERC3XEwxrQX3wbAGGOMMcY0FierjDHGGGNMY3GyyhhjjDHGNBYnq4wxxhhjTGNxssoYY4wxxjQWJ6uMMZ2zaNEiVKlSBRKJBFFRUSXaZtasWTA1NYUQAmlpaRUcIWOMsZLiZJUxppP69++PtLQ0ODo6AgByc3Ph5+cHBwcHmJmZwdPTE3FxcfL6X375Ja5fv66ucAEA586dg6urKywsLGBhYYFu3bq9NqacnBz4+vrC1tYWMpkMHh4eCvVnzpyJ+vXrQyqVok6dOli1atXbOA3GGFMpTlYZY5XCV199hSNHjuDvv//GkydPYG5ujhEjRqhs/48fPy73PurVq4eDBw8iMTERT58+Re/evTFw4MBi669btw5HjhzBxYsXER8fj5YtW2LIkCHy9UZGRti3bx+SkpJw+PBhBAQEYOvWreWOkzHG3iZOVhljWufBgwewtLTE8ePHAQCZmZlo1KgRvvnmm2K32b9/P6ZNmwY7OzsYGxtj8eLFCAkJQWRkZJnjyMzMxO7du9GzZ0/UrVu3zPspVK1aNTg4OBROpA89PT3cv38f+fn5SutHRkaia9eucHBwQJUqVeDt7Y2bN28iLy8PAPDFF1+gcePG0NfXR4MGDTBgwACcPXu23HEyxtjbxMkqY0zr1KpVCwEBARg1ahQSEhIwffp0ODk5wdfXt9ht8vPz8fIbtgoTwPDw8FIdm4hw6tQpjB07FnZ2dti0aRO8vLwQExMjr7Ny5UrIZLJiy+sSxuTkZMhkMhgZGWHatGnw8/ODnp7yrnrs2LG4dOkSIiMjkZ2dja1bt6JHjx7Q19dXGvfp06fRuHHjUp0vY4ypWxV1B8AYY2UxdOhQHDlyBB4eHnj69CnCwsIgRPFv9ezduzfWrl2Lzp07w8LCAgsWLIAQAikpKSU+5oYNG+Dv7w9TU1OMGDEC4eHhcHBwKFJv9uzZmD17dpnOy9zcHElJSUhLS8OOHTtQs2bNYuvWqVMHTZo0Qe3ataGvrw97e3v8+eefSuv6+fkhIyMDkyZNKlNcjDGmLnxllTGmtaZMmYKrV69i9OjRqF69+mvrzpkzB126dEH79u1Rr149uLq6QiKRwMrKqsTHi4iIQFxcHFxdXdGsWTPY2tqW9xSKJZFIMHHiRPj4+CA6OlppnSlTpuDx48d48uQJsrKysGjRInTq1KlIAr5ixQr89NNPOHr0KExNTSssZsYYqwicrDLGtNLz588xfvx4+Pj4YOPGjbh58+Zr6xsaGsLf3x8PHjzAo0eP0KtXL2RnZ+O9994r8TH9/f0RFRWFNm3aYPHixahZsyY+/vhjXLhwQaHe8uXLIZFIii1nzpwp0fGICJmZmcXeV3vlyhV4e3ujevXqqFKlCkaPHo3MzEz8+++/8jorV67E//73Pxw/fh
z29vYlPlfGGNMUnKwyxrTS559/jmrVqmHLli2YM2cOhg0bhufPnxdbPzY2Fg8ePAAR4b///sPYsWPx6aefwtLSslTHtbKywtSpU3HhwgWcPXsWMpkMw4YNwzvvvCOvM3fuXKSlpRVbOnTooHTf+/fvx40bN5Cfn4/k5GRMnz4dEokErq6uSuu3bt0aO3bsQEJCAvLy8rBjxw5kZGSgQYMGAIBVq1YhICAAJ06cQK1atUp1nowxpik4WWWMaZ3g4GDs2rULP/74I4QQmDlzJqysrDBr1qxit4mKioKHhwdMTU3RtWtX9O7dG8uWLStXHHXr1sUXX3yB+/fvY9u2beXaF1Aw/VWfPn0glUrxzjvv4N69ezh69CikUikAYOfOnQoPSH311VeoVq0aGjduDAsLC3z99df4+eefYWNjA6DgRQePHz9G06ZN5Vd1e/bsWe44GWPsbRIvPx3LGGNlJYQgTelPli5dihUrVsDAwADh4eHyFwO8zpw5cxAQEIDnz58jISGB7+1UkRfTcBX/5BtjjL0BJ6uMMZXQpGSVaQ5OVhlj5cW3ATDGGGOMMY3FySpjjDHGGNNYnKwyxnRWVFQUJBIJ0tPTS1S/Z8+e2LRpUwVHxRhjrDQ4WWWM6SxHR0ekpaWV+GGp4OBgTJgwQeVx3LhxA23btoWJiQkaNmyIkJCQ19ZPTEyEj48PLC0tYW5ujnbt2snXnTx5EkIIhXlbFy5cKF+fk5MDX19f2NraQiaTwcPDA9evX1f5OTHG2NvCySpjjFWgnJwc9O3bF71798azZ8+waNEiDBw4EI8fPy52mwEDBsDY2Bj37t1DYmIi1q5dq7DeyspKYd7WxYsXy9etW7cOR44cwcWLFxEfH4+WLVtiyJAhFXV6jDFW4ThZZYxptfDwcLRq1QpSqRTvv/8+Pv74Y3h5eQEAIiMjIYRAWloaAGD06NGYOHEivLy8IJVK0bhxY1y8eFG+L3d3d6xfv16l8Z0+fRrJycmYNWsWDA0NMWTIEDRt2hQ//fST0vrHjh3D/fv38e2338LCwgL6+vpo2bJliY8XGRmJrl27wsHBAVWqVIG3tzdu3ryJvLw8VZ0SY4y9VZysMsa0Vk5ODvr164d+/fohMTERfn5++PHHH1+7ze7du+Hr64ukpCT06dMHkyZNKtGxCt9WVVxZuXKl0u3Cw8PRpEkT6Ovry5e5urri2rVrSuufP38eDRo0wLhx42BlZQUXFxfs27dPoU5SUhJsbW3h6OiIsWPHIj4+Xr5u7NixuHTpEiIjI5GdnY2tW7eiR48eCsdnjDFtwskqY0xrnT9/HikpKZg9ezYMDAzQoUMH9OnT57Xb9O3bFx07doS+vj5GjRqF8PDwEl11bN++PZKSkoots2fPVrpdWloazM3NFZbJZDKkpqYqrR8dHY2QkBC0bt0ajx8/xpo1azBixAhcvXoVANCgQQOEhYUhJiYGFy5cQGJiosLX/HXq1EGTJk1Qu3ZtmJiYYO/evVi3bt0bz48xxjQVJ6uMMa316NEj2NnZKVw1dHBweO02ha8iBQATExPk5eUhMzOzwmKUSCRISUlRWJacnCx/heqrTExMYG9vj0mTJsHAwABdu3ZFly5dEBwcLI+/cePG0NfXh52dHQICAnD8+HH51dUpU6bg8ePHePLkCbKysrBo0SJ06tSpSAyMMaYtOFlljGktOzs7xMbGKlwZjY6OrpBjnTlzRuEJ/FfL8uXLlW7n4uKCa9euIT8/X74sNDQUTZo0Kba+EEVf+FTc28H09PQU1l+5cgXe3t6oXr06qlSpgtGjRyMzMxP//vtvqc6XMcY0BSerjDGt1aZNG5iamuKrr75CTk4Ozp07h4MHD1bIsTp06KDwBP6rZe7cuUq369ixI8zMzLBq1So8f/4cv/zyC8LDwzF48GCl9T09PZGeno7NmzcjLy8PJ0+exPHjx9GrVy8AwIkTJxAZGQkiQlxcHKZOnYr27dujWrVqAIDWrVtjx4
4dSEhIQF5eHnbs2IGMjAw0aNCgQj4XxhiraJysMsa0loGBAYKCgvDrr7/CwsICS5YswdChQ2FoaKju0OQKY
|
||
|
|
},
|
||
|
|
"metadata": {
|
||
|
|
"needs_background": "light"
|
||
|
|
}
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "markdown",
|
||
|
|
"source": [
|
||
|
|
"# Confusion Metrics"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"id": "6ApROW4KO5Qw"
|
||
|
|
}
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"from sklearn.metrics import confusion_matrix\n",
|
||
|
|
"import seaborn as sns\n",
|
||
|
|
"from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score\n",
|
||
|
|
"plt.figure(figsize=(12,8))\n",
|
||
|
|
"cm = confusion_matrix(y_test,y_pred)\n",
|
||
|
|
" \n",
|
||
|
|
"#Plot the confusion matrix.\n",
|
||
|
|
"sns.heatmap(cm,\n",
|
||
|
|
" annot=True,\n",
|
||
|
|
" fmt='g',\n",
|
||
|
|
" xticklabels=['0', '1' , '2'],\n",
|
||
|
|
" yticklabels=['0', '1' , '2'])\n",
|
||
|
|
"plt.ylabel('Prediction',fontsize=13)\n",
|
||
|
|
"plt.xlabel('Actual',fontsize=13)\n",
|
||
|
|
"plt.title('Confusion Matrix',fontsize=17)\n",
|
||
|
|
"plt.show()\n",
|
||
|
|
" \n",
|
||
|
|
" "
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/",
|
||
|
|
"height": 520
|
||
|
|
},
|
||
|
|
"id": "bCc29IiJOLKI",
|
||
|
|
"outputId": "b5754110-096a-4a5a-8f42-3a72af060178"
|
||
|
|
},
|
||
|
|
"execution_count": 100,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "display_data",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"<Figure size 864x576 with 2 Axes>"
|
||
|
|
],
|
||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAApwAAAH3CAYAAAAWkI+hAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAAsTAAALEwEAmpwYAAAt0UlEQVR4nO3debxdZXno8d+TEAYDCIgMGSQgFMUBvAIOaC+IBuQiobRlUDFWMGiVipfiiNLSC9paoCJeMZUAIgSojCIggYKAFSRwESGhDCZKJkDmUZKc5/6xV+LmcKZ9zl7nPTn79/WzPnuvdw3vs2M+8fF51/uuyEwkSZKkuowpHYAkSZJGNxNOSZIk1cqEU5IkSbUy4ZQkSVKtTDglSZJUKxNOSZIk1cqEU9KgRcT7ImJuRDwfERkRO9XQxw0RcUO777umq/68/6F0HJI0ECac0hosIjaJiH+KiF9HxDMR8UJE3BsRp0bEdjX3vRFwEY1/R/4OOBT4XZ19DreIOKtK7F6sfm/342tFxMPVOdcOso/9I+LrQw5WkkawtUoHIGlwImJH4CrgNcAFwPeB5cAOwEHAp4C1awxhZ2Aj4PjMvLTGfqbWeO+BSCCAA4GZ3Y7tBWwG/HEI998f+ChwfIvXrQesGEK/kjRsTDilNVBEbAhcDowDdsnMu7od/wpwQs1hbFZ9PllnJ5n5Up33H4Au4EoaSWH3hPNQ4Db+9GdRq4gYA6ydmS9m5ovD0acktYND6tKaaQbwOuDvuyebAJn5Qmb+7+a2iNgtIq6LiGer7bqIeFe3cz5eDQ/vERHfiIhl1TD9nIjYuum8G4Bzq93rq2tuqI6dFRELu8fUdO8pTW07RcQV1bD0ixGxOCIuiohJzX11f4YzItaNiBMjYmFEvFR9nhgR63Q7b2FEXBsRu0TEL6rf8lBEvOzPZgB+BLwnIrZquveGwH7VsVeIiKMj4qaIeDQi/lg96vD3ERHNvw2YDoyt/mwyIrI6NqXaPzYijoiIe2lUUveujq9+hjMaromIpyLidd3iuDQinouIP2vxN0tS21jhlNZM+9NIPs4fyMkR8efAHGAJf6p8HkEjWdwzM3/R7ZJvAS8CJwKbAn9PI8F8d3X8BGAe8OnqnPnAw638gIh4LXAt8ARwMvAHYAKNYeqJwKJergvgYuCDNJK9X1ZxfRl4C/Chbpe8DvgJcE51/kHASRFxT2b+bIDhXgE8RaPKuerP7y9pVJjPB3pKYP838FPgxzSGvj9A4891Y+Cr1Tkn0Pg//rvRSDx7cjDwahqPTDwOLOx+QmZmRPwN8Bvg7Ih4X9X2CWAa8JnMvG+Av1WS2i8z3dzc1rCNRuLx6xbOn0sjsdu8qW1LGknUr5raPk7jmcVfAGOb2o+q2t/U1PbRqm33bn2dBSzsIYZV955S7U+r9nfpJ/YbgBua9vetrvtGt/O+VbXv09S2sGrbu6ltHRrJ8X8M4M/tLGBF9X0mMK/p2H8CP23q59pu176qh/v9AHgWWKenPrqdO6WK/XlgUg/HE/iHbm0HV+1HA1sDTwNXl/776ubm5uaQurRm2pBGMtGviNgCeDtwTmaurkJm5lIaFb9dIqL7M4jfz8yVTfs/rz63GXzIr/Bk9blf96Hwfuxbff5rt/Z/6XZ8lQWZefWqncz8I3ALrf+WHwFvjIi3R8RkYHd6GU6v+nkeVs9k3zgiNqWRPI8Htm+h38szs8dqbw99ng/MplE5vZhGZfUTLfQlSbUw4ZTWTE8DGwzw3CnV5709HJtXfW7drb378kZPVJ+bDLDPgbiRxnD0scBjEXF1RBwZEa/p57opwKOZ+VhzY2Y+SmNYvr/fAo3f0+pvuam610eBj9CoVF7a28kRsU9E3AK8QKMi/SiNYX1ozO4fqAdbjPNvq9h2Ao7MzCUtXi9JbWfCKa2Z5gPbt1gZbMXKXtqjl/Zm2Uv72Jed1HAIjerrP9NY5ucU4N6IePNAAx2AofyW1T
IzaTzHegiN2ekXZ+YLPd444t00nhtdQSMB/F80nuH8YnVKK//29thHH95BY6ksaDzTKknFmXBKa6bLgHVprA3Zn4XV5xt6OPbG6nNBG2Ja5Ql6ruBN6enkzLwjM/8pM/8n8D+qa4/u4/4Lgdd2r4RWQ9ab0t7f0t2PgM1prHXa63A68NfAS8D7M/PfM/PKzLyWnpeQ6i1Bb1lEbALMAu4E/h04pkp+JakoE05pzfR9GrO4T+qpGlgtG3QyQGYuozFp6NDmZzWrZzsPpTFp6JE2xvYA8OqIeFtTX+vTbRZ29Vxj9yrjfBoVvY36uP9Pqs/uM8OP6Xa87TJzftXv12hMGupNF41EcnVVNyLWBY7s4dznaCyLtH4bQvwejermoTQmej0I/DAixrfh3pI0aC6LJK2BMvOpiJhGY0Hy2yNiNnArjTcNvYHG0j+b8aek7GgaSxDdEhGrFi8/gkaVtNU1KfszG/gmcElEfJvG0kGfoDEzfHLTedOBIyPiEhpJ6lo0ZllvUN2jN1cCVwNfqdbrvBV4J40k64rMvKq9P+flMvOUAZx2OfB54NqIOIfGb5pOY6mp7m6vPr8Tjddjrqwm/7QkIj5Mo+J9TGbeXbV9DLgZOInGm6ckqQgTTmkNlZl3VNXNz9NYgPyvaVTUFtB4x/mpTefeGBHvo/H6xGOr5l8BH8nM/2pzXE9UyfApNJ7NXExjnc1ngDObTv05jddj/iWwBY3lf+4B9s/My/q4f0bEAcDXgQ/TeKZyKfANWn89ZC0y8+cRcSjwFRq//REayx/dBFzT7fQf0UiYD6CRlAYDXF91lSrx/i6NiVgnN8VxS0R8E/hqRFzaPFtfkoZTNJ6DlyRJkurhM5ySJEmqlQmnJEmSamXCKUmSpFqZcEqSJKlWJpySJEmq1RqxLNILP/yyU+k1Krz16O4r4khrngVPLSsdgtQWK15a3NIrbuuw/A+/rSXHGbfpNsV/WzMrnJIkSarVGlHhlCRJGpW6VpaOYFiYcEqSJJWSXaUjGBYOqUuSJKlWVjglSZJK6bLCKUmSJA2ZFU5JkqRCskOe4TThlCRJKsUhdUmSJGnorHBKkiSV0iFD6lY4JUmSVCsrnJIkSaV0yJuGrHBKkiSpVlY4JUmSSumQZzhNOCVJkkpxWSRJkiRp6Ew4JUmSCsnsqmXrT0RMjojrI2JeRNwTEZ+r2jeJiDkRcX/1uXEv10+vzrk/Iqb3158JpyRJUudZARydmTsA7wQ+ExE7AF8CrsvM7YDrqv2XiYhNgOOAdwC7Asf1lpiuYsIpSZJUSldXPVs/MnNpZt5RfX8GmA9MBKYBZ1ennQ3s38PlewFzMvPxzHwCmAPs3Vd/ThqSJEkqZQTMUo+IKcDbgFuBzTNzaXVoGbB5D5dMBB5q2l9UtfXKCqckSdIoExEzImJu0zajl/PWBy4CjsrMp5uPZWYC2Y54rHBKkiSVUtObhjJzJjCzr3MiYhyNZPPczLy4an44IrbMzKURsSXwSA+XLgZ2b9qfBNzQV19WOCVJkjpMRARwBjA/M09uOnQ5sGrW+XTgsh4u/xkwNSI2riYLTa3aemWFU5IkqZRyz3DuBhwK/CYi7qzavgJ8E7gwIg4DfgccCBAROwOfyszDM/PxiPgn4LbquuMz8/G+OjPhlCRJKqXQm4Yy82Ygejm8Zw/nzwUOb9qfBcwaaH8OqUuSJKlWVjglSZJKGQHLIg0HK5ySJEmqlRVOSZKkUgo9wzncTDglSZIKyaxnHc6RxiF1SZIk1coKpyRJUilOGpIkSZKGzgqnJElSKR0yacgKpyRJkmplhVOSJKmUDnmG04RTkiSplC6XRZIkSZKGzAqnJElSKR0ypG6FU5IkSbWywilJklRKhyyLZMIpSZJUikPqkiRJ0tBZ4ZQkSSqlQ4bUrXBKkiSpVlY4JUmSSumQCqcJpyRJUiGZvmlIkiRJGjIrnJIkSaV0yJC6FU5JkiTVygqnJElSKS78LkmSJA2dFU
5JkqRSOuQZThNOSZKkUhxSlyRJkobOCqckSVIpHTKkboVTkiRJtbLCKUmSVEqHPMNpwilJklSKQ+qSJEnS0
|
||
|
|
},
|
||
|
|
"metadata": {
|
||
|
|
"needs_background": "light"
|
||
|
|
}
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"from sklearn.metrics import accuracy_score\n",
|
||
|
|
"accuracy_score(y_test,y_pred)"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "KGoGXwRkPHBH",
|
||
|
|
"outputId": "10bfeed7-0d53-409e-ad12-723de04671f6"
|
||
|
|
},
|
||
|
|
"execution_count": 101,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "execute_result",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"0.9444444444444444"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"execution_count": 101
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"from sklearn.metrics import classification_report\n",
|
||
|
|
"print(classification_report(y_test, y_pred))"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "TOzbcfXSQUGo",
|
||
|
|
"outputId": "24336c98-a019-44a4-c55c-cdccd3a4da24"
|
||
|
|
},
|
||
|
|
"execution_count": 102,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "stream",
|
||
|
|
"name": "stdout",
|
||
|
|
"text": [
|
||
|
|
" precision recall f1-score support\n",
|
||
|
|
"\n",
|
||
|
|
" 0 0.94 0.89 0.92 19\n",
|
||
|
|
" 1 0.91 1.00 0.95 21\n",
|
||
|
|
" 2 1.00 0.93 0.96 14\n",
|
||
|
|
"\n",
|
||
|
|
" accuracy 0.94 54\n",
|
||
|
|
" macro avg 0.95 0.94 0.95 54\n",
|
||
|
|
"weighted avg 0.95 0.94 0.94 54\n",
|
||
|
|
"\n"
|
||
|
|
]
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "markdown",
|
||
|
|
"source": [
|
||
|
|
"### Wine Classificaion using the wine_data.xlsx"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"id": "Lbhxu3u6i4at"
|
||
|
|
}
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "markdown",
|
||
|
|
"source": [
|
||
|
|
"### Loading the data"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"id": "wCtbWMDejBW9"
|
||
|
|
}
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"wine = pd.read_excel('/content/wine_data.xlsx')"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"id": "9Udcqv9QQ1f_"
|
||
|
|
},
|
||
|
|
"execution_count": 163,
|
||
|
|
"outputs": []
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"wine.head()"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/",
|
||
|
|
"height": 206
|
||
|
|
},
|
||
|
|
"id": "bUDrNfnHjtBs",
|
||
|
|
"outputId": "d8d0ea56-ffd8-4c2c-ef49-22bc745161c4"
|
||
|
|
},
|
||
|
|
"execution_count": 164,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "execute_result",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
" Preference Red_Wine White_Wine Recommendation\n",
|
||
|
|
"0 Red Light-Bodied NaN Pinot Noir\n",
|
||
|
|
"1 Red Full-Bodied NaN Shiraz or Zinfandel\n",
|
||
|
|
"2 White NaN Dry Sauvignon Blanc\n",
|
||
|
|
"3 White NaN Sweet Gewurztraminer\n",
|
||
|
|
"4 Red-Fruity NaN NaN Pinot Noir"
|
||
|
|
],
|
||
|
|
"text/html": [
|
||
|
|
"\n",
|
||
|
|
" <div id=\"df-87941f68-bb2e-4645-ac49-58b5086dae00\">\n",
|
||
|
|
" <div class=\"colab-df-container\">\n",
|
||
|
|
" <div>\n",
|
||
|
|
"<style scoped>\n",
|
||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
|
" vertical-align: middle;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe tbody tr th {\n",
|
||
|
|
" vertical-align: top;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe thead th {\n",
|
||
|
|
" text-align: right;\n",
|
||
|
|
" }\n",
|
||
|
|
"</style>\n",
|
||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
|
" <thead>\n",
|
||
|
|
" <tr style=\"text-align: right;\">\n",
|
||
|
|
" <th></th>\n",
|
||
|
|
" <th>Preference</th>\n",
|
||
|
|
" <th>Red_Wine</th>\n",
|
||
|
|
" <th>White_Wine</th>\n",
|
||
|
|
" <th>Recommendation</th>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </thead>\n",
|
||
|
|
" <tbody>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>0</th>\n",
|
||
|
|
" <td>Red</td>\n",
|
||
|
|
" <td>Light-Bodied</td>\n",
|
||
|
|
" <td>NaN</td>\n",
|
||
|
|
" <td>Pinot Noir</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>1</th>\n",
|
||
|
|
" <td>Red</td>\n",
|
||
|
|
" <td>Full-Bodied</td>\n",
|
||
|
|
" <td>NaN</td>\n",
|
||
|
|
" <td>Shiraz or Zinfandel</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>2</th>\n",
|
||
|
|
" <td>White</td>\n",
|
||
|
|
" <td>NaN</td>\n",
|
||
|
|
" <td>Dry</td>\n",
|
||
|
|
" <td>Sauvignon Blanc</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>3</th>\n",
|
||
|
|
" <td>White</td>\n",
|
||
|
|
" <td>NaN</td>\n",
|
||
|
|
" <td>Sweet</td>\n",
|
||
|
|
" <td>Gewurztraminer</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>4</th>\n",
|
||
|
|
" <td>Red-Fruity</td>\n",
|
||
|
|
" <td>NaN</td>\n",
|
||
|
|
" <td>NaN</td>\n",
|
||
|
|
" <td>Pinot Noir</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </tbody>\n",
|
||
|
|
"</table>\n",
|
||
|
|
"</div>\n",
|
||
|
|
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-87941f68-bb2e-4645-ac49-58b5086dae00')\"\n",
|
||
|
|
" title=\"Convert this dataframe to an interactive table.\"\n",
|
||
|
|
" style=\"display:none;\">\n",
|
||
|
|
" \n",
|
||
|
|
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
|
||
|
|
" width=\"24px\">\n",
|
||
|
|
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
|
||
|
|
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
|
||
|
|
" </svg>\n",
|
||
|
|
" </button>\n",
|
||
|
|
" \n",
|
||
|
|
" <style>\n",
|
||
|
|
" .colab-df-container {\n",
|
||
|
|
" display:flex;\n",
|
||
|
|
" flex-wrap:wrap;\n",
|
||
|
|
" gap: 12px;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .colab-df-convert {\n",
|
||
|
|
" background-color: #E8F0FE;\n",
|
||
|
|
" border: none;\n",
|
||
|
|
" border-radius: 50%;\n",
|
||
|
|
" cursor: pointer;\n",
|
||
|
|
" display: none;\n",
|
||
|
|
" fill: #1967D2;\n",
|
||
|
|
" height: 32px;\n",
|
||
|
|
" padding: 0 0 0 0;\n",
|
||
|
|
" width: 32px;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .colab-df-convert:hover {\n",
|
||
|
|
" background-color: #E2EBFA;\n",
|
||
|
|
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
||
|
|
" fill: #174EA6;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" [theme=dark] .colab-df-convert {\n",
|
||
|
|
" background-color: #3B4455;\n",
|
||
|
|
" fill: #D2E3FC;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" [theme=dark] .colab-df-convert:hover {\n",
|
||
|
|
" background-color: #434B5C;\n",
|
||
|
|
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
|
||
|
|
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
|
||
|
|
" fill: #FFFFFF;\n",
|
||
|
|
" }\n",
|
||
|
|
" </style>\n",
|
||
|
|
"\n",
|
||
|
|
" <script>\n",
|
||
|
|
" const buttonEl =\n",
|
||
|
|
" document.querySelector('#df-87941f68-bb2e-4645-ac49-58b5086dae00 button.colab-df-convert');\n",
|
||
|
|
" buttonEl.style.display =\n",
|
||
|
|
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
||
|
|
"\n",
|
||
|
|
" async function convertToInteractive(key) {\n",
|
||
|
|
" const element = document.querySelector('#df-87941f68-bb2e-4645-ac49-58b5086dae00');\n",
|
||
|
|
" const dataTable =\n",
|
||
|
|
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
|
||
|
|
" [key], {});\n",
|
||
|
|
" if (!dataTable) return;\n",
|
||
|
|
"\n",
|
||
|
|
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
|
||
|
|
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
|
||
|
|
" + ' to learn more about interactive tables.';\n",
|
||
|
|
" element.innerHTML = '';\n",
|
||
|
|
" dataTable['output_type'] = 'display_data';\n",
|
||
|
|
" await google.colab.output.renderOutput(dataTable, element);\n",
|
||
|
|
" const docLink = document.createElement('div');\n",
|
||
|
|
" docLink.innerHTML = docLinkHtml;\n",
|
||
|
|
" element.appendChild(docLink);\n",
|
||
|
|
" }\n",
|
||
|
|
" </script>\n",
|
||
|
|
" </div>\n",
|
||
|
|
" </div>\n",
|
||
|
|
" "
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"execution_count": 164
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"wine.value_counts()"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "Nxoyk2DUjvwD",
|
||
|
|
"outputId": "f63bb36f-d3a3-4696-e128-2a6f53da402b"
|
||
|
|
},
|
||
|
|
"execution_count": 165,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "execute_result",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"Series([], dtype: int64)"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"execution_count": 165
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"wine.info()"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "XM0ZWZq_j1Dj",
|
||
|
|
"outputId": "c4f3eaa0-1a30-4806-fb3a-81329552fb82"
|
||
|
|
},
|
||
|
|
"execution_count": 166,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "stream",
|
||
|
|
"name": "stdout",
|
||
|
|
"text": [
|
||
|
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
||
|
|
"RangeIndex: 40 entries, 0 to 39\n",
|
||
|
|
"Data columns (total 4 columns):\n",
|
||
|
|
" # Column Non-Null Count Dtype \n",
|
||
|
|
"--- ------ -------------- ----- \n",
|
||
|
|
" 0 Preference 40 non-null object\n",
|
||
|
|
" 1 Red_Wine 18 non-null object\n",
|
||
|
|
" 2 White_Wine 6 non-null object\n",
|
||
|
|
" 3 Recommendation 35 non-null object\n",
|
||
|
|
"dtypes: object(4)\n",
|
||
|
|
"memory usage: 1.4+ KB\n"
|
||
|
|
]
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"wine['Preference'].value_counts()"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "G4IfBAQtj2Oc",
|
||
|
|
"outputId": "51fbc5a9-2080-4637-8ee6-164b59146f40"
|
||
|
|
},
|
||
|
|
"execution_count": 167,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "execute_result",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"Red 6\n",
|
||
|
|
"White 6\n",
|
||
|
|
"Red-Fruity 5\n",
|
||
|
|
"Red-Earthy 5\n",
|
||
|
|
"White-Crisp 3\n",
|
||
|
|
"White-Creamy 3\n",
|
||
|
|
"Red-Spicy 3\n",
|
||
|
|
"Red-Rich 3\n",
|
||
|
|
"White-Floral 3\n",
|
||
|
|
"White-Citrus 3\n",
|
||
|
|
"Name: Preference, dtype: int64"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"execution_count": 167
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"wine['Red_Wine'].value_counts()"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "vrGJsrb0j7Qk",
|
||
|
|
"outputId": "067da933-1552-49e0-aed2-347d1641e142"
|
||
|
|
},
|
||
|
|
"execution_count": 168,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "execute_result",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"Light-Bodied 5\n",
|
||
|
|
"Full-Bodied 5\n",
|
||
|
|
"Dry 4\n",
|
||
|
|
"Sweet 4\n",
|
||
|
|
"Name: Red_Wine, dtype: int64"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"execution_count": 168
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"wine['White_Wine'].value_counts()"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "bR5Ie3bakC1E",
|
||
|
|
"outputId": "3c9a81dc-ad0c-4a38-e4d7-4073bd222b37"
|
||
|
|
},
|
||
|
|
"execution_count": 169,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "execute_result",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"Dry 3\n",
|
||
|
|
"Sweet 3\n",
|
||
|
|
"Name: White_Wine, dtype: int64"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"execution_count": 169
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"wine['Recommendation'].value_counts()"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "iATAENA3kGN0",
|
||
|
|
"outputId": "92f4684d-27f4-4636-c766-caa0f13e575b"
|
||
|
|
},
|
||
|
|
"execution_count": 170,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "execute_result",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"Pinot Noir 8\n",
|
||
|
|
"Shiraz or Zinfandel 5\n",
|
||
|
|
"Sauvignon Blanc 5\n",
|
||
|
|
"Gewurztraminer 4\n",
|
||
|
|
"Cabernet Sauvignon 4\n",
|
||
|
|
"Chianti 3\n",
|
||
|
|
"Chardonnay 3\n",
|
||
|
|
"Riesling 3\n",
|
||
|
|
"Name: Recommendation, dtype: int64"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"execution_count": 170
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"wine.isnull().sum()"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "hXVqoyd0kKiE",
|
||
|
|
"outputId": "25a594a3-90fc-48e8-eb96-44d46d1f4dbc"
|
||
|
|
},
|
||
|
|
"execution_count": 171,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "execute_result",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"Preference 0\n",
|
||
|
|
"Red_Wine 22\n",
|
||
|
|
"White_Wine 34\n",
|
||
|
|
"Recommendation 5\n",
|
||
|
|
"dtype: int64"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"execution_count": 171
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "markdown",
|
||
|
|
"source": [
|
||
|
|
"## Handing Null Values"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"id": "iaIBH_PQkU2l"
|
||
|
|
}
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"# Recommendation\n",
|
||
|
|
"'''\n",
|
||
|
|
"1.it is the class label and the dataset is also small\n",
|
||
|
|
"2. so i have planned to replace tha null values using the most frequent values(i.e Pinot Noir)\n",
|
||
|
|
"'''\n",
|
||
|
|
"wine['Recommendation'] = wine['Recommendation'].fillna(wine['Recommendation'].value_counts().sort_values(ascending=False).keys()[0])"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"id": "8fcr-w1EkTGc"
|
||
|
|
},
|
||
|
|
"execution_count": 172,
|
||
|
|
"outputs": []
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"wine['Recommendation'].value_counts().sort_values(ascending=False).keys()[0]"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/",
|
||
|
|
"height": 36
|
||
|
|
},
|
||
|
|
"id": "ma_R478MlDk-",
|
||
|
|
"outputId": "0d5f3e9a-1eb0-4a09-99fb-8145b1147089"
|
||
|
|
},
|
||
|
|
"execution_count": 173,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "execute_result",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"'Pinot Noir'"
|
||
|
|
],
|
||
|
|
"application/vnd.google.colaboratory.intrinsic+json": {
|
||
|
|
"type": "string"
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"execution_count": 173
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"wine['Recommendation'].isnull().sum()"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "yX8GGdHrlFoF",
|
||
|
|
"outputId": "5579cf7d-2f7b-4489-d09f-f424afefd402"
|
||
|
|
},
|
||
|
|
"execution_count": 174,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "execute_result",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"0"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"execution_count": 174
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"# White_Wine\n",
|
||
|
|
"'''\n",
|
||
|
|
"1. White_Wine has 2 classes [Dry , Sweet]\n",
|
||
|
|
"2. Replace with 'None' as a New label\n",
|
||
|
|
"'''\n",
|
||
|
|
"\n",
|
||
|
|
"wine['White_Wine'] = wine['White_Wine'].fillna('None')\n",
|
||
|
|
"wine['White_Wine'].isnull().sum()"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "ZD3sh5V4lY-s",
|
||
|
|
"outputId": "621baedb-3daf-49a7-87e7-45bde6701212"
|
||
|
|
},
|
||
|
|
"execution_count": 175,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "execute_result",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"0"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"execution_count": 175
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"# Red_Wine\n",
|
||
|
|
"'''\n",
|
||
|
|
"1. 22 null values\n",
|
||
|
|
"2. replace with 'None'\n",
|
||
|
|
"'''\n",
|
||
|
|
"\n",
|
||
|
|
"wine['Red_Wine'] = wine['Red_Wine'].fillna('None')\n",
|
||
|
|
"wine['Red_Wine'].isnull().sum()"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "gshPG42-nTgE",
|
||
|
|
"outputId": "b9a2554f-2c33-4cda-9f2d-895f1a290f2a"
|
||
|
|
},
|
||
|
|
"execution_count": 176,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "execute_result",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"0"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"execution_count": 176
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"wine.head()"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/",
|
||
|
|
"height": 206
|
||
|
|
},
|
||
|
|
"id": "s7Ip6blqnlGU",
|
||
|
|
"outputId": "557ecc39-366c-4a2f-bfbc-db313ff9b10a"
|
||
|
|
},
|
||
|
|
"execution_count": 177,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "execute_result",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
" Preference Red_Wine White_Wine Recommendation\n",
|
||
|
|
"0 Red Light-Bodied None Pinot Noir\n",
|
||
|
|
"1 Red Full-Bodied None Shiraz or Zinfandel\n",
|
||
|
|
"2 White None Dry Sauvignon Blanc\n",
|
||
|
|
"3 White None Sweet Gewurztraminer\n",
|
||
|
|
"4 Red-Fruity None None Pinot Noir"
|
||
|
|
],
|
||
|
|
"text/html": [
|
||
|
|
"\n",
|
||
|
|
" <div id=\"df-4d516507-4e1e-4fd2-824a-8b4b2343d008\">\n",
|
||
|
|
" <div class=\"colab-df-container\">\n",
|
||
|
|
" <div>\n",
|
||
|
|
"<style scoped>\n",
|
||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
|
" vertical-align: middle;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe tbody tr th {\n",
|
||
|
|
" vertical-align: top;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe thead th {\n",
|
||
|
|
" text-align: right;\n",
|
||
|
|
" }\n",
|
||
|
|
"</style>\n",
|
||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
|
" <thead>\n",
|
||
|
|
" <tr style=\"text-align: right;\">\n",
|
||
|
|
" <th></th>\n",
|
||
|
|
" <th>Preference</th>\n",
|
||
|
|
" <th>Red_Wine</th>\n",
|
||
|
|
" <th>White_Wine</th>\n",
|
||
|
|
" <th>Recommendation</th>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </thead>\n",
|
||
|
|
" <tbody>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>0</th>\n",
|
||
|
|
" <td>Red</td>\n",
|
||
|
|
" <td>Light-Bodied</td>\n",
|
||
|
|
" <td>None</td>\n",
|
||
|
|
" <td>Pinot Noir</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>1</th>\n",
|
||
|
|
" <td>Red</td>\n",
|
||
|
|
" <td>Full-Bodied</td>\n",
|
||
|
|
" <td>None</td>\n",
|
||
|
|
" <td>Shiraz or Zinfandel</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>2</th>\n",
|
||
|
|
" <td>White</td>\n",
|
||
|
|
" <td>None</td>\n",
|
||
|
|
" <td>Dry</td>\n",
|
||
|
|
" <td>Sauvignon Blanc</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>3</th>\n",
|
||
|
|
" <td>White</td>\n",
|
||
|
|
" <td>None</td>\n",
|
||
|
|
" <td>Sweet</td>\n",
|
||
|
|
" <td>Gewurztraminer</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>4</th>\n",
|
||
|
|
" <td>Red-Fruity</td>\n",
|
||
|
|
" <td>None</td>\n",
|
||
|
|
" <td>None</td>\n",
|
||
|
|
" <td>Pinot Noir</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </tbody>\n",
|
||
|
|
"</table>\n",
|
||
|
|
"</div>\n",
|
||
|
|
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-4d516507-4e1e-4fd2-824a-8b4b2343d008')\"\n",
|
||
|
|
" title=\"Convert this dataframe to an interactive table.\"\n",
|
||
|
|
" style=\"display:none;\">\n",
|
||
|
|
" \n",
|
||
|
|
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
|
||
|
|
" width=\"24px\">\n",
|
||
|
|
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
|
||
|
|
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
|
||
|
|
" </svg>\n",
|
||
|
|
" </button>\n",
|
||
|
|
" \n",
|
||
|
|
" <style>\n",
|
||
|
|
" .colab-df-container {\n",
|
||
|
|
" display:flex;\n",
|
||
|
|
" flex-wrap:wrap;\n",
|
||
|
|
" gap: 12px;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .colab-df-convert {\n",
|
||
|
|
" background-color: #E8F0FE;\n",
|
||
|
|
" border: none;\n",
|
||
|
|
" border-radius: 50%;\n",
|
||
|
|
" cursor: pointer;\n",
|
||
|
|
" display: none;\n",
|
||
|
|
" fill: #1967D2;\n",
|
||
|
|
" height: 32px;\n",
|
||
|
|
" padding: 0 0 0 0;\n",
|
||
|
|
" width: 32px;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .colab-df-convert:hover {\n",
|
||
|
|
" background-color: #E2EBFA;\n",
|
||
|
|
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
||
|
|
" fill: #174EA6;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" [theme=dark] .colab-df-convert {\n",
|
||
|
|
" background-color: #3B4455;\n",
|
||
|
|
" fill: #D2E3FC;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" [theme=dark] .colab-df-convert:hover {\n",
|
||
|
|
" background-color: #434B5C;\n",
|
||
|
|
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
|
||
|
|
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
|
||
|
|
" fill: #FFFFFF;\n",
|
||
|
|
" }\n",
|
||
|
|
" </style>\n",
|
||
|
|
"\n",
|
||
|
|
" <script>\n",
|
||
|
|
" const buttonEl =\n",
|
||
|
|
" document.querySelector('#df-4d516507-4e1e-4fd2-824a-8b4b2343d008 button.colab-df-convert');\n",
|
||
|
|
" buttonEl.style.display =\n",
|
||
|
|
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
||
|
|
"\n",
|
||
|
|
" async function convertToInteractive(key) {\n",
|
||
|
|
" const element = document.querySelector('#df-4d516507-4e1e-4fd2-824a-8b4b2343d008');\n",
|
||
|
|
" const dataTable =\n",
|
||
|
|
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
|
||
|
|
" [key], {});\n",
|
||
|
|
" if (!dataTable) return;\n",
|
||
|
|
"\n",
|
||
|
|
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
|
||
|
|
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
|
||
|
|
" + ' to learn more about interactive tables.';\n",
|
||
|
|
" element.innerHTML = '';\n",
|
||
|
|
" dataTable['output_type'] = 'display_data';\n",
|
||
|
|
" await google.colab.output.renderOutput(dataTable, element);\n",
|
||
|
|
" const docLink = document.createElement('div');\n",
|
||
|
|
" docLink.innerHTML = docLinkHtml;\n",
|
||
|
|
" element.appendChild(docLink);\n",
|
||
|
|
" }\n",
|
||
|
|
" </script>\n",
|
||
|
|
" </div>\n",
|
||
|
|
" </div>\n",
|
||
|
|
" "
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"execution_count": 177
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "markdown",
|
||
|
|
"source": [
|
||
|
|
"### One Hot Encoding"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"id": "Eehc1X-npf7-"
|
||
|
|
}
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"from sklearn.preprocessing import OneHotEncoder\n",
|
||
|
|
"wine = pd.get_dummies(wine, columns=['Preference', 'Red_Wine','White_Wine'])"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"id": "KsTVHcsvnnQb"
|
||
|
|
},
|
||
|
|
"execution_count": 178,
|
||
|
|
"outputs": []
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"wine.head()"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/",
|
||
|
|
"height": 305
|
||
|
|
},
|
||
|
|
"id": "4niIcd36pyUt",
|
||
|
|
"outputId": "410a9df9-294d-403f-b315-10350964c8db"
|
||
|
|
},
|
||
|
|
"execution_count": 179,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "execute_result",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
" Recommendation Preference_Red Preference_Red-Earthy \\\n",
|
||
|
|
"0 Pinot Noir 1 0 \n",
|
||
|
|
"1 Shiraz or Zinfandel 1 0 \n",
|
||
|
|
"2 Sauvignon Blanc 0 0 \n",
|
||
|
|
"3 Gewurztraminer 0 0 \n",
|
||
|
|
"4 Pinot Noir 0 0 \n",
|
||
|
|
"\n",
|
||
|
|
" Preference_Red-Fruity Preference_Red-Rich Preference_Red-Spicy \\\n",
|
||
|
|
"0 0 0 0 \n",
|
||
|
|
"1 0 0 0 \n",
|
||
|
|
"2 0 0 0 \n",
|
||
|
|
"3 0 0 0 \n",
|
||
|
|
"4 1 0 0 \n",
|
||
|
|
"\n",
|
||
|
|
" Preference_White Preference_White-Citrus Preference_White-Creamy \\\n",
|
||
|
|
"0 0 0 0 \n",
|
||
|
|
"1 0 0 0 \n",
|
||
|
|
"2 1 0 0 \n",
|
||
|
|
"3 1 0 0 \n",
|
||
|
|
"4 0 0 0 \n",
|
||
|
|
"\n",
|
||
|
|
" Preference_White-Crisp Preference_White-Floral Red_Wine_Dry \\\n",
|
||
|
|
"0 0 0 0 \n",
|
||
|
|
"1 0 0 0 \n",
|
||
|
|
"2 0 0 0 \n",
|
||
|
|
"3 0 0 0 \n",
|
||
|
|
"4 0 0 0 \n",
|
||
|
|
"\n",
|
||
|
|
" Red_Wine_Full-Bodied Red_Wine_Light-Bodied Red_Wine_None Red_Wine_Sweet \\\n",
|
||
|
|
"0 0 1 0 0 \n",
|
||
|
|
"1 1 0 0 0 \n",
|
||
|
|
"2 0 0 1 0 \n",
|
||
|
|
"3 0 0 1 0 \n",
|
||
|
|
"4 0 0 1 0 \n",
|
||
|
|
"\n",
|
||
|
|
" White_Wine_Dry White_Wine_None White_Wine_Sweet \n",
|
||
|
|
"0 0 1 0 \n",
|
||
|
|
"1 0 1 0 \n",
|
||
|
|
"2 1 0 0 \n",
|
||
|
|
"3 0 0 1 \n",
|
||
|
|
"4 0 1 0 "
|
||
|
|
],
|
||
|
|
"text/html": [
|
||
|
|
"\n",
|
||
|
|
" <div id=\"df-ff8ea101-881d-4f5f-b77f-bf849cbdf12a\">\n",
|
||
|
|
" <div class=\"colab-df-container\">\n",
|
||
|
|
" <div>\n",
|
||
|
|
"<style scoped>\n",
|
||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
|
" vertical-align: middle;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe tbody tr th {\n",
|
||
|
|
" vertical-align: top;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe thead th {\n",
|
||
|
|
" text-align: right;\n",
|
||
|
|
" }\n",
|
||
|
|
"</style>\n",
|
||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
|
" <thead>\n",
|
||
|
|
" <tr style=\"text-align: right;\">\n",
|
||
|
|
" <th></th>\n",
|
||
|
|
" <th>Recommendation</th>\n",
|
||
|
|
" <th>Preference_Red</th>\n",
|
||
|
|
" <th>Preference_Red-Earthy</th>\n",
|
||
|
|
" <th>Preference_Red-Fruity</th>\n",
|
||
|
|
" <th>Preference_Red-Rich</th>\n",
|
||
|
|
" <th>Preference_Red-Spicy</th>\n",
|
||
|
|
" <th>Preference_White</th>\n",
|
||
|
|
" <th>Preference_White-Citrus</th>\n",
|
||
|
|
" <th>Preference_White-Creamy</th>\n",
|
||
|
|
" <th>Preference_White-Crisp</th>\n",
|
||
|
|
" <th>Preference_White-Floral</th>\n",
|
||
|
|
" <th>Red_Wine_Dry</th>\n",
|
||
|
|
" <th>Red_Wine_Full-Bodied</th>\n",
|
||
|
|
" <th>Red_Wine_Light-Bodied</th>\n",
|
||
|
|
" <th>Red_Wine_None</th>\n",
|
||
|
|
" <th>Red_Wine_Sweet</th>\n",
|
||
|
|
" <th>White_Wine_Dry</th>\n",
|
||
|
|
" <th>White_Wine_None</th>\n",
|
||
|
|
" <th>White_Wine_Sweet</th>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </thead>\n",
|
||
|
|
" <tbody>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>0</th>\n",
|
||
|
|
" <td>Pinot Noir</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>1</th>\n",
|
||
|
|
" <td>Shiraz or Zinfandel</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>2</th>\n",
|
||
|
|
" <td>Sauvignon Blanc</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>3</th>\n",
|
||
|
|
" <td>Gewurztraminer</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>4</th>\n",
|
||
|
|
" <td>Pinot Noir</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </tbody>\n",
|
||
|
|
"</table>\n",
|
||
|
|
"</div>\n",
|
||
|
|
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-ff8ea101-881d-4f5f-b77f-bf849cbdf12a')\"\n",
|
||
|
|
" title=\"Convert this dataframe to an interactive table.\"\n",
|
||
|
|
" style=\"display:none;\">\n",
|
||
|
|
" \n",
|
||
|
|
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
|
||
|
|
" width=\"24px\">\n",
|
||
|
|
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
|
||
|
|
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
|
||
|
|
" </svg>\n",
|
||
|
|
" </button>\n",
|
||
|
|
" \n",
|
||
|
|
" <style>\n",
|
||
|
|
" .colab-df-container {\n",
|
||
|
|
" display:flex;\n",
|
||
|
|
" flex-wrap:wrap;\n",
|
||
|
|
" gap: 12px;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .colab-df-convert {\n",
|
||
|
|
" background-color: #E8F0FE;\n",
|
||
|
|
" border: none;\n",
|
||
|
|
" border-radius: 50%;\n",
|
||
|
|
" cursor: pointer;\n",
|
||
|
|
" display: none;\n",
|
||
|
|
" fill: #1967D2;\n",
|
||
|
|
" height: 32px;\n",
|
||
|
|
" padding: 0 0 0 0;\n",
|
||
|
|
" width: 32px;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .colab-df-convert:hover {\n",
|
||
|
|
" background-color: #E2EBFA;\n",
|
||
|
|
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
||
|
|
" fill: #174EA6;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" [theme=dark] .colab-df-convert {\n",
|
||
|
|
" background-color: #3B4455;\n",
|
||
|
|
" fill: #D2E3FC;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" [theme=dark] .colab-df-convert:hover {\n",
|
||
|
|
" background-color: #434B5C;\n",
|
||
|
|
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
|
||
|
|
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
|
||
|
|
" fill: #FFFFFF;\n",
|
||
|
|
" }\n",
|
||
|
|
" </style>\n",
|
||
|
|
"\n",
|
||
|
|
" <script>\n",
|
||
|
|
" const buttonEl =\n",
|
||
|
|
" document.querySelector('#df-ff8ea101-881d-4f5f-b77f-bf849cbdf12a button.colab-df-convert');\n",
|
||
|
|
" buttonEl.style.display =\n",
|
||
|
|
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
||
|
|
"\n",
|
||
|
|
" async function convertToInteractive(key) {\n",
|
||
|
|
" const element = document.querySelector('#df-ff8ea101-881d-4f5f-b77f-bf849cbdf12a');\n",
|
||
|
|
" const dataTable =\n",
|
||
|
|
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
|
||
|
|
" [key], {});\n",
|
||
|
|
" if (!dataTable) return;\n",
|
||
|
|
"\n",
|
||
|
|
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
|
||
|
|
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
|
||
|
|
" + ' to learn more about interactive tables.';\n",
|
||
|
|
" element.innerHTML = '';\n",
|
||
|
|
" dataTable['output_type'] = 'display_data';\n",
|
||
|
|
" await google.colab.output.renderOutput(dataTable, element);\n",
|
||
|
|
" const docLink = document.createElement('div');\n",
|
||
|
|
" docLink.innerHTML = docLinkHtml;\n",
|
||
|
|
" element.appendChild(docLink);\n",
|
||
|
|
" }\n",
|
||
|
|
" </script>\n",
|
||
|
|
" </div>\n",
|
||
|
|
" </div>\n",
|
||
|
|
" "
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"execution_count": 179
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"y = wine['Recommendation']\n",
|
||
|
|
"y"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "zIUiu-Wwp40s",
|
||
|
|
"outputId": "873fdc96-b9dc-49de-bfc9-28f4b58e5f6a"
|
||
|
|
},
|
||
|
|
"execution_count": 180,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "execute_result",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"0 Pinot Noir\n",
|
||
|
|
"1 Shiraz or Zinfandel\n",
|
||
|
|
"2 Sauvignon Blanc\n",
|
||
|
|
"3 Gewurztraminer\n",
|
||
|
|
"4 Pinot Noir\n",
|
||
|
|
"5 Chianti\n",
|
||
|
|
"6 Sauvignon Blanc\n",
|
||
|
|
"7 Chardonnay\n",
|
||
|
|
"8 Shiraz or Zinfandel\n",
|
||
|
|
"9 Cabernet Sauvignon\n",
|
||
|
|
"10 Gewurztraminer\n",
|
||
|
|
"11 Riesling\n",
|
||
|
|
"12 Pinot Noir\n",
|
||
|
|
"13 Chianti\n",
|
||
|
|
"14 Sauvignon Blanc\n",
|
||
|
|
"15 Chardonnay\n",
|
||
|
|
"16 Shiraz or Zinfandel\n",
|
||
|
|
"17 Cabernet Sauvignon\n",
|
||
|
|
"18 Gewurztraminer\n",
|
||
|
|
"19 Riesling\n",
|
||
|
|
"20 Pinot Noir\n",
|
||
|
|
"21 Shiraz or Zinfandel\n",
|
||
|
|
"22 Pinot Noir\n",
|
||
|
|
"23 Cabernet Sauvignon\n",
|
||
|
|
"24 Sauvignon Blanc\n",
|
||
|
|
"25 Pinot Noir\n",
|
||
|
|
"26 Pinot Noir\n",
|
||
|
|
"27 Chardonnay\n",
|
||
|
|
"28 Pinot Noir\n",
|
||
|
|
"29 Shiraz or Zinfandel\n",
|
||
|
|
"30 Pinot Noir\n",
|
||
|
|
"31 Cabernet Sauvignon\n",
|
||
|
|
"32 Pinot Noir\n",
|
||
|
|
"33 Gewurztraminer\n",
|
||
|
|
"34 Sauvignon Blanc\n",
|
||
|
|
"35 Riesling\n",
|
||
|
|
"36 Pinot Noir\n",
|
||
|
|
"37 Pinot Noir\n",
|
||
|
|
"38 Chianti\n",
|
||
|
|
"39 Pinot Noir\n",
|
||
|
|
"Name: Recommendation, dtype: object"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"execution_count": 180
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"wine.columns"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "w9U_0ZcYsHW0",
|
||
|
|
"outputId": "9507b818-c4cb-444b-8994-01daadf6d7ec"
|
||
|
|
},
|
||
|
|
"execution_count": 181,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "execute_result",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"Index(['Recommendation', 'Preference_Red', 'Preference_Red-Earthy',\n",
|
||
|
|
" 'Preference_Red-Fruity', 'Preference_Red-Rich', 'Preference_Red-Spicy',\n",
|
||
|
|
" 'Preference_White', 'Preference_White-Citrus',\n",
|
||
|
|
" 'Preference_White-Creamy', 'Preference_White-Crisp',\n",
|
||
|
|
" 'Preference_White-Floral', 'Red_Wine_Dry', 'Red_Wine_Full-Bodied',\n",
|
||
|
|
" 'Red_Wine_Light-Bodied', 'Red_Wine_None', 'Red_Wine_Sweet',\n",
|
||
|
|
" 'White_Wine_Dry', 'White_Wine_None', 'White_Wine_Sweet'],\n",
|
||
|
|
" dtype='object')"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"execution_count": 181
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"X = wine[['Preference_Red', 'Preference_Red-Earthy',\n",
|
||
|
|
" 'Preference_Red-Fruity', 'Preference_Red-Rich', 'Preference_Red-Spicy',\n",
|
||
|
|
" 'Preference_White', 'Preference_White-Citrus',\n",
|
||
|
|
" 'Preference_White-Creamy', 'Preference_White-Crisp',\n",
|
||
|
|
" 'Preference_White-Floral', 'Red_Wine_Dry', 'Red_Wine_Full-Bodied',\n",
|
||
|
|
" 'Red_Wine_Light-Bodied', 'Red_Wine_None', 'Red_Wine_Sweet',\n",
|
||
|
|
" 'White_Wine_Dry', 'White_Wine_None', 'White_Wine_Sweet']]"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"id": "E8uyeERprdpL"
|
||
|
|
},
|
||
|
|
"execution_count": 182,
|
||
|
|
"outputs": []
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"X.shape"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "M7cCKVNusSVS",
|
||
|
|
"outputId": "0acb54a5-a1d0-453d-8b34-a3dadccdfa4f"
|
||
|
|
},
|
||
|
|
"execution_count": 183,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "execute_result",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"(40, 18)"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"execution_count": 183
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"y.shape"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "JX6KNEplsT2T",
|
||
|
|
"outputId": "8df2bde9-41c9-4092-b1e1-65ed81edacf6"
|
||
|
|
},
|
||
|
|
"execution_count": 184,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "execute_result",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"(40,)"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"execution_count": 184
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"from sklearn.preprocessing import LabelEncoder\n",
|
||
|
|
"le = LabelEncoder()\n",
|
||
|
|
"\n",
|
||
|
|
"y = le.fit_transform(y)\n",
|
||
|
|
"y"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "KwAXTRavsXJ3",
|
||
|
|
"outputId": "3f6e1f7b-6d7f-4d68-852d-eed24a13720a"
|
||
|
|
},
|
||
|
|
"execution_count": 185,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "execute_result",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"array([4, 7, 6, 3, 4, 2, 6, 1, 7, 0, 3, 5, 4, 2, 6, 1, 7, 0, 3, 5, 4, 7,\n",
|
||
|
|
" 4, 0, 6, 4, 4, 1, 4, 7, 4, 0, 4, 3, 6, 5, 4, 4, 2, 4])"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"execution_count": 185
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3, random_state=42,stratify=y)"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"id": "-SjHHFu5shkT"
|
||
|
|
},
|
||
|
|
"execution_count": 186,
|
||
|
|
"outputs": []
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"print(X_train.shape)\n",
|
||
|
|
"print(y_train.shape)\n",
|
||
|
|
"print(X_test.shape)\n",
|
||
|
|
"print(y_test.shape)"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "POOvu3hcspbj",
|
||
|
|
"outputId": "a82dad4b-4f07-487b-fc62-0137459b0aeb"
|
||
|
|
},
|
||
|
|
"execution_count": 187,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "stream",
|
||
|
|
"name": "stdout",
|
||
|
|
"text": [
|
||
|
|
"(28, 18)\n",
|
||
|
|
"(28,)\n",
|
||
|
|
"(12, 18)\n",
|
||
|
|
"(12,)\n"
|
||
|
|
]
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"clf1 = tree.DecisionTreeClassifier()"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"id": "foK7ookRsxPj"
|
||
|
|
},
|
||
|
|
"execution_count": 188,
|
||
|
|
"outputs": []
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"# Fit the classifier to the training data\n",
|
||
|
|
"clf1.fit(X_train, y_train)\n",
|
||
|
|
"\n",
|
||
|
|
"# Predict the class labels for the test data\n",
|
||
|
|
"y_pred = clf1.predict(X_test)\n",
|
||
|
|
"\n",
|
||
|
|
"# Calculate the accuracy of the classifier\n",
|
||
|
|
"accuracy = clf1.score(X_test, y_test)\n",
|
||
|
|
"\n",
|
||
|
|
"# Print the results\n",
|
||
|
|
"print(\"Predicted class labels:\", y_pred)\n",
|
||
|
|
"print(\"Accuracy:\", accuracy)"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "fk3ZoFmrs9VL",
|
||
|
|
"outputId": "edbe4fe4-5746-4746-de6f-560e87d3a60b"
|
||
|
|
},
|
||
|
|
"execution_count": 189,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "stream",
|
||
|
|
"name": "stdout",
|
||
|
|
"text": [
|
||
|
|
"Predicted class labels: [1 4 4 4 0 0 0 1 0 4 4 4]\n",
|
||
|
|
"Accuracy: 0.4166666666666667\n"
|
||
|
|
]
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"plt.figure(figsize=(30,20))\n",
|
||
|
|
"tree.plot_tree(clf1)\n",
|
||
|
|
"plt.show()"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/",
|
||
|
|
"height": 880
|
||
|
|
},
|
||
|
|
"id": "6XSnZLsSs_Uz",
|
||
|
|
"outputId": "22680862-7af1-4dab-af22-d4cc6113fc6b"
|
||
|
|
},
|
||
|
|
"execution_count": 191,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "display_data",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"<Figure size 2160x1440 with 1 Axes>"
|
||
|
|
],
|
||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABpgAAARNCAYAAABGy0PtAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAAsTAAALEwEAmpwYAAEAAElEQVR4nOzdeXgV5d3/8fcdgoDSAgr6uP3qY1FxxwVlCRBAkWpBUaxKq1UqWFwqUqWordrN9RGtVXHDHUurVkRFK1WjICpq3XCpdaFuVUQWwbLz/f0xIRpJIEHCZHm/rmsuSM59Zj7n5GRmMt+57ztFBJIkSZIkSZIkSVJVFeQdQJIkSZIkSZIkSXWLBSZJkiRJkiRJkiRViwUmSZIkSZIkSZIkVYsFJkmSJEmSJEmSJFWLBSZJkiRJkiRJkiRViwUmSZIkSZIkSZIkVYsFJkmSJEmSJEmSJFWLBSZJkiRJkiRJkiRViwUmSZIkSZIkSZIkVYsFJkmSJEmSJEmSJFWLBSZJkiRJkiRJkiRViwUmSZIkSZIkSZIkVYsFJkmSJEmSJEmSJFWLBSZJkiRJkiRJkiRViwUmSZIkSZIkSZIkVYsFJkmSJEmSJEmSJFWLBSZJkiRJkiRJkiRViwUmSZIkSZIkSZIkVYsFJkmSJEmSJEmSJFWLBSZJkiRJkiRJkiRViwUmSZIkSZIkSZIkVYsFJkmSJEmSJEmSJFWLBSZJkiRJkiRJkiRViwUmSZIkSZIkSZIkVYsFJkmSJEmSJEmSJFWLBSZJkiRJkiRJkiRViwUmSZIkSZIkSZIkVYsFJkmSJEmSJEmSJFWLBSZJkiRJkiRJkiRViwUmSZIkSZIkSZIkVYsFJkmSJEmSJEmSJFWLBSZJkiRJkiRJkiRViwUmSZIkSZIkSZIkVYsFJkmSJEmSJEmSJFWLBSZJkiRJkiRJkiRViwUmSZIkSZIkSZIkVUth3gEkSZIkSZKkmtCsWbOPFy1atFneOZSvpk2bfrJw4cL/yTuHJNU3KSLyziBJkiRJkiStcyml8NqXUkpERMo7hyTVNw6RJ0mSJEmSJEmSpGqxwCRJkiRJkiRJkqRqscAkSZIkSZIkSZKkarHAJEmSJEmSJOWopKSErbbaitGjRwNwxhlnsMUWWzBs2LBybXbeeWdat25d7rm9evWiffv2NZLrnnvuoXPnznTr1o0333xzlcfbtm1LcXExxcXF3HXXXTWSQZJUeyUnOpQkSZIkSVJ9lFKKunDtq6SkhPHjx3P55ZcD8J///Ic33niDe++9t+x7c+fOZYMNNqBz5868+OKL5Z7fvn37Vb5XkS+++IKNNtqoSpmWLVtGp06deOKJJ3jnnXc499xzVykiVXW7eUspEREp7xySVN/Yg0mSJEmSJElaTx599FGOPvpoAE4++WTuvvvuVdpsvvnmpFS+HtKyZUs23HDDam/v008/ZdSoUXTr1o1HH320ys/717/+Rbt27WjWrBk777wz77///iptFi5cSHFxMYcffjgff/xxtbNJkuq2wrwDSJIkSZIkSQ1Fz549mTBhAieffDILFizgsMMOo6SkZJ1v56GHHuKmm25iyZIlDBw4kEmTJtGkSRMARowYwbRp08q179evH8OHDy/7es6cObRo0aLs6xUrVqyyjalTp7LJJptw33338fOf/5yxY8eu89chSaq9LDBJkiRJkiRJ69Epp5xC27Ztef3112tsG2PHjuWzzz7jtNNO44ADDqCw8MvLgBdffPEan9+qVSvmzZtX9nVBwaoDIW2yySYA9O3bl1//+tfrILUkqS5xiDxJkiRJkiRpPRo+fDjXX389I0eOrLFt3Hbbbdx66628/PLL9OzZk9NPP51///vfQNaDqbi4uNwyatSocs/fbrvteOONN1i0aBGvvfYaW221VbnHFy9ezOLFiwGYNm3aKo9Lkuo/ezBJkiRJkiRJ68k111xDhw4dOP744/n444+59tpr2WGHHc
q1ueSSSxg3bhyffvop77//PnfffTcvvfQSP//5z3n77bfZb7/9+P3vf8++++672m1tscUWnHnmmZx55plMnjyZjz76iO985ztV6sFUWFjIyJEj6dGjB40bN+b6668H4Oabb6Zt27Zsu+22fP/736d58+YUFhZyzTXXrP2bIkmqk1JE5J1BkiRJkiRJWudSSlEXrn09/fTTDBkyhKFDhzJ06NBqPbdXr140adKEiRMn1lC6ui+lRESkvHNIUn1jgUmSJEmSJEn1Ul0pMKlmWWCSpJrhHEySJEmSJEmSJEmqFgtMkiRJkiRJUi0xbNgwFixYUOnjxx9//Fqv+5577qFz585069aNN998c5XHb7/9djp06MA+++zDjTfeCMAVV1zBvvvuS6dOncrmbnrllVcoLi6muLiY3XbbjUMPPXStM0mS6i6HyJMkSZIkSVK95BB5X1q2bBmdOnXiiSee4J133uHcc8/lrrvuKtemffv2PPXUUxQWFrLnnnvyyiuv8Pbbb/Pd736XiKBr167ceeedbL755mXPOffcc2nbti1HH330+n5JVeYQeZJUMwrzDiBJkiRJkiQ1NMuWLePII49kzpw57LrrrsyYMYPx48dTXFzM+PHjGT9+PPfddx9Lly7lww8/5O6772abbbahffv2vPjii9Xe3r/+9S/atWtHs2bN2HnnnXn//fdXabP99tuzYMECmjRpwre+9S0Avvvd7wJZkaZx48YUFJQfEOnee+/liSeeqP4bIEmq8xwiT5IkSZIkSVrPxo8fz7bbbssjjzzC3nvvXWGbjTbaiAkTJjBs2DBuu+22Stc1YsSIsiHrVi6jRo0q12bOnDm0aNGi7OsVK1assp5DDjmEPfbYg3bt2nHSSSeVe+yvf/0r22yzDZtttlnZ91566SX+93//l29/+9tVes2SpPrFHkySJEmSJEnSevbWW2+x1157AdChQ4dVhqsD2GOPPQDYeuut+cc//lHpulbOjbQ6rVq1Yt68eWVff70n0vz587nwwgvL5mYqLi6mf//+bLjhhjz//PP88Y9/5IEHHij3nD//+c8cccQRa9y2JKl+ssAkSZIkSZIkrWdt27blueee44gjjuD555+vsE1KX04btLq5pEaMGMG0adPKfa9fv34MHz687OvtttuON954g0WLFvHOO++w1VZblWtfUFDABhtsQLNmzcq2t2zZMmbMmMGJJ57Ivffey4YbbljuOffffz9nn3121V6wJKnescAkSZIkSZIkrWeHHHII48aNo1evXuywww40btx4rddVlR5MhYWFjBw5kh49etC4cWOuv/56AG6++Wbatm1LUVERRx11FJ06dSIiGDBgAN/+9rcZPHgws2fP5sgjjwTg6quvZqedduK5555jhx12YKONNlrr3JKkui2t7u4HSZIkSZIkqa5KKUVtvva1dOlSGjduzO23386rr77KBRdckHekeimlRESkNbeUJFWHPZgkSZIkSZKkHPTv35/PP/+cRo0aMXbs2LzjSJJULfZgkiRJkiRJUr1U23swaf2wB5Mk1YyCvANIkiRJkiRJ+mbat29fo+t//fXXKSoqolu3bhx00EHMnTsXgBdffJFOnTrRtWtXfvrTn9ZoBklS7WKBSZIkSZIkSdJqtWnThgceeIAnnniCfv36cc011wBwxRVX8H//939MnjyZOXPm8PLLL+ecVJK0vlhgkiRJkiRJkmrY9OnT6dixIz169OCEE04A4JJLLqG4uJg999yTCRMmAHDeeecxcOBADjzwQHr16sWYMWPo1asXvXv3Zvny5ZSUlLD//vvTr18/9tprL6ZNm1ZuO7NmzeKQQw6hZ8+eHH744SxatKjCbVdX69atadGiBQBNmjShoCC7rLjTTjsxd+5cIoIvvviCli1bruU7JEmqa5yDSZIkSZIkSfVSbZqD6dJLL6VVq1YMGjSIFStWUFBQwBdffMFGG23E3Llz6d27N9OmTeO8884DskLT4MGD2W
qrrTj33HMZPHgwxxxzDMuXL2fkyJFMnTqV9957j+OOO47HHnuM9u3b8+KLL3L66afTu3dvevfuzVVXXUWTJ
|
||
|
|
},
|
||
|
|
"metadata": {
|
||
|
|
"needs_background": "light"
|
||
|
|
}
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"from sklearn.metrics import confusion_matrix\n",
|
||
|
|
"import seaborn as sns\n",
|
||
|
|
"from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score\n",
|
||
|
|
"plt.figure(figsize=(12,8))\n",
|
||
|
|
"cm = confusion_matrix(y_test,y_pred)\n",
|
||
|
|
" \n",
|
||
|
|
"#Plot the confusion matrix.\n",
|
||
|
|
"sns.heatmap(cm,\n",
|
||
|
|
" annot=True,\n",
|
||
|
|
" fmt='g',\n",
|
||
|
|
" xticklabels=['0', '1' , '2' , '3' , '4' , '5' , '6' , '7'],\n",
|
||
|
|
" yticklabels=['0', '1' , '2' , '3' , '4' , '5' , '6' , '7'])\n",
|
||
|
|
"plt.ylabel('Actual',fontsize=13)\n",
|
||
|
|
"plt.xlabel('Prediction',fontsize=13)\n",
|
||
|
|
"plt.title('Confusion Matrix',fontsize=17)\n",
|
||
|
|
"plt.show()\n",
|
||
|
|
" \n",
|
||
|
|
" "
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/",
|
||
|
|
"height": 520
|
||
|
|
},
|
||
|
|
"id": "Wjy1kMqqtHbj",
|
||
|
|
"outputId": "59a66ab6-a859-4617-91e5-468e8ec69684"
|
||
|
|
},
|
||
|
|
"execution_count": 192,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "display_data",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"<Figure size 864x576 with 2 Axes>"
|
||
|
|
],
|
||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAApYAAAH3CAYAAAABsh9oAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAAsTAAALEwEAmpwYAAA4jElEQVR4nO3de5gcZZn//89nwiDIWQGTSaITDWJEvogGBNn1B7oQZAPhUgTZ5SDihq+gEnXBw/LVFUWFxSBZd5WsKIkIJuKBAFFAASFsgAQMGCaIhEQyM4kcDYZjMnP//ugi1cz0THeT7nlmut8vrrqmq7q66u776ive3k/VU44IAQAAAFuqJXUAAAAAaAwUlgAAAKgJCksAAADUBIUlAAAAaoLCEgAAADVBYQkAAICaoLAE8IrZfq/tpbaftR22316Hc9xi+5ZaH3eky/L976njAIBiFJbACGb7Nba/avte23+z/ZztB2zPsr1Hnc+9s6SfqfDvyKcknSjpz/U851CzfVlWwD2ffd++729l+y/ZPr95hec42vaXtjhYABgGtkodAIBXxvY+kn4l6bWS5km6RNJGSW+VdJyk/ytp6zqGMFnSzpLOjYhf1vE8h9Xx2JUISZZ0rKTZfd6bIml3SS9swfGPlnSCpHOr/Ny2kjZtwXkBoOYoLIERyPaOkhZIapW0X0Tc1+f9L0o6r85h7J79/Ws9TxIRL9bz+BXolbRQheKvb2F5oqQlynNRV7ZbJG0dEc9HxPNDcU4AqAZD4cDINF3S6yX9a9+iUpIi4rmI+EzxNtsH2f6t7Q3Z8lvbB/bZ5yPZsO4htr9he102vH6j7QlF+90i6cfZ6s3ZZ27J3rvM9uq+MRUdu71o29ttX5sNJz9vu8v2z2yPKz5X32ssbW9j++u2V9t+Mfv7dduv6rPfatu/sb2f7duz77LG9styU4HLJf2d7TcUHXtHSUdl7/Vj+7O2b7P9mO0XsksU/tW2i7+bpJMljcpyE7Yje689Wz/H9mm2H1ChM3p49v7mayxdcIPt9bZf3yeOX9p+xvabq/zOAFA1OpbAyHS0CkXGTyrZ2fZ7JN0oqVt5J/M0FYrC90XE7X0+8h+Snpf0dUm7SvpXFQrJd2fvnyepQ9LHs31WSPpLNV/A9m6SfiPpKUkzJT0uqU2F4eWxkjoH+Jwl/VzS+1Uo6hZncX1B0t6SjuzzkddLukbSj7L9j5P0Ldv3R8T1FYZ7raT1KnQtX8rfB1XoGP9EUqlC9TOSrpN0lQpD1oeqkNddJP1bts95Kvwf/INUKDBL+bCknVS41OFJSav77hARYfsUSX+QNMf2e7NtH5U0TdIZEfFghd8VAF65iGBhYRlhiwoFxr1V7L9UhQLudUXbxqhQLN1VtO0jKlxTeLukUUXbZ2Tb9yradkK27eA+57pM0uoSMbx07PZsfVq2vl+Z2G+RdEvR+tTsc9/os99/ZNuPKNq2Ott2eNG2V6lQBP+0grxdJmlT9nq2pI6i926SdF3ReX7T57OvLnG870vaIOlVpc7RZ9/2LPZnJY0r8X5I+vc+2z6cbf+spAmSnpb069S/VxYWluZZGAoHRqYdVSgayrI9WtI7Jf0oIjZ3FSNirQodvP1s971G8JKI6Cla/132942vPOR+/pr9ParvEHYZU7O/F/bZfkGf91+yKiJ+/dJKRLwg6Q5V/10ulzTJ9jttj5d0sAYYBs/O86y0+c7xXWzvqkKRvJ2kPas474KIKNm9LXHOn0i6UoVO6M9V6JR+tIpzAcAWobAERqanJe1Q4b7t2d8HSrzXkf2d0Gd732mDnsr+vqbCc1biVhWGkc+R9ITtX9v+pO3Xlvlcu6THIuKJ4o0R8ZgKw+nlvotU+D7VfpfbsmOdIOmfVeg8/nKgnW0fYfsOSc+p0GF+TIXheKlwN32lVlYZ5+lZbG+X9MmI6K7y8wDwilFYAiPTCkl7Vtnpq0bPANs9wPZiMcD2US/bqeB4Fb
qp56swfc5Fkh6w/bZKA63AlnyXzSIiVLjO9HgV7gb/eUQ8V/LA9rtVuK5zkwqF3j+qcI3l57Jdqvm3t+Q5BvEuFaagkgrXnALAkKGwBEamqyVto8LciuWszv6+pcR7k7K/q2oQ00ueUumOXHupnSPinoj4akT8f5LekX32s4Mcf7Wk3fp2NrOh5l1V2+/S1+WSXqfCXKEDDoNL+pCkFyX9Q0T8T0QsjIjfqPTUTAMV4lWz/RpJP5C0TNL/SDorK3IBYEhQWAIj0yUq3DX9rVLdvWw6npmSFBHrVLh558Tiaymzay9PVOHmnUdrGNtDknayvW/RubZXn7ues+sO+3YNV6jQodt5kONfk/3teyf2WX3er7mIWJGd9/+pcPPOQHpVKBg3d2ltbyPpkyX2fUaF6Ya2r0GI31WhW3miCjdcrZQ01/Z2NTg2AJTFdEPACBQR621PU2Hi7rttXynpThWevPMWFabU2V158fVZFab2ucP2S5N8n6ZC17PaOR3LuVLSNyX9wvbFKkzJ81EV7sQeX7TfyZI+afsXKhSjW6lwV/MO2TEGslDSryV9MZvv8k5JB6hQTF0bEb+q7dd5uYi4qILdFkj6tKTf2P6RCt/pZBWmcOrr7uzvf7rwWMie7Cacqtj+JxU62GdFxPJs20mSFkn6lgpPYgKAuqKwBEaoiLgn61Z+WoWJuj+kQodslQrP8J5VtO+ttt+rwmMDz8k23yXpnyPif2sc11NZ0XuRCtdOdqkwT+XfJP2waNffqfBYyA9KGq3CtDr3Szo6Iq4e5Phh+wOSviTpn1S45nGtpG+o+sci1kVE/M72iZK+qMJ3f1SFaYVuk3RDn90vV6Ew/oAKxadV4fykL8kK7P9S4YaomUVx3GH7m5L+zfYvi++OB4B6cOF6dAAAAGDLcI0lAAAAaoLCEgAAoEnZHmX797avLfHeq2zPs/2Q7Tttt5c7HoUlAABA8zpThRk5SjlV0lMRMVH5dfODorAEAABoQtmNf/8o6fsD7DJN0pzs9VWS3ldimriXobAEAABoTt+WdLYKc++WMlbSGkmKiE2S1it/sldJI2K6oY+2H8Ot65m53YtThwAAQEPY9GJXVY92rYeNjz9clxpn693edJqk6UWbZkfES/MYy/ZUSY9GxN22D67VeUdEYQkAAIDKZUXk7EF2OUjSUbaPUOFhGTvavjwiTijap0uFB1t02t5K0k6SnhjsvAyFAwAApNLbU5+ljIj4QkSMi4h2FZ56dlOfolIqPEXspcfxHpPtM2iHlY4lAABAKjHQ5Y1p2D5X0tKIWCDpUkk/sv2QpCdVKEAHRWEJAADQxCLiFkm3ZK+/VLT9eRUeF1wxCksAAIBUeodXx3JLcY0lAAAAaoKOJQAAQCIxzK6x3FIUlgAAAKkwFA4AAAD0R8cSAAAglQYbCqdjCQAAgJqgYwkAAJBKBU/JGUnoWAIAAKAm6FgCAACk0mDXWFJYAgAApMJ0QwAAAEB/dCwBAAASabQn79CxBAAAQE3QsQQAAEilwa6xpLAEAABIhaFwAAAAoD8KywqccsHp+vbSS3Xu9TNTh5LclMMO1v3Lb9UDHYt09llnpA4nKXKRIxc5cpEjFzlykSMXffT21GdJhMKyArdfdbNmnvy11GEk19LSolkXn6epR56gvfc5RMcdd7QmTdojdVhJkIscuciRixy5yJGLHLlofBSWFXjwrhV6Zv2G1GEkt/9++2rlytVateoRbdy4UfPnX62jjpySOqwkyEWOXOTIRY5c5MhFjlyUEL31WRKhsETF2saO1prO7s3rnV1r1dY2OmFE6ZCLHLnIkYscuciRixy5KKG3tz5LIkNyV7jtt0iaJmlstqlL0oKIWDEU5wcAAED91b1jaftzkn4iyZLuyhZLutL25wf53HTbS20v/ePfHq53mKhAd9c6jR/Xtnl93Ngx6u5elzCidMhFjlzkyEWOXOTIRY5clMBQeNVOlbRfRHwzIi7Plm9K2j97r6
SImB0RkyNi8p47vHEIwkQ5S5Yu08SJE9TePl6tra069thpuubaG1KHlQS5yJGLHLnIkYscuciRi8Y3FEPhv
|
||
|
|
},
|
||
|
|
"metadata": {
|
||
|
|
"needs_background": "light"
|
||
|
|
}
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"print(classification_report(y_test, y_pred))"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "ONCikocltz97",
|
||
|
|
"outputId": "762a6219-0f63-4fb0-9a3d-978787857cf2"
|
||
|
|
},
|
||
|
|
"execution_count": 193,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "stream",
|
||
|
|
"name": "stdout",
|
||
|
|
"text": [
|
||
|
|
" precision recall f1-score support\n",
|
||
|
|
"\n",
|
||
|
|
" 0 0.25 1.00 0.40 1\n",
|
||
|
|
" 1 0.00 0.00 0.00 1\n",
|
||
|
|
" 2 0.00 0.00 0.00 1\n",
|
||
|
|
" 3 0.00 0.00 0.00 1\n",
|
||
|
|
" 4 0.67 1.00 0.80 4\n",
|
||
|
|
" 5 0.00 0.00 0.00 1\n",
|
||
|
|
" 6 0.00 0.00 0.00 1\n",
|
||
|
|
" 7 0.00 0.00 0.00 2\n",
|
||
|
|
"\n",
|
||
|
|
" accuracy 0.42 12\n",
|
||
|
|
" macro avg 0.11 0.25 0.15 12\n",
|
||
|
|
"weighted avg 0.24 0.42 0.30 12\n",
|
||
|
|
"\n"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"output_type": "stream",
|
||
|
|
"name": "stderr",
|
||
|
|
"text": [
|
||
|
|
"/usr/local/lib/python3.9/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
||
|
|
" _warn_prf(average, modifier, msg_start, len(result))\n",
|
||
|
|
"/usr/local/lib/python3.9/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
||
|
|
" _warn_prf(average, modifier, msg_start, len(result))\n",
|
||
|
|
"/usr/local/lib/python3.9/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
||
|
|
" _warn_prf(average, modifier, msg_start, len(result))\n"
|
||
|
|
]
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "markdown",
|
||
|
|
"source": [
|
||
|
|
"### K-Fold Cross Validation"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"id": "6mTCBRDdvsI5"
|
||
|
|
}
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"X = wine.drop('Recommendation',axis=1)\n",
|
||
|
|
"y = wine['Recommendation']"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"id": "ECTwhEZwwLkU"
|
||
|
|
},
|
||
|
|
"execution_count": 195,
|
||
|
|
"outputs": []
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"from sklearn.model_selection import KFold\n",
|
||
|
|
"kf = KFold(n_splits=5, shuffle=True, random_state=42)\n",
|
||
|
|
"\n",
|
||
|
|
"# initialize an empty list to store the performance of each fold\n",
|
||
|
|
"scores = []\n",
|
||
|
|
"\n",
|
||
|
|
"# iterate over each fold\n",
|
||
|
|
"for train_index, test_index in kf.split(X):\n",
|
||
|
|
" # extract the training and testing sets\n",
|
||
|
|
" X_train, X_test = X.iloc[train_index], X.iloc[test_index]\n",
|
||
|
|
" y_train, y_test = y.iloc[train_index], y.iloc[test_index]\n",
|
||
|
|
" \n",
|
||
|
|
" # train the model\n",
|
||
|
|
" model = tree.DecisionTreeClassifier()\n",
|
||
|
|
" model.fit(X_train, y_train)\n",
|
||
|
|
"\n",
|
||
|
|
" # evaluate the performance on the test set and append the score to the list\n",
|
||
|
|
" score = model.score(X_test, y_test)\n",
|
||
|
|
" scores.append(score)"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"id": "qjtOJfy-t_kj"
|
||
|
|
},
|
||
|
|
"execution_count": 196,
|
||
|
|
"outputs": []
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"scores"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "4TLrmi_rwX5j",
|
||
|
|
"outputId": "01b8496d-31d7-4aa9-8518-a2c24a54f0dd"
|
||
|
|
},
|
||
|
|
"execution_count": 197,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "execute_result",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"[0.375, 0.0, 0.25, 0.25, 0.25]"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"execution_count": 197
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"\n",
|
||
|
|
"# calculate the average score across all folds\n",
|
||
|
|
"mean_score = sum(scores) / len(scores)\n",
|
||
|
|
"\n",
|
||
|
|
"print('Average score across all folds:', mean_score)"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "RaaNfImIwDej",
|
||
|
|
"outputId": "2595e52e-80d3-4d6a-89a7-7d6c955a914e"
|
||
|
|
},
|
||
|
|
"execution_count": 198,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "stream",
|
||
|
|
"name": "stdout",
|
||
|
|
"text": [
|
||
|
|
"Average score across all folds: 0.225\n"
|
||
|
|
]
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "markdown",
|
||
|
|
"source": [
|
||
|
|
"### Stratified K-Fold Cross Validation"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"id": "05mPzYQozTA1"
|
||
|
|
}
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"from sklearn.model_selection import StratifiedKFold\n",
|
||
|
|
"\n",
|
||
|
|
"# X is your feature matrix, y is your target variable\n",
|
||
|
|
"skf = StratifiedKFold(n_splits=5)\n",
|
||
|
|
"skf_score = []\n",
|
||
|
|
"\n",
|
||
|
|
"for train_index, test_index in skf.split(X, y):\n",
|
||
|
|
" X_train, X_test = X.iloc[train_index], X.iloc[test_index]\n",
|
||
|
|
" y_train, y_test = y.iloc[train_index], y.iloc[test_index]\n",
|
||
|
|
"\n",
|
||
|
|
" # Train and evaluate your model using the training and test sets\n",
|
||
|
|
" model = tree.DecisionTreeClassifier()\n",
|
||
|
|
" model.fit(X_train, y_train)\n",
|
||
|
|
" score = model.score(X_test, y_test)\n",
|
||
|
|
" skf_score.append(score)\n",
|
||
|
|
" print(\"Test score: {}\".format(score))"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "Gvzhfx_owoUT",
|
||
|
|
"outputId": "dfca45db-9c49-488b-cc71-45e89c52f2ce"
|
||
|
|
},
|
||
|
|
"execution_count": 201,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "stream",
|
||
|
|
"name": "stdout",
|
||
|
|
"text": [
|
||
|
|
"Test score: 0.125\n",
|
||
|
|
"Test score: 0.375\n",
|
||
|
|
"Test score: 0.25\n",
|
||
|
|
"Test score: 0.25\n",
|
||
|
|
"Test score: 0.5\n"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"output_type": "stream",
|
||
|
|
"name": "stderr",
|
||
|
|
"text": [
|
||
|
|
"/usr/local/lib/python3.9/dist-packages/sklearn/model_selection/_split.py:700: UserWarning: The least populated class in y has only 3 members, which is less than n_splits=5.\n",
|
||
|
|
" warnings.warn(\n"
|
||
|
|
]
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"skf_score"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "3lYM1Kguzj6j",
|
||
|
|
"outputId": "95c02fca-e8d8-4d60-eb26-9976926186fe"
|
||
|
|
},
|
||
|
|
"execution_count": 202,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "execute_result",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"[0.125, 0.375, 0.25, 0.25, 0.5]"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"execution_count": 202
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"mean_score_skf = sum(skf_score) / len(skf_score)\n",
|
||
|
|
"\n",
|
||
|
|
"print('Average score across all folds(skf):', mean_score_skf)"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "5v9a2zth0GEC",
|
||
|
|
"outputId": "24263aaa-8693-4cc9-eaf1-190bdd135627"
|
||
|
|
},
|
||
|
|
"execution_count": 203,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "stream",
|
||
|
|
"name": "stdout",
|
||
|
|
"text": [
|
||
|
|
"Average score across all folds(skf): 0.3\n"
|
||
|
|
]
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"new_data = [['Red','Light-Bodied','None']]"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"id": "xpP2OCHi1AF6"
|
||
|
|
},
|
||
|
|
"execution_count": 216,
|
||
|
|
"outputs": []
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"from sklearn.preprocessing import OneHotEncoder\n",
|
||
|
|
"ohe = OneHotEncoder()\n",
|
||
|
|
"new_data_transformed = ohe.fit_transform(new_data).reshape(1,-1)"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"id": "GOhB6sZf0OsR"
|
||
|
|
},
|
||
|
|
"execution_count": 219,
|
||
|
|
"outputs": []
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"new_data_transformed.shape"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "5w27VUrH039B",
|
||
|
|
"outputId": "c83493ba-ab65-4ed7-8467-dcbef5776ef3"
|
||
|
|
},
|
||
|
|
"execution_count": 220,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "execute_result",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"(1, 3)"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"execution_count": 220
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"new_data_transformed[0][0]"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "gmkyYi7r3aKa",
|
||
|
|
"outputId": "0e1def97-7c5d-4808-88c5-2c8a930ce711"
|
||
|
|
},
|
||
|
|
"execution_count": 226,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "execute_result",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"<1x3 sparse matrix of type '<class 'numpy.float64'>'\n",
|
||
|
|
"\twith 3 stored elements in Compressed Sparse Row format>"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"execution_count": 226
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "markdown",
|
||
|
|
"source": [
|
||
|
|
"### Using Pipeline"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"id": "50I5UMNa5MPJ"
|
||
|
|
}
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"wine = pd.read_excel('/content/wine_data.xlsx')"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"id": "EcbyQ1vW5N4J"
|
||
|
|
},
|
||
|
|
"execution_count": 240,
|
||
|
|
"outputs": []
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"wine['Recommendation'] = wine['Recommendation'].fillna(wine['Recommendation'].value_counts().sort_values(ascending=False).keys()[0])\n",
|
||
|
|
"wine['White_Wine'] = wine['White_Wine'].fillna('None')\n",
|
||
|
|
"wine['Red_Wine'] = wine['Red_Wine'].fillna('None')"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"id": "sEfcQb5a5epv"
|
||
|
|
},
|
||
|
|
"execution_count": 241,
|
||
|
|
"outputs": []
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"X = wine.drop('Recommendation',axis=1)\n",
|
||
|
|
"y = wine['Recommendation']"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"id": "Lfk7Ha6p5-jK"
|
||
|
|
},
|
||
|
|
"execution_count": 242,
|
||
|
|
"outputs": []
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"le = LabelEncoder()\n",
|
||
|
|
"y = le.fit_transform(y)"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"id": "q4S2u4Js8Muq"
|
||
|
|
},
|
||
|
|
"execution_count": 244,
|
||
|
|
"outputs": []
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"y"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "lSNmXh-g8UYx",
|
||
|
|
"outputId": "05a0768e-4f5e-4aac-a88a-6d8c4fe013d8"
|
||
|
|
},
|
||
|
|
"execution_count": 245,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "execute_result",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"array([4, 7, 6, 3, 4, 2, 6, 1, 7, 0, 3, 5, 4, 2, 6, 1, 7, 0, 3, 5, 4, 7,\n",
|
||
|
|
" 4, 0, 6, 4, 4, 1, 4, 7, 4, 0, 4, 3, 6, 5, 4, 4, 2, 4])"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"execution_count": 245
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"X_train, X_test, y_train, y_test = train_test_split(\n",
|
||
|
|
" X, y, test_size=0.2, random_state=42,stratify=y)"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"id": "3fx7c9hY5wha"
|
||
|
|
},
|
||
|
|
"execution_count": 243,
|
||
|
|
"outputs": []
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"from sklearn.pipeline import Pipeline\n",
|
||
|
|
"pipeline = Pipeline([\n",
|
||
|
|
" ('onehot', OneHotEncoder(handle_unknown='ignore')),\n",
|
||
|
|
" ('classifier', tree.DecisionTreeClassifier())\n",
|
||
|
|
"])"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"id": "0G5B1atJ6uhy"
|
||
|
|
},
|
||
|
|
"execution_count": 247,
|
||
|
|
"outputs": []
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"pipeline.fit(X_train, y_train)"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/",
|
||
|
|
"height": 126
|
||
|
|
},
|
||
|
|
"id": "wBakGDQX638K",
|
||
|
|
"outputId": "0ba601ab-befd-42ce-fbfa-b96df3c886fb"
|
||
|
|
},
|
||
|
|
"execution_count": 248,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "execute_result",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"Pipeline(steps=[('onehot', OneHotEncoder(handle_unknown='ignore')),\n",
|
||
|
|
" ('classifier', DecisionTreeClassifier())])"
|
||
|
|
],
|
||
|
|
"text/html": [
|
||
|
|
"<style>#sk-container-id-3 {color: black;background-color: white;}#sk-container-id-3 pre{padding: 0;}#sk-container-id-3 div.sk-toggleable {background-color: white;}#sk-container-id-3 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-3 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-3 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-3 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-3 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-3 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-3 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-3 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-3 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-3 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-3 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-3 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-3 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-3 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-3 div.sk-item {position: relative;z-index: 1;}#sk-container-id-3 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-3 div.sk-item::before, #sk-container-id-3 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-3 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-3 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-3 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-3 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-3 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-3 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-3 div.sk-label-container {text-align: center;}#sk-container-id-3 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-3 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-3\" class=\"sk-top-con
|
||
|
|
" ('classifier', DecisionTreeClassifier())])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-7\" type=\"checkbox\" ><label for=\"sk-estimator-id-7\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[('onehot', OneHotEncoder(handle_unknown='ignore')),\n",
|
||
|
|
" ('classifier', DecisionTreeClassifier())])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-8\" type=\"checkbox\" ><label for=\"sk-estimator-id-8\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OneHotEncoder</label><div class=\"sk-toggleable__content\"><pre>OneHotEncoder(handle_unknown='ignore')</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-9\" type=\"checkbox\" ><label for=\"sk-estimator-id-9\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">DecisionTreeClassifier</label><div class=\"sk-toggleable__content\"><pre>DecisionTreeClassifier()</pre></div></div></div></div></div></div></div>"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"execution_count": 248
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"y_pred = pipeline.predict(X_test)\n",
|
||
|
|
"\n",
|
||
|
|
"# Evaluate the accuracy of your model\n",
|
||
|
|
"accuracy = accuracy_score(y_test, y_pred)\n",
|
||
|
|
"print(f\"Accuracy: {accuracy}\")"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "swEZCh2_65SR",
|
||
|
|
"outputId": "bac8fba1-51f6-4f7b-980a-41e8ab182e4a"
|
||
|
|
},
|
||
|
|
"execution_count": 249,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "stream",
|
||
|
|
"name": "stdout",
|
||
|
|
"text": [
|
||
|
|
"Accuracy: 0.375\n"
|
||
|
|
]
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"from sklearn.model_selection import cross_val_score\n",
|
||
|
|
"cross_val_score(pipeline,X_train,y_train,cv=5,scoring='accuracy').mean()"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "K8DDm4r1ADEp",
|
||
|
|
"outputId": "101bcb1b-170b-495f-e568-ec12b645e567"
|
||
|
|
},
|
||
|
|
"execution_count": 251,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "stream",
|
||
|
|
"name": "stderr",
|
||
|
|
"text": [
|
||
|
|
"/usr/local/lib/python3.9/dist-packages/sklearn/model_selection/_split.py:700: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=5.\n",
|
||
|
|
" warnings.warn(\n"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"output_type": "execute_result",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"0.2857142857142857"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"execution_count": 251
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "markdown",
|
||
|
|
"source": [
|
||
|
|
"### GridSearchCV"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"id": "4FPMpMDHAm8g"
|
||
|
|
}
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"params = {'classifier__criterion':['gini','entropy'],'classifier__max_depth':[4,5,6,7,8,9,10,11,12,15,20,30,40,50,70,90,120,150]}"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"id": "xd41Bw22BN7r"
|
||
|
|
},
|
||
|
|
"execution_count": 255,
|
||
|
|
"outputs": []
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"from sklearn.model_selection import GridSearchCV\n",
|
||
|
|
"grid = GridSearchCV(pipeline,params,cv=5,scoring='accuracy')\n",
|
||
|
|
"grid.fit(X_train,y_train)"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/",
|
||
|
|
"height": 208
|
||
|
|
},
|
||
|
|
"id": "XppiX9naAmaR",
|
||
|
|
"outputId": "729ad00f-86bf-4d17-9d30-18694fcc6d98"
|
||
|
|
},
|
||
|
|
"execution_count": 256,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "stream",
|
||
|
|
"name": "stderr",
|
||
|
|
"text": [
|
||
|
|
"/usr/local/lib/python3.9/dist-packages/sklearn/model_selection/_split.py:700: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=5.\n",
|
||
|
|
" warnings.warn(\n"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"output_type": "execute_result",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"GridSearchCV(cv=5,\n",
|
||
|
|
" estimator=Pipeline(steps=[('onehot',\n",
|
||
|
|
" OneHotEncoder(handle_unknown='ignore')),\n",
|
||
|
|
" ('classifier',\n",
|
||
|
|
" DecisionTreeClassifier())]),\n",
|
||
|
|
" param_grid={'classifier__criterion': ['gini', 'entropy'],\n",
|
||
|
|
" 'classifier__max_depth': [4, 5, 6, 7, 8, 9, 10, 11, 12,\n",
|
||
|
|
" 15, 20, 30, 40, 50, 70, 90,\n",
|
||
|
|
" 120, 150]},\n",
|
||
|
|
" scoring='accuracy')"
|
||
|
|
],
|
||
|
|
"text/html": [
|
||
|
|
"<style>#sk-container-id-4 {color: black;background-color: white;}#sk-container-id-4 pre{padding: 0;}#sk-container-id-4 div.sk-toggleable {background-color: white;}#sk-container-id-4 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-4 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-4 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-4 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-4 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-4 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-4 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-4 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-4 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-4 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-4 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-4 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-4 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-4 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-4 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-4 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-4 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-4 div.sk-item {position: relative;z-index: 1;}#sk-container-id-4 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-4 div.sk-item::before, #sk-container-id-4 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-4 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-4 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-4 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-4 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-4 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-4 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-4 div.sk-label-container {text-align: center;}#sk-container-id-4 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-4 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-4\" class=\"sk-top-con
|
||
|
|
" estimator=Pipeline(steps=[('onehot',\n",
|
||
|
|
" OneHotEncoder(handle_unknown='ignore')),\n",
|
||
|
|
" ('classifier',\n",
|
||
|
|
" DecisionTreeClassifier())]),\n",
|
||
|
|
" param_grid={'classifier__criterion': ['gini', 'entropy'],\n",
|
||
|
|
" 'classifier__max_depth': [4, 5, 6, 7, 8, 9, 10, 11, 12,\n",
|
||
|
|
" 15, 20, 30, 40, 50, 70, 90,\n",
|
||
|
|
" 120, 150]},\n",
|
||
|
|
" scoring='accuracy')</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-10\" type=\"checkbox\" ><label for=\"sk-estimator-id-10\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">GridSearchCV</label><div class=\"sk-toggleable__content\"><pre>GridSearchCV(cv=5,\n",
|
||
|
|
" estimator=Pipeline(steps=[('onehot',\n",
|
||
|
|
" OneHotEncoder(handle_unknown='ignore')),\n",
|
||
|
|
" ('classifier',\n",
|
||
|
|
" DecisionTreeClassifier())]),\n",
|
||
|
|
" param_grid={'classifier__criterion': ['gini', 'entropy'],\n",
|
||
|
|
" 'classifier__max_depth': [4, 5, 6, 7, 8, 9, 10, 11, 12,\n",
|
||
|
|
" 15, 20, 30, 40, 50, 70, 90,\n",
|
||
|
|
" 120, 150]},\n",
|
||
|
|
" scoring='accuracy')</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-11\" type=\"checkbox\" ><label for=\"sk-estimator-id-11\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">estimator: Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[('onehot', OneHotEncoder(handle_unknown='ignore')),\n",
|
||
|
|
" ('classifier', DecisionTreeClassifier())])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-12\" type=\"checkbox\" ><label for=\"sk-estimator-id-12\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OneHotEncoder</label><div class=\"sk-toggleable__content\"><pre>OneHotEncoder(handle_unknown='ignore')</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-13\" type=\"checkbox\" ><label for=\"sk-estimator-id-13\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">DecisionTreeClassifier</label><div class=\"sk-toggleable__content\"><pre>DecisionTreeClassifier()</pre></div></div></div></div></div></div></div></div></div></div></div></div>"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"execution_count": 256
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"grid.best_score_"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "Rmghi5mxBnma",
|
||
|
|
"outputId": "c2287da2-f266-4606-8197-51d389eea71e"
|
||
|
|
},
|
||
|
|
"execution_count": 258,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "execute_result",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"0.35238095238095235"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"execution_count": 258
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"grid.best_params_"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "6oesag6TBs-A",
|
||
|
|
"outputId": "6dc525d8-8a24-4678-81fd-addab9e346c7"
|
||
|
|
},
|
||
|
|
"execution_count": 259,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "execute_result",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"{'classifier__criterion': 'gini', 'classifier__max_depth': 4}"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"execution_count": 259
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "markdown",
|
||
|
|
"source": [
|
||
|
|
"## Exporting the Pipeline"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"id": "ncTd9LuTBxzD"
|
||
|
|
}
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"import pickle\n",
|
||
|
|
"# Use a context manager so the file handle is flushed and closed even if pickling fails.\n",
"with open('/content/sample_data/pipe.pkl', 'wb') as model_file:\n",
"    pickle.dump(pipeline, model_file)"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"id": "6lNtvIt7B5fa"
|
||
|
|
},
|
||
|
|
"execution_count": 260,
|
||
|
|
"outputs": []
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"# Reload the pipeline from the path the previous cell wrote to.\n",
"# NOTE: pickle.load can execute arbitrary code, so only unpickle files you created yourself.\n",
"with open('/content/sample_data/pipe.pkl', 'rb') as model_file:\n",
"    pipe = pickle.load(model_file)"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"id": "qREsT8mGCIvL"
|
||
|
|
},
|
||
|
|
"execution_count": 261,
|
||
|
|
"outputs": []
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"new_data = np.array(['Red','Light-Bodied','None'],dtype='object').reshape(1,3)"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"id": "mrZsYNQF68zR"
|
||
|
|
},
|
||
|
|
"execution_count": 265,
|
||
|
|
"outputs": []
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"new_data"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "-m-6qe2GCk5o",
|
||
|
|
"outputId": "85d423c3-5a1e-4072-8f99-6947d8a2dd18"
|
||
|
|
},
|
||
|
|
"execution_count": 266,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "execute_result",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"array([['Red', 'Light-Bodied', 'None']], dtype=object)"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"execution_count": 266
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [
|
||
|
|
"# Wrap the row in a DataFrame that carries the column names the pipeline was fitted with;\n",
"# passing a bare ndarray triggers sklearn's \"X does not have valid feature names\" warning.\n",
"import pandas as pd\n",
"pipe.predict(pd.DataFrame(new_data, columns=pipe.feature_names_in_))"
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"colab": {
|
||
|
|
"base_uri": "https://localhost:8080/"
|
||
|
|
},
|
||
|
|
"id": "0y6C3YUjC41h",
|
||
|
|
"outputId": "01b5ba9b-ad77-4e84-c2c2-1eddced5e38b"
|
||
|
|
},
|
||
|
|
"execution_count": 267,
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"output_type": "stream",
|
||
|
|
"name": "stderr",
|
||
|
|
"text": [
|
||
|
|
"/usr/local/lib/python3.9/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but OneHotEncoder was fitted with feature names\n",
|
||
|
|
" warnings.warn(\n"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"output_type": "execute_result",
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"array(['Pinot Noir'], dtype=object)"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"execution_count": 267
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"source": [],
|
||
|
|
"metadata": {
|
||
|
|
"id": "-yrFSUIi7Xgx"
|
||
|
|
},
|
||
|
|
"execution_count": null,
|
||
|
|
"outputs": []
|
||
|
|
}
|
||
|
|
]
|
||
|
|
}
|