{ "cells": [ { "cell_type": "markdown", "id": "709dc801-bc0b-4c2c-ab52-26b3de1b10b4", "metadata": {}, "source": [ "The usual suspects: imports. You only need to run this once." ] }, { "cell_type": "code", "execution_count": 11, "id": "63528a79-842d-44ec-9109-29df1621cef8", "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "from sklearn.metrics import classification_report, confusion_matrix\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.utils import resample" ] }, { "cell_type": "markdown", "id": "2031a1c7-636b-4b16-89fa-aed0362c5b83", "metadata": {}, "source": [ "Load the data and split it into feature and label subsets. Again, you only need to run this once." ] }, { "cell_type": "code", "execution_count": 4, "id": "d106a41b-e75c-4ca0-96e1-0e5b117992bf", "metadata": {}, "outputs": [], "source": [ "# The CSV path is relative, so it resolves against the kernel's working directory.\n", "wine_data = pd.read_csv(\"../WineQT.csv\", delimiter=\",\")\n", "# Features: every column except the target (`quality`) and the `Id` column.\n", "wine_features = wine_data.drop(columns=[\"quality\", \"Id\"])\n", "# Labels: the `quality` column as a flat 1-D NumPy array.\n", "wine_labels = wine_data[\"quality\"].to_numpy()" ] }, { "cell_type": "markdown", "id": "4b143574-6353-4438-8d2f-0483d573a203", "metadata": {}, "source": [ "Check the data samples." ] }, { "cell_type": "code", "execution_count": 5, "id": "bc35067f-473e-4472-974c-c9385ba5e3da", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | fixed acidity | \n", "volatile acidity | \n", "citric acid | \n", "residual sugar | \n", "chlorides | \n", "free sulfur dioxide | \n", "total sulfur dioxide | \n", "density | \n", "pH | \n", "sulphates | \n", "alcohol | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "7.4 | \n", "0.700 | \n", "0.00 | \n", "1.9 | \n", "0.076 | \n", "11.0 | \n", "34.0 | \n", "0.99780 | \n", "3.51 | \n", "0.56 | \n", "9.4 | \n", "
1 | \n", "7.8 | \n", "0.880 | \n", "0.00 | \n", "2.6 | \n", "0.098 | \n", "25.0 | \n", "67.0 | \n", "0.99680 | \n", "3.20 | \n", "0.68 | \n", "9.8 | \n", "
2 | \n", "7.8 | \n", "0.760 | \n", "0.04 | \n", "2.3 | \n", "0.092 | \n", "15.0 | \n", "54.0 | \n", "0.99700 | \n", "3.26 | \n", "0.65 | \n", "9.8 | \n", "
3 | \n", "11.2 | \n", "0.280 | \n", "0.56 | \n", "1.9 | \n", "0.075 | \n", "17.0 | \n", "60.0 | \n", "0.99800 | \n", "3.16 | \n", "0.58 | \n", "9.8 | \n", "
4 | \n", "7.4 | \n", "0.700 | \n", "0.00 | \n", "1.9 | \n", "0.076 | \n", "11.0 | \n", "34.0 | \n", "0.99780 | \n", "3.51 | \n", "0.56 | \n", "9.4 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
1138 | \n", "6.3 | \n", "0.510 | \n", "0.13 | \n", "2.3 | \n", "0.076 | \n", "29.0 | \n", "40.0 | \n", "0.99574 | \n", "3.42 | \n", "0.75 | \n", "11.0 | \n", "
1139 | \n", "6.8 | \n", "0.620 | \n", "0.08 | \n", "1.9 | \n", "0.068 | \n", "28.0 | \n", "38.0 | \n", "0.99651 | \n", "3.42 | \n", "0.82 | \n", "9.5 | \n", "
1140 | \n", "6.2 | \n", "0.600 | \n", "0.08 | \n", "2.0 | \n", "0.090 | \n", "32.0 | \n", "44.0 | \n", "0.99490 | \n", "3.45 | \n", "0.58 | \n", "10.5 | \n", "
1141 | \n", "5.9 | \n", "0.550 | \n", "0.10 | \n", "2.2 | \n", "0.062 | \n", "39.0 | \n", "51.0 | \n", "0.99512 | \n", "3.52 | \n", "0.76 | \n", "11.2 | \n", "
1142 | \n", "5.9 | \n", "0.645 | \n", "0.12 | \n", "2.0 | \n", "0.075 | \n", "32.0 | \n", "44.0 | \n", "0.99547 | \n", "3.57 | \n", "0.71 | \n", "10.2 | \n", "
1143 rows × 11 columns
\n", "