#!/bin/false
# ^^^ this just means don't allow this to be executed as a stand-alone script
"""wine-sklearn.py: classify wine quality with a support vector machine.

Reads ``./WineQT.csv``, splits it 50/50 into train and test subsets,
standardises the features, fits an ``SVC`` with default parameters, then
prints and plots the confusion matrix of its predictions on the test subset.
"""
# the basic imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# but also reporting on the model
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Labels handed to confusion_matrix. Passing them explicitly fixes the matrix
# at 11x11 (one row/column per label 0..10) even when some labels never occur
# in y_test or in the predictions.
QUALITY_LABELS = list(range(11))

# load data, extract just the features, and just the labels
wine_data = pd.read_csv("./WineQT.csv", delimiter=",")
# "quality" is the label column; "Id" is dropped as well (presumably just a row
# identifier - confirm against the CSV). Every remaining column is a feature.
wine_features = wine_data.drop(columns=["quality", "Id"])
wine_labels = np.ravel(wine_data["quality"])

# split the dataset into train and test subsets
# note, while it may be tempting to get creative with variable names, such as
# features_train, features_test, labels_train, labels_test...
# it's WAY TOO MUCH typing, and most examples use x for features (as in, input
# data) and y for labels (as in, result)
# random_state is pinned so the split is the same on every run
x_train, x_test, y_train, y_test = train_test_split(
    wine_features,
    wine_labels,
    test_size=0.5,
    random_state=50,
)

# standardise the data: StandardScaler shifts and scales each feature to zero
# mean and unit variance (it does NOT squeeze values into a fixed a..b range).
# The scaler is fitted on the training subset only and then applied to both
# subsets, so nothing about the test data leaks into the preprocessing.
scaler = StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

# train the SVC model
print("**** TESTING C-Support Vector Classification ****")
svc_model = SVC()
svc_model.fit(x_train, y_train)
# now test the fitness with the test subset
svc_y_predict = svc_model.predict(x_test)
# visualise it: rows are the true labels, columns are the predicted labels
# (confusion_matrix already returns a numpy array, no extra wrapping needed)
svc_cm = confusion_matrix(y_test, svc_y_predict, labels=QUALITY_LABELS)
svc_conf_matrix = pd.DataFrame(svc_cm)
print(svc_conf_matrix)
# visualise it in a nice picture
sns.heatmap(svc_conf_matrix, annot=True, fmt='g')
plt.show()

# # train the NuSVC model
# print("**** TESTING Nu-Support Vector Classification ****")
# from sklearn.svm import NuSVC
# nusvc_model = NuSVC(nu=0.2)
# nusvc_model.fit(x_train, y_train)
# # now test the fitness with the test subset
# nusvc_y_predict = nusvc_model.predict(x_test)
# # visualise it
# nu_cm = confusion_matrix(y_test, nusvc_y_predict, labels=QUALITY_LABELS)
# nu_conf_matrix = pd.DataFrame(nu_cm)
# print(nu_conf_matrix)
# # visualise it in a nice picture
# sns.heatmap(nu_conf_matrix, annot=True, fmt='g')
# plt.show()