In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy
from scipy import stats
from sklearn.cluster import KMeans
from sklearn.metrics import davies_bouldin_score
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D
from collections import Counter
from nltk.corpus import stopwords
import pprint
In [2]:
# Columns to read from the menu CSV: the three descriptive text fields
# followed by the nutrition measurements.
features = [
    'Category',
    'Item',
    'Serving Size',
    'Calories',
    'Calories from Fat',
    'Total Fat',
    'Saturated Fat',
    'Trans Fat',
    'Cholesterol',
    'Sodium',
    'Carbohydrates',
    'Dietary Fiber',
    'Sugars',
    'Protein',
    'Vitamin A (% Daily Value)',
    'Vitamin C (% Daily Value)',
    'Calcium (% Daily Value)',
    'Iron (% Daily Value)',
]

# The first line of the file is the header row; only the columns above are kept.
dataset = pd.read_csv('menu.csv', usecols=features, header=0)
In [3]:
# Preview the first five rows of the loaded menu table.
dataset.iloc[:5]
Out[3]:
Category Item Serving Size Calories Calories from Fat Total Fat Saturated Fat Trans Fat Cholesterol Sodium Carbohydrates Dietary Fiber Sugars Protein Vitamin A (% Daily Value) Vitamin C (% Daily Value) Calcium (% Daily Value) Iron (% Daily Value)
0 Breakfast Egg McMuffin 4.8 oz (136 g) 300 120 13.0 5.0 0.0 260 750 31 4 3 17 10 0 25 15
1 Breakfast Egg White Delight 4.8 oz (135 g) 250 70 8.0 3.0 0.0 25 770 30 4 3 18 6 0 25 8
2 Breakfast Sausage McMuffin 3.9 oz (111 g) 370 200 23.0 8.0 0.0 45 780 29 4 2 14 8 0 25 10
3 Breakfast Sausage McMuffin with Egg 5.7 oz (161 g) 450 250 28.0 10.0 0.0 285 860 30 4 2 21 15 0 30 15
4 Breakfast Sausage McMuffin with Egg Whites 5.7 oz (161 g) 400 210 23.0 8.0 0.0 50 880 30 4 2 21 6 0 25 10
In [4]:
# Numeric-only frame: remove the three descriptive text columns.
df = dataset.drop(columns=["Category", "Item", "Serving Size"])
In [5]:
# Absolute z-score of every value, computed per column.
z = np.abs(stats.zscore(df))
# Print the (row positions, column positions) of entries whose |z| exceeds 7.
extreme = z > 7
print(np.nonzero(extreme))

# Show the row at position 82, then remove label 82 from both frames.
# NOTE: this cell is not idempotent - running it a second time raises KeyError
# because label 82 has already been dropped.
print(dataset.iloc[82])
dataset = dataset.drop(index=[82])
df = df.drop(index=[82])
(array([ 82,  82, 135]), array([ 1,  2, 12]))
Category                                   Chicken & Fish
Item                         Chicken McNuggets (40 piece)
Serving Size                              22.8 oz (646 g)
Calories                                             1880
Calories from Fat                                    1060
Total Fat                                             118
Saturated Fat                                          20
Trans Fat                                               1
Cholesterol                                           265
Sodium                                               3600
Carbohydrates                                         118
Dietary Fiber                                           6
Sugars                                                  1
Protein                                                87
Vitamin A (% Daily Value)                               0
Vitamin C (% Daily Value)                              15
Calcium (% Daily Value)                                 8
Iron (% Daily Value)                                   25
Name: 82, dtype: object
In [6]:
# Standardise every numeric column: subtract its mean, divide by its
# (sample) standard deviation.
col_means = df.mean()
col_stds = df.std()
normalized_df = df.sub(col_means).div(col_stds)

# Re-attach the category label (aligned on the row index) for use as a plot hue.
normalized_df["Category"] = dataset["Category"]
normalized_df.head(5)
Out[6]:
Calories Calories from Fat Total Fat Saturated Fat Trans Fat Cholesterol Sodium Carbohydrates Dietary Fiber Sugars Protein Vitamin A (% Daily Value) Vitamin C (% Daily Value) Calcium (% Daily Value) Iron (% Daily Value) Category
0 -0.281865 -0.030612 -0.060316 -0.181297 -0.470098 2.381337 0.488727 -0.574834 1.542477 -0.925115 0.376316 -0.142577 -0.322415 0.233469 0.845334 Breakfast
1 -0.507601 -0.468647 -0.454811 -0.561506 -0.470098 -0.336969 0.525441 -0.610597 1.542477 -0.925115 0.471683 -0.306517 -0.322415 0.233469 0.038283 Breakfast
2 0.034165 0.670245 0.728672 0.389017 -0.470098 -0.105624 0.543798 -0.646360 1.542477 -0.959981 0.090213 -0.224547 -0.322415 0.233469 0.268869 Breakfast
3 0.395343 1.108280 1.123166 0.769227 -0.470098 2.670519 0.690654 -0.610597 1.542477 -0.959981 0.757787 0.062348 -0.322415 0.527005 0.845334 Breakfast
4 0.169607 0.757852 0.728672 0.389017 -0.470098 -0.047788 0.727368 -0.610597 1.542477 -0.959981 0.757787 -0.306517 -0.322415 0.233469 0.268869 Breakfast
In [7]:
# Number of menu items left after the outlier row was removed.
dataset.shape[0]
Out[7]:
259
In [8]:
# Pairwise correlation between the numeric nutrition columns, drawn as a heatmap.
corr = df.corr()
sns.heatmap(data=corr, cmap="YlGnBu", linewidths=0.5)
Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f600524f4e0>
In [9]:
sns.set(style="ticks")

# Pairwise scatter plots of the standardised macronutrient columns,
# coloured by menu category.
plotData = normalized_df.loc[:, ["Category", "Calories", "Total Fat", "Carbohydrates", "Protein"]]
sns.pairplot(data=plotData, hue="Category")
Out[9]:
<seaborn.axisgrid.PairGrid at 0x7f600316bda0>
In [10]:
# 3-D scatter of the raw macronutrient values, one point per menu item,
# with the point colour encoding calories.
x = dataset["Total Fat"]
y = dataset["Carbohydrates"]
z = dataset["Protein"]
c = dataset["Calories"]

fig = plt.figure()
ax = plt.axes(projection='3d')
scatter = ax.scatter(x, y, z, c=c, cmap='viridis', linewidth=0.5)

# Label every axis and the colour scale from the plotted column names so the
# figure can be read without consulting the code.
ax.set_xlabel(x.name)
ax.set_ylabel(y.name)
ax.set_zlabel(z.name)
fig.colorbar(scatter, ax=ax, label=c.name)

# Camera position: elevation 25 degrees, azimuth 55 degrees.
ax.view_init(25,55)
In [11]:
sns.set(style="ticks")

# Pairwise scatter plots of standardised calories, cholesterol, sodium and
# sugars, coloured by menu category.
plotData = normalized_df.loc[:, ["Category", "Calories", "Cholesterol", "Sodium", "Sugars"]]
sns.pairplot(data=plotData, hue="Category")
Out[11]:
<seaborn.axisgrid.PairGrid at 0x7f60029d1828>
In [12]:
# 3-D scatter of raw cholesterol, sodium and sugars, one point per menu item,
# with the point colour encoding calories.
x = dataset["Cholesterol"]
y = dataset["Sodium"]
z = dataset["Sugars"]
c = dataset["Calories"]

fig = plt.figure()
ax = plt.axes(projection='3d')
scatter = ax.scatter(x, y, z, c=c, cmap='viridis', linewidth=0.5)

# Label every axis and the colour scale from the plotted column names so the
# figure can be read without consulting the code.
ax.set_xlabel(x.name)
ax.set_ylabel(y.name)
ax.set_zlabel(z.name)
fig.colorbar(scatter, ax=ax, label=c.name)

# Camera position: elevation 25 degrees, azimuth 70 degrees.
ax.view_init(25,70)
In [13]:
# Clustering input: the numeric nutrition columns of the cleaned dataset.
# NOTE(review): these are the raw, unscaled values, so columns with large
# magnitudes will dominate the Euclidean distances KMeans uses - confirm that
# clustering on `points` rather than the standardised frame is intended.
points = dataset.drop(['Category','Item','Serving Size'],axis=1)

# Davies-Bouldin score for each cluster count k = 2..20 (lower is better);
# entry j of the list corresponds to k = j + 2.
dbScoreList = []

for i in range(2,21):
    # A fixed random_state makes the centroid initialisation, and therefore
    # the reported scores, reproducible across kernel restarts.
    kmeans = KMeans(n_clusters=i, random_state=0)
    clusters = kmeans.fit_predict(points)
    dbScoreList.append(davies_bouldin_score(points, clusters))

dbScoreList
/home/adam/.local/lib/python3.7/site-packages/sklearn/metrics/cluster/unsupervised.py:342: RuntimeWarning: divide by zero encountered in true_divide
  score = (intra_dists[:, None] + intra_dists) / centroid_distances
/home/adam/.local/lib/python3.7/site-packages/sklearn/metrics/cluster/unsupervised.py:342: RuntimeWarning: divide by zero encountered in true_divide
  score = (intra_dists[:, None] + intra_dists) / centroid_distances
/home/adam/.local/lib/python3.7/site-packages/sklearn/metrics/cluster/unsupervised.py:342: RuntimeWarning: divide by zero encountered in true_divide
  score = (intra_dists[:, None] + intra_dists) / centroid_distances
/home/adam/.local/lib/python3.7/site-packages/sklearn/metrics/cluster/unsupervised.py:342: RuntimeWarning: divide by zero encountered in true_divide
  score = (intra_dists[:, None] + intra_dists) / centroid_distances
/home/adam/.local/lib/python3.7/site-packages/sklearn/metrics/cluster/unsupervised.py:342: RuntimeWarning: divide by zero encountered in true_divide
  score = (intra_dists[:, None] + intra_dists) / centroid_distances
/home/adam/.local/lib/python3.7/site-packages/sklearn/metrics/cluster/unsupervised.py:342: RuntimeWarning: divide by zero encountered in true_divide
  score = (intra_dists[:, None] + intra_dists) / centroid_distances
/home/adam/.local/lib/python3.7/site-packages/sklearn/metrics/cluster/unsupervised.py:342: RuntimeWarning: divide by zero encountered in true_divide
  score = (intra_dists[:, None] + intra_dists) / centroid_distances
/home/adam/.local/lib/python3.7/site-packages/sklearn/metrics/cluster/unsupervised.py:342: RuntimeWarning: divide by zero encountered in true_divide
  score = (intra_dists[:, None] + intra_dists) / centroid_distances
/home/adam/.local/lib/python3.7/site-packages/sklearn/metrics/cluster/unsupervised.py:342: RuntimeWarning: divide by zero encountered in true_divide
  score = (intra_dists[:, None] + intra_dists) / centroid_distances
/home/adam/.local/lib/python3.7/site-packages/sklearn/metrics/cluster/unsupervised.py:342: RuntimeWarning: divide by zero encountered in true_divide
  score = (intra_dists[:, None] + intra_dists) / centroid_distances
/home/adam/.local/lib/python3.7/site-packages/sklearn/metrics/cluster/unsupervised.py:342: RuntimeWarning: divide by zero encountered in true_divide
  score = (intra_dists[:, None] + intra_dists) / centroid_distances
/home/adam/.local/lib/python3.7/site-packages/sklearn/metrics/cluster/unsupervised.py:342: RuntimeWarning: divide by zero encountered in true_divide
  score = (intra_dists[:, None] + intra_dists) / centroid_distances
/home/adam/.local/lib/python3.7/site-packages/sklearn/metrics/cluster/unsupervised.py:342: RuntimeWarning: divide by zero encountered in true_divide
  score = (intra_dists[:, None] + intra_dists) / centroid_distances
/home/adam/.local/lib/python3.7/site-packages/sklearn/metrics/cluster/unsupervised.py:342: RuntimeWarning: divide by zero encountered in true_divide
  score = (intra_dists[:, None] + intra_dists) / centroid_distances
/home/adam/.local/lib/python3.7/site-packages/sklearn/metrics/cluster/unsupervised.py:342: RuntimeWarning: divide by zero encountered in true_divide
  score = (intra_dists[:, None] + intra_dists) / centroid_distances
/home/adam/.local/lib/python3.7/site-packages/sklearn/metrics/cluster/unsupervised.py:342: RuntimeWarning: divide by zero encountered in true_divide
  score = (intra_dists[:, None] + intra_dists) / centroid_distances
/home/adam/.local/lib/python3.7/site-packages/sklearn/metrics/cluster/unsupervised.py:342: RuntimeWarning: divide by zero encountered in true_divide
  score = (intra_dists[:, None] + intra_dists) / centroid_distances
/home/adam/.local/lib/python3.7/site-packages/sklearn/metrics/cluster/unsupervised.py:342: RuntimeWarning: invalid value encountered in true_divide
  score = (intra_dists[:, None] + intra_dists) / centroid_distances
/home/adam/.local/lib/python3.7/site-packages/sklearn/metrics/cluster/unsupervised.py:342: RuntimeWarning: divide by zero encountered in true_divide
  score = (intra_dists[:, None] + intra_dists) / centroid_distances
/home/adam/.local/lib/python3.7/site-packages/sklearn/metrics/cluster/unsupervised.py:342: RuntimeWarning: divide by zero encountered in true_divide
  score = (intra_dists[:, None] + intra_dists) / centroid_distances
Out[13]:
[0.5049080375531603,
 0.7119836969199111,
 0.7948672957175776,
 0.6875017452313921,
 0.7471688293648517,
 0.8004439020548009,
 0.8456648391141388,
 0.8551351151369286,
 0.8758372445617372,
 0.860844717359679,
 0.8133113914631832,
 0.8442599641151366,
 0.8883714867016513,
 0.8650423460864235,
 0.9050236711697273,
 0.8853383144007435,
 0.8505738885779925,
 0.8743270495979744,
 0.8699230945528857]
In [14]:
# Number of clusters used for the final segmentation; later cells iterate over
# range(clusterCount), so the model must be built from the same value.
clusterCount = 4

# Fixed random_state keeps the cluster labels stable between runs, so the
# per-cluster listings that follow refer to the same groups every time.
kmeans = KMeans(n_clusters=clusterCount, random_state=0)
clusters = kmeans.fit_predict(points)
clusters
Out[14]:
array([2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2,
       1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 2, 3, 0, 3, 3, 2, 2,
       1, 2, 2, 1, 3, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 1, 1, 2,
       2, 2, 2, 2, 1, 2, 1, 2, 1, 2, 2, 2, 3, 0, 2, 1, 0, 3, 2, 2, 3, 2,
       2, 2, 2, 2, 2, 2, 2, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3,
       3, 0, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 0, 0, 3, 3, 0, 3, 0, 0, 3, 3, 0, 0, 0, 0, 3, 3, 0, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 0, 3, 3, 0, 3,
       3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0], dtype=int32)
In [15]:
# Build a NEW frame carrying the cluster label of every item. Plain assignment
# (`dataPlusClusters = dataset`) would only alias `dataset`, so adding the
# column would also mutate `dataset` and leak 'Cluster' into any feature
# matrix derived from it on a re-run.
dataPlusClusters = dataset.assign(Cluster=clusters)
dataPlusClusters
Out[15]:
Category Item Serving Size Calories Calories from Fat Total Fat Saturated Fat Trans Fat Cholesterol Sodium Carbohydrates Dietary Fiber Sugars Protein Vitamin A (% Daily Value) Vitamin C (% Daily Value) Calcium (% Daily Value) Iron (% Daily Value) Cluster
0 Breakfast Egg McMuffin 4.8 oz (136 g) 300 120 13.0 5.0 0.0 260 750 31 4 3 17 10 0 25 15 2
1 Breakfast Egg White Delight 4.8 oz (135 g) 250 70 8.0 3.0 0.0 25 770 30 4 3 18 6 0 25 8 2
2 Breakfast Sausage McMuffin 3.9 oz (111 g) 370 200 23.0 8.0 0.0 45 780 29 4 2 14 8 0 25 10 2
3 Breakfast Sausage McMuffin with Egg 5.7 oz (161 g) 450 250 28.0 10.0 0.0 285 860 30 4 2 21 15 0 30 15 2
4 Breakfast Sausage McMuffin with Egg Whites 5.7 oz (161 g) 400 210 23.0 8.0 0.0 50 880 30 4 2 21 6 0 25 10 2
5 Breakfast Steak & Egg McMuffin 6.5 oz (185 g) 430 210 23.0 9.0 1.0 300 960 31 4 3 26 15 2 30 20 2
6 Breakfast Bacon, Egg & Cheese Biscuit (Regular Biscuit) 5.3 oz (150 g) 460 230 26.0 13.0 0.0 250 1300 38 2 3 19 10 8 15 15 2
7 Breakfast Bacon, Egg & Cheese Biscuit (Large Biscuit) 5.8 oz (164 g) 520 270 30.0 14.0 0.0 250 1410 43 3 4 19 15 8 20 20 1
8 Breakfast Bacon, Egg & Cheese Biscuit with Egg Whites (R... 5.4 oz (153 g) 410 180 20.0 11.0 0.0 35 1300 36 2 3 20 2 8 15 10 2
9 Breakfast Bacon, Egg & Cheese Biscuit with Egg Whites (L... 5.9 oz (167 g) 470 220 25.0 12.0 0.0 35 1420 42 3 4 20 6 8 15 15 1
10 Breakfast Sausage Biscuit (Regular Biscuit) 4.1 oz (117 g) 430 240 27.0 12.0 0.0 30 1080 34 2 2 11 0 0 6 15 2
11 Breakfast Sausage Biscuit (Large Biscuit) 4.6 oz (131 g) 480 280 31.0 13.0 0.0 30 1190 39 3 3 11 4 0 8 15 2
12 Breakfast Sausage Biscuit with Egg (Regular Biscuit) 5.7 oz (163 g) 510 290 33.0 14.0 0.0 250 1170 36 2 2 18 6 0 10 20 2
13 Breakfast Sausage Biscuit with Egg (Large Biscuit) 6.2 oz (177 g) 570 330 37.0 15.0 0.0 250 1280 42 3 3 18 10 0 10 20 1
14 Breakfast Sausage Biscuit with Egg Whites (Regular Biscuit) 5.9 oz (167 g) 460 250 27.0 12.0 0.0 35 1180 34 2 3 18 0 0 8 15 2
15 Breakfast Sausage Biscuit with Egg Whites (Large Biscuit) 6.4 oz (181 g) 520 280 32.0 13.0 0.0 35 1290 40 3 3 18 4 0 8 15 2
16 Breakfast Southern Style Chicken Biscuit (Regular Biscuit) 5 oz (143 g) 410 180 20.0 8.0 0.0 30 1180 41 2 3 17 0 2 6 15 2
17 Breakfast Southern Style Chicken Biscuit (Large Biscuit) 5.5 oz (157 g) 470 220 24.0 9.0 0.0 30 1290 46 3 4 17 4 2 8 15 2
18 Breakfast Steak & Egg Biscuit (Regular Biscuit) 7.1 oz (201 g) 540 290 32.0 16.0 1.0 280 1470 38 2 3 25 10 2 20 25 1
19 Breakfast Bacon, Egg & Cheese McGriddles 6.1 oz (174 g) 460 190 21.0 9.0 0.0 250 1250 48 2 15 19 10 10 20 15 2
20 Breakfast Bacon, Egg & Cheese McGriddles with Egg Whites 6.3 oz (178 g) 400 140 15.0 7.0 0.0 35 1250 47 2 16 20 2 10 15 10 2
21 Breakfast Sausage McGriddles 5 oz (141 g) 420 200 22.0 8.0 0.0 35 1030 44 2 15 11 0 0 8 10 2
22 Breakfast Sausage, Egg & Cheese McGriddles 7.1 oz (201 g) 550 280 31.0 12.0 0.0 265 1320 48 2 15 20 10 0 20 15 1
23 Breakfast Sausage, Egg & Cheese McGriddles with Egg Whites 7.2 oz (205 g) 500 230 26.0 10.0 0.0 50 1320 46 2 15 21 2 0 20 10 2
24 Breakfast Bacon, Egg & Cheese Bagel 6.9 oz (197 g) 620 280 31.0 11.0 0.5 275 1480 57 3 7 30 20 15 20 20 1
25 Breakfast Bacon, Egg & Cheese Bagel with Egg Whites 7.1 oz (201 g) 570 230 25.0 9.0 0.5 60 1480 55 3 8 30 10 15 20 15 1
26 Breakfast Steak, Egg & Cheese Bagel 8.5 oz (241 g) 670 310 35.0 13.0 1.5 295 1510 56 3 7 33 20 4 25 25 1
27 Breakfast Big Breakfast (Regular Biscuit) 9.5 oz (269 g) 740 430 48.0 17.0 0.0 555 1560 51 3 3 28 15 2 15 25 1
28 Breakfast Big Breakfast (Large Biscuit) 10 oz (283 g) 800 470 52.0 18.0 0.0 555 1680 56 4 3 28 15 2 15 30 1
29 Breakfast Big Breakfast with Egg Whites (Regular Biscuit) 9.6 oz (272 g) 640 330 37.0 14.0 0.0 35 1590 50 3 3 26 0 2 10 15 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
230 Coffee & Tea Frappé Chocolate Chip (Medium) 16 fl oz cup 630 240 26.0 17.0 1.0 80 160 91 1 81 9 15 0 30 4 0
231 Coffee & Tea Frappé Chocolate Chip (Large) 22 fl oz cup 760 280 31.0 20.0 1.5 95 200 111 1 99 12 20 0 35 6 0
232 Smoothies & Shakes Blueberry Pomegranate Smoothie (Small) 12 fl oz cup 220 5 0.5 0.0 0.0 5 40 50 3 44 2 0 2 6 2 3
233 Smoothies & Shakes Blueberry Pomegranate Smoothie (Medium) 16 fl oz cup 260 5 1.0 0.0 0.0 5 50 62 4 54 3 0 4 8 2 3
234 Smoothies & Shakes Blueberry Pomegranate Smoothie (Large) 22 fl oz cup 340 10 1.0 0.5 0.0 5 65 79 5 70 4 0 4 10 2 3
235 Smoothies & Shakes Strawberry Banana Smoothie (Small) 12 fl oz cup 210 5 0.5 0.0 0.0 5 50 47 3 44 3 0 30 8 2 3
236 Smoothies & Shakes Strawberry Banana Smoothie (Medium) 16 fl oz cup 250 5 1.0 0.0 0.0 5 60 58 3 54 4 0 35 8 4 3
237 Smoothies & Shakes Strawberry Banana Smoothie (Large) 22 fl oz cup 330 10 1.0 0.5 0.0 5 80 74 4 70 5 0 45 10 4 3
238 Smoothies & Shakes Mango Pineapple Smoothie (Small) 12 fl oz cup 210 5 0.5 0.0 0.0 5 40 50 1 46 2 30 20 8 2 3
239 Smoothies & Shakes Mango Pineapple Smoothie (Medium) 16 fl oz cup 260 10 1.0 0.0 0.0 5 45 61 1 56 3 40 25 8 2 3
240 Smoothies & Shakes Mango Pineapple Smoothie (Large) 22 fl oz cup 340 10 1.0 0.5 0.0 5 60 78 2 72 4 50 30 10 2 3
241 Smoothies & Shakes Vanilla Shake (Small) 12 fl oz cup 530 140 15.0 10.0 1.0 60 160 86 0 63 11 20 0 40 0 0
242 Smoothies & Shakes Vanilla Shake (Medium) 16 fl oz cup 660 170 19.0 12.0 1.0 75 200 109 0 81 14 25 0 50 0 0
243 Smoothies & Shakes Vanilla Shake (Large) 22 fl oz cup 820 210 23.0 15.0 1.0 90 260 135 0 101 18 30 0 60 0 0
244 Smoothies & Shakes Strawberry Shake (Small) 12 fl oz cup 550 150 16.0 10.0 1.0 60 160 90 0 79 12 20 0 40 0 0
245 Smoothies & Shakes Strawberry Shake (Medium) 16 fl oz cup 690 180 20.0 13.0 1.0 75 210 114 0 100 15 25 0 50 0 0
246 Smoothies & Shakes Strawberry Shake (Large) 22 fl oz cup 850 210 24.0 15.0 1.0 90 260 140 0 123 18 30 0 70 0 0
247 Smoothies & Shakes Chocolate Shake (Small) 12 fl oz cup 560 150 16.0 10.0 1.0 60 240 91 1 77 12 20 0 40 8 0
248 Smoothies & Shakes Chocolate Shake (Medium) 16 fl oz cup 700 180 20.0 12.0 1.0 75 300 114 2 97 15 25 0 50 10 0
249 Smoothies & Shakes Chocolate Shake (Large) 22 fl oz cup 850 210 23.0 15.0 1.0 85 380 141 2 120 19 30 0 60 15 0
250 Smoothies & Shakes Shamrock Shake (Medium) 16 fl oz cup 660 170 19.0 12.0 1.0 75 210 109 0 93 14 25 0 50 0 0
251 Smoothies & Shakes Shamrock Shake (Large) 22 fl oz cup 820 210 23.0 15.0 1.0 90 260 135 0 115 18 30 0 60 0 0
252 Smoothies & Shakes McFlurry with M&M’s Candies (Small) 10.9 oz (310 g) 650 210 23.0 14.0 0.5 50 180 96 1 89 13 15 0 45 8 0
253 Smoothies & Shakes McFlurry with M&M’s Candies (Medium) 16.2 oz (460 g) 930 290 33.0 20.0 1.0 75 260 139 2 128 20 25 0 70 10 0
254 Smoothies & Shakes McFlurry with M&M’s Candies (Snack) 7.3 oz (207 g) 430 140 15.0 10.0 0.0 35 120 64 1 59 9 10 0 30 4 0
255 Smoothies & Shakes McFlurry with Oreo Cookies (Small) 10.1 oz (285 g) 510 150 17.0 9.0 0.5 45 280 80 1 64 12 15 0 40 8 0
256 Smoothies & Shakes McFlurry with Oreo Cookies (Medium) 13.4 oz (381 g) 690 200 23.0 12.0 1.0 55 380 106 1 85 15 20 0 50 10 0
257 Smoothies & Shakes McFlurry with Oreo Cookies (Snack) 6.7 oz (190 g) 340 100 11.0 6.0 0.0 30 190 53 1 43 8 10 0 25 6 3
258 Smoothies & Shakes McFlurry with Reese's Peanut Butter Cups (Medium) 14.2 oz (403 g) 810 290 32.0 15.0 1.0 60 400 114 2 103 21 20 0 60 6 0
259 Smoothies & Shakes McFlurry with Reese's Peanut Butter Cups (Snack) 7.1 oz (202 g) 410 150 16.0 8.0 0.0 30 200 57 1 51 10 10 0 30 4 0

259 rows × 19 columns

In [16]:
# List the item names that fall into each cluster, one block per cluster.
# (A per-cluster summary via `topFoods` was sketched here; that helper is not
# defined in the visible cells.)
for cluster_id in range(clusterCount):
    in_cluster = dataPlusClusters['Cluster'] == cluster_id
    foodText = dataPlusClusters.loc[in_cluster, 'Item'].tolist()
    pprint.pprint(foodText)
    print("----------------------------------------------------------")
['Hotcakes',
 'Cinnamon Melts',
 'Chicken McNuggets (6 piece)',
 'Filet-O-Fish',
 'Large French Fries',
 'Caramel Latte (Large)',
 'Hazelnut Latte (Large)',
 'French Vanilla Latte (Large)',
 'Mocha (Medium)',
 'Mocha (Large)',
 'Mocha with Nonfat Milk (Large)',
 'Caramel Mocha (Medium)',
 'Caramel Mocha (Large)',
 'Nonfat Caramel Mocha (Large)',
 'Hot Chocolate (Small)',
 'Hot Chocolate (Medium)',
 'Hot Chocolate (Large)',
 'Hot Chocolate with Nonfat Milk (Large)',
 'Iced Mocha (Large)',
 'Iced Mocha with Nonfat Milk (Large)',
 'Iced Caramel Mocha (Large)',
 'Iced Nonfat Caramel Mocha (Large)',
 'Frappé Mocha (Small)',
 'Frappé Mocha (Medium)',
 'Frappé Mocha (Large)',
 'Frappé Caramel (Small)',
 'Frappé Caramel (Medium)',
 'Frappé Caramel (Large)',
 'Frappé Chocolate Chip (Small)',
 'Frappé Chocolate Chip (Medium)',
 'Frappé Chocolate Chip (Large)',
 'Vanilla Shake (Small)',
 'Vanilla Shake (Medium)',
 'Vanilla Shake (Large)',
 'Strawberry Shake (Small)',
 'Strawberry Shake (Medium)',
 'Strawberry Shake (Large)',
 'Chocolate Shake (Small)',
 'Chocolate Shake (Medium)',
 'Chocolate Shake (Large)',
 'Shamrock Shake (Medium)',
 'Shamrock Shake (Large)',
 'McFlurry with M&M’s Candies (Small)',
 'McFlurry with M&M’s Candies (Medium)',
 'McFlurry with M&M’s Candies (Snack)',
 'McFlurry with Oreo Cookies (Small)',
 'McFlurry with Oreo Cookies (Medium)',
 "McFlurry with Reese's Peanut Butter Cups (Medium)",
 "McFlurry with Reese's Peanut Butter Cups (Snack)"]
----------------------------------------------------------
['Bacon, Egg & Cheese Biscuit (Large Biscuit)',
 'Bacon, Egg & Cheese Biscuit with Egg Whites (Large Biscuit)',
 'Sausage Biscuit with Egg (Large Biscuit)',
 'Steak & Egg Biscuit (Regular Biscuit)',
 'Sausage, Egg & Cheese McGriddles',
 'Bacon, Egg & Cheese Bagel',
 'Bacon, Egg & Cheese Bagel with Egg Whites',
 'Steak, Egg & Cheese Bagel',
 'Big Breakfast (Regular Biscuit)',
 'Big Breakfast (Large Biscuit)',
 'Big Breakfast with Egg Whites (Regular Biscuit)',
 'Big Breakfast with Egg Whites (Large Biscuit)',
 'Big Breakfast with Hotcakes (Regular Biscuit)',
 'Big Breakfast with Hotcakes (Large Biscuit)',
 'Big Breakfast with Hotcakes and Egg Whites (Regular Biscuit)',
 'Big Breakfast with Hotcakes and Egg Whites (Large Biscuit)',
 'Quarter Pounder with Bacon & Cheese',
 'Double Quarter Pounder with Cheese',
 'Bacon Clubhouse Burger',
 'Premium Crispy Chicken Club Sandwich',
 'Premium Crispy Chicken Ranch BLT Sandwich',
 'Bacon Clubhouse Crispy Chicken Sandwich',
 'Bacon Clubhouse Grilled Chicken Sandwich',
 'Premium McWrap Chicken & Bacon (Crispy Chicken)',
 'Premium McWrap Chicken & Ranch (Crispy Chicken)',
 'Premium McWrap Southwest Chicken (Crispy Chicken)',
 'Chicken McNuggets (20 piece)']
----------------------------------------------------------
['Egg McMuffin',
 'Egg White Delight',
 'Sausage McMuffin',
 'Sausage McMuffin with Egg',
 'Sausage McMuffin with Egg Whites',
 'Steak & Egg McMuffin',
 'Bacon, Egg & Cheese Biscuit (Regular Biscuit)',
 'Bacon, Egg & Cheese Biscuit with Egg Whites (Regular Biscuit)',
 'Sausage Biscuit (Regular Biscuit)',
 'Sausage Biscuit (Large Biscuit)',
 'Sausage Biscuit with Egg (Regular Biscuit)',
 'Sausage Biscuit with Egg Whites (Regular Biscuit)',
 'Sausage Biscuit with Egg Whites (Large Biscuit)',
 'Southern Style Chicken Biscuit (Regular Biscuit)',
 'Southern Style Chicken Biscuit (Large Biscuit)',
 'Bacon, Egg & Cheese McGriddles',
 'Bacon, Egg & Cheese McGriddles with Egg Whites',
 'Sausage McGriddles',
 'Sausage, Egg & Cheese McGriddles with Egg Whites',
 'Hotcakes and Sausage',
 'Sausage Burrito',
 'Big Mac',
 'Quarter Pounder with Cheese',
 'Quarter Pounder with Bacon Habanero Ranch',
 'Quarter Pounder Deluxe',
 'Cheeseburger',
 'Double Cheeseburger',
 'McDouble',
 'Bacon McDouble',
 'Daily Double',
 'Jalapeño Double',
 'McRib',
 'Premium Crispy Chicken Classic Sandwich',
 'Premium Grilled Chicken Classic Sandwich',
 'Premium Grilled Chicken Club Sandwich',
 'Premium Grilled Chicken Ranch BLT Sandwich',
 'Southern Style Crispy Chicken Sandwich',
 'McChicken',
 'Bacon Cheddar McChicken',
 'Bacon Buffalo Ranch McChicken',
 'Buffalo Ranch McChicken',
 'Premium McWrap Chicken & Bacon (Grilled Chicken)',
 'Premium McWrap Chicken & Ranch (Grilled Chicken)',
 'Premium McWrap Southwest Chicken (Grilled Chicken)',
 'Premium McWrap Chicken Sweet Chili (Crispy Chicken)',
 'Premium McWrap Chicken Sweet Chili (Grilled Chicken)',
 'Chicken McNuggets (10 piece)',
 'Premium Bacon Ranch Salad with Crispy Chicken',
 'Premium Bacon Ranch Salad with Grilled Chicken',
 'Premium Southwest Salad with Crispy Chicken',
 'Premium Southwest Salad with Grilled Chicken',
 'Chipotle BBQ Snack Wrap (Crispy Chicken)',
 'Chipotle BBQ Snack Wrap (Grilled Chicken)',
 'Honey Mustard Snack Wrap (Crispy Chicken)',
 'Honey Mustard Snack Wrap (Grilled Chicken)',
 'Ranch Snack Wrap (Crispy Chicken)',
 'Ranch Snack Wrap (Grilled Chicken)']
----------------------------------------------------------
['Hash Brown',
 'Fruit & Maple Oatmeal',
 'Fruit & Maple Oatmeal without Brown Sugar',
 'Hamburger',
 'Chicken McNuggets (4 piece)',
 'Premium Bacon Ranch Salad (without Chicken)',
 'Premium Southwest Salad (without Chicken)',
 'Small French Fries',
 'Medium French Fries',
 'Kids French Fries',
 'Side Salad',
 'Apple Slices',
 "Fruit 'n Yogurt Parfait",
 'Baked Apple Pie',
 'Chocolate Chip Cookie',
 'Oatmeal Raisin Cookie',
 'Kids Ice Cream Cone',
 'Hot Fudge Sundae',
 'Hot Caramel Sundae',
 'Strawberry Sundae',
 'Coca-Cola Classic (Small)',
 'Coca-Cola Classic (Medium)',
 'Coca-Cola Classic (Large)',
 'Coca-Cola Classic (Child)',
 'Diet Coke (Small)',
 'Diet Coke (Medium)',
 'Diet Coke (Large)',
 'Diet Coke (Child)',
 'Dr Pepper (Small)',
 'Dr Pepper (Medium)',
 'Dr Pepper (Large)',
 'Dr Pepper (Child)',
 'Diet Dr Pepper (Small)',
 'Diet Dr Pepper (Medium)',
 'Diet Dr Pepper (Large)',
 'Diet Dr Pepper (Child)',
 'Sprite (Small)',
 'Sprite (Medium)',
 'Sprite (Large)',
 'Sprite (Child)',
 '1% Low Fat Milk Jug',
 'Fat Free Chocolate Milk Jug',
 'Minute Maid 100% Apple Juice Box',
 'Minute Maid Orange Juice (Small)',
 'Minute Maid Orange Juice (Medium)',
 'Minute Maid Orange Juice (Large)',
 'Dasani Water Bottle',
 'Iced Tea (Small)',
 'Iced Tea (Medium)',
 'Iced Tea (Large)',
 'Iced Tea (Child)',
 'Sweet Tea (Small)',
 'Sweet Tea (Medium)',
 'Sweet Tea (Large)',
 'Sweet Tea (Child)',
 'Coffee (Small)',
 'Coffee (Medium)',
 'Coffee (Large)',
 'Latte (Small)',
 'Latte (Medium)',
 'Latte (Large)',
 'Caramel Latte (Small)',
 'Caramel Latte (Medium)',
 'Hazelnut Latte (Small)',
 'Hazelnut Latte (Medium)',
 'French Vanilla Latte (Small)',
 'French Vanilla Latte (Medium)',
 'Latte with Sugar Free French Vanilla Syrup (Small)',
 'Latte with Sugar Free French Vanilla Syrup (Medium)',
 'Latte with Sugar Free French Vanilla Syrup (Large)',
 'Nonfat Latte (Small)',
 'Nonfat Latte (Medium)',
 'Nonfat Latte (Large)',
 'Nonfat Caramel Latte (Small)',
 'Nonfat Caramel Latte (Medium)',
 'Nonfat Caramel Latte (Large)',
 'Nonfat Hazelnut Latte (Small)',
 'Nonfat Hazelnut Latte (Medium)',
 'Nonfat Hazelnut Latte (Large)',
 'Nonfat French Vanilla Latte (Small)',
 'Nonfat French Vanilla Latte (Medium)',
 'Nonfat French Vanilla Latte (Large)',
 'Nonfat Latte with Sugar Free French Vanilla Syrup (Small)',
 'Nonfat Latte with Sugar Free French Vanilla Syrup (Medium)',
 'Nonfat Latte with Sugar Free French Vanilla Syrup (Large)',
 'Mocha (Small)',
 'Mocha with Nonfat Milk (Small)',
 'Mocha with Nonfat Milk (Medium)',
 'Caramel Mocha (Small)',
 'Nonfat Caramel Mocha (Small)',
 'Nonfat Caramel Mocha (Medium)',
 'Hot Chocolate with Nonfat Milk (Small)',
 'Hot Chocolate with Nonfat Milk (Medium)',
 'Regular Iced Coffee (Small)',
 'Regular Iced Coffee (Medium)',
 'Regular Iced Coffee (Large)',
 'Caramel Iced Coffee (Small)',
 'Caramel Iced Coffee (Medium)',
 'Caramel Iced Coffee (Large)',
 'Hazelnut Iced Coffee (Small)',
 'Hazelnut Iced Coffee (Medium)',
 'Hazelnut Iced Coffee (Large)',
 'French Vanilla Iced Coffee (Small)',
 'French Vanilla Iced Coffee (Medium)',
 'French Vanilla Iced Coffee (Large)',
 'Iced Coffee with Sugar Free French Vanilla Syrup (Small)',
 'Iced Coffee with Sugar Free French Vanilla Syrup (Medium)',
 'Iced Coffee with Sugar Free French Vanilla Syrup (Large)',
 'Iced Mocha (Small)',
 'Iced Mocha (Medium)',
 'Iced Mocha with Nonfat Milk (Small)',
 'Iced Mocha with Nonfat Milk (Medium)',
 'Iced Caramel Mocha (Small)',
 'Iced Caramel Mocha (Medium)',
 'Iced Nonfat Caramel Mocha (Small)',
 'Iced Nonfat Caramel Mocha (Medium)',
 'Blueberry Pomegranate Smoothie (Small)',
 'Blueberry Pomegranate Smoothie (Medium)',
 'Blueberry Pomegranate Smoothie (Large)',
 'Strawberry Banana Smoothie (Small)',
 'Strawberry Banana Smoothie (Medium)',
 'Strawberry Banana Smoothie (Large)',
 'Mango Pineapple Smoothie (Small)',
 'Mango Pineapple Smoothie (Medium)',
 'Mango Pineapple Smoothie (Large)',
 'McFlurry with Oreo Cookies (Snack)']
----------------------------------------------------------
In [17]:
# Attach each item's cluster label to the standardised frame and order the
# rows by cluster, so the heatmap groups items of the same cluster together.
#
# The labels are wrapped in a Series indexed like `dataset` (the row order the
# model was fitted on) so they are matched to rows by index label, not by
# position. Together with errors="ignore" on the drop, this keeps the cell
# safe to re-run after `normalized_df` has already been sorted and stripped
# of its 'Category' column.
cluster_labels = pd.Series(clusters, index=dataset.index)
normalized_df = (
    normalized_df
    .drop(columns=["Category"], errors="ignore")
    .assign(Cluster=cluster_labels)
    .sort_values(by=['Cluster'])
)
normalized_df.head(5)
Out[17]:
Calories Calories from Fat Total Fat Saturated Fat Trans Fat Cholesterol Sodium Carbohydrates Dietary Fiber Sugars Protein Vitamin A (% Daily Value) Vitamin C (% Daily Value) Calcium (% Daily Value) Iron (% Daily Value) Cluster
259 0.214754 0.232210 0.176380 0.389017 -0.470098 -0.279133 -0.520905 0.355008 -0.396851 0.748492 -0.291258 -0.142577 -0.322415 0.527005 -0.422890 0
228 1.388580 1.108280 1.044267 2.099960 3.042075 0.472739 -0.539262 1.749771 -1.043294 2.038564 -0.195890 0.267273 -0.322415 0.820540 -0.653476 0
227 0.846814 0.670245 0.728672 1.719750 1.871351 0.299230 -0.594333 1.141797 -1.043294 1.445828 -0.386626 0.267273 -0.322415 0.527005 -0.653476 0
39 0.440490 0.407424 0.413076 0.579122 -0.470098 -0.452642 -0.208837 0.676876 0.896035 0.086023 -0.672729 -0.388487 -0.322415 -0.881968 0.845334 0
226 0.395343 0.407424 0.413076 1.149436 1.871351 0.125721 -0.658582 0.605350 -1.043294 0.957693 -0.577361 0.062348 -0.322415 0.233469 -0.653476 0
In [18]:
# Heatmap of the standardised values (rows ordered by cluster), drawn on an
# explicitly created 11x9 inch axes.
f, ax = plt.subplots(figsize=(11, 9))
sns.heatmap(data=normalized_df, ax=ax)
Out[18]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f60002ebe10>
In [ ]: