Open In Colab

In [35]:
# Uncomment on a fresh runtime (e.g. Colab) to install the dependencies:
# %pip install pandas pyfim==6.28
In [36]:
import pandas as pd
from fim import arules
In [37]:
# Display the documentation/source of `arules` using IPython's `??` introspection.
# NOTE: `??` is IPython/Jupyter syntax only; it is not valid in a plain Python script.
??arules
In [38]:
# Mining parameters handed to `arules` further down.
# Parameter reference: https://borgelt.net/pyfim.html

# One letter per value reported for each rule; every letter becomes a column
# (see report_colnames for the letter -> column-name mapping).
report = 'asC'

# Minimum confidence of an association rule (library default: 80%).
conf = 50

# Minimum support of an association rule (library default: 10).
# NOTE(review): pyfim documents a positive value as a percentage of the
# transactions and a negative value as an absolute count - confirm at the link above.
supp = 2
In [39]:
# Toy market-basket data: ten transactions, each a list of item names.
# Two baskets list 'Onion' twice; they are reproduced exactly as given.
dataset = [
    ['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
    ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', 'Yogurt', 'Eggs'],
    ['Milk', 'Unicorn', 'Eggs', 'Kidney Beans', 'Yogurt'],
    ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Yogurt', 'Eggs'],
    ['Corn', 'Yogurt', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', 'Yogurt', 'Eggs'],
]
In [40]:
# Map each pyfim `report` letter to a readable DataFrame column name.
# Every letter must appear exactly once: a repeated key in a dict literal
# silently overwrites the earlier entry.
report_colnames = {
    # support of the whole item set (antecedent + consequent)
    'a': 'support_itemset_absolute',
    's': 'support_itemset_relative',
    'S': 'support_itemset_relative_pct',
    # support of the rule body (antecedent)
    'b': 'support_bodyset_absolute',
    'x': 'support_bodyset_relative',
    'X': 'support_bodyset_relative_pct',
    # support of the rule head (consequent)
    'h': 'support_headitem_absolute',
    'y': 'support_headitem_relative',
    'Y': 'support_headitem_relative_pct',
    # rule quality measures
    'c': 'confidence',
    'C': 'confidence_pct',
    'l': 'lift',
    'L': 'lift_pct',
    'e': 'evaluation',
    'E': 'evaluation_pct',
    # 'Q' is the support of the empty set per the pyfim docs
    'Q': 'support_emptyset',
}

# run apriori: mine association rules from the transactions
result = arules(dataset, supp=supp, conf=conf, report=report)

# The first two fields of each rule are labelled consequent/antecedent; the
# remaining fields follow the letters of `report`, translated through
# report_colnames (a letter with no entry is used as its own column name).
colnames = ['consequent', 'antecedent', *(report_colnames.get(code, code) for code in report)]

# Tabulate the rules, highest absolute item-set support first.
df_rules = pd.DataFrame(result, columns=colnames).sort_values(
    by='support_itemset_absolute', ascending=False
)
print(df_rules.shape)
(484, 5)
In [41]:
# Top of the table: the ten rules with the largest absolute support
# (df_rules is sorted by support_itemset_absolute, descending).
df_rules.head(n=10)
Out[41]:
consequent antecedent support_itemset_absolute support_itemset_relative confidence_pct
0 Eggs () 9 0.9 90.000000
3 Yogurt () 8 0.8 80.000000
11 Kidney Beans () 8 0.8 80.000000
2 Yogurt (Eggs,) 7 0.7 77.777778
4 Eggs (Kidney Beans,) 7 0.7 87.500000
5 Kidney Beans (Eggs,) 7 0.7 77.777778
1 Eggs (Yogurt,) 7 0.7 87.500000
10 Kidney Beans (Yogurt,) 6 0.6 75.000000
29 Corn () 6 0.6 60.000000
58 Milk () 6 0.6 60.000000
In [42]:
# Bottom of the table: the last ten rows, i.e. rules with the smallest
# absolute support (df_rules is sorted by support_itemset_absolute, descending).
df_rules.tail(n=10)
Out[42]:
consequent antecedent support_itemset_absolute support_itemset_relative confidence_pct
294 Yogurt (Ice cream, Kidney Beans) 1 0.1 50.0
295 Kidney Beans (Ice cream, Yogurt) 1 0.1 100.0
112 Yogurt (Onion, Milk, Kidney Beans, Eggs) 1 0.1 100.0
111 Eggs (Onion, Milk, Kidney Beans, Yogurt) 1 0.1 100.0
110 Kidney Beans (Onion, Milk, Eggs) 1 0.1 100.0
299 Eggs (Ice cream, Corn, Yogurt) 1 0.1 100.0
300 Yogurt (Ice cream, Corn, Eggs) 1 0.1 50.0
301 Corn (Ice cream, Yogurt, Eggs) 1 0.1 100.0
302 Yogurt (Ice cream, Corn) 1 0.1 50.0
483 Milk (Apple,) 1 0.1 100.0