#!pip install pandas pyfim==6.28
import pandas as pd
from fim import arules
# display docs and source (the `??` prefix is IPython/Jupyter-only syntax; in plain Python use help(arules))
??arules
# for more details visit here: https://borgelt.net/pyfim.html
# inputs: mining thresholds, report flags, and the toy transactions
# NOTE: per the pyfim docs a positive supp is a percentage of transactions,
# a negative supp an absolute transaction count.
supp = 2        # minimum support of an assoc. rule (default: 10)
conf = 50       # minimum confidence of an assoc. rule (default: 80%)
report = 'asC'  # one flag character per value reported with each rule
# each inner list is one transaction (a basket of item names)
dataset = [
    ['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
    ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', 'Yogurt', 'Eggs'],
    ['Milk', 'Unicorn', 'Eggs', 'Kidney Beans', 'Yogurt'],
    ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Yogurt', 'Eggs'],
    ['Corn', 'Yogurt', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', 'Yogurt', 'Eggs'],
]
# Map each single-character pyfim report flag to a readable column name.
# Every flag must appear exactly once: a repeated key in a dict literal
# silently keeps only the last value, which would mislabel that column.
report_colnames = {
    # support of the full item set (body + head)
    'a': 'support_itemset_absolute',
    's': 'support_itemset_relative',
    'S': 'support_itemset_relative_pct',
    # support of the rule body (antecedent)
    'b': 'support_bodyset_absolute',
    'x': 'support_bodyset_relative',
    'X': 'support_bodyset_relative_pct',
    # support of the rule head (consequent)
    'h': 'support_headitem_absolute',
    'y': 'support_headitem_relative',
    'Y': 'support_headitem_relative_pct',
    # rule quality measures
    'c': 'confidence',
    'C': 'confidence_pct',
    'l': 'lift',
    'L': 'lift_pct',
    'e': 'evaluation',
    'E': 'evaluation_pct',
    # 'Q' is the flag for the support of the empty set (total number of
    # transactions) according to the pyfim documentation
    'Q': 'support_emptyset',
}
# run apriori with the thresholds and report flags chosen above
result = arules(dataset, supp=supp, conf=conf, report=report)
# make df of results: two fixed columns for the rule itself, then one column
# per report flag (unknown flags fall back to the flag character as the name)
colnames = ['consequent', 'antecedent']
colnames.extend(report_colnames.get(flag, flag) for flag in report)
# highest-support rules first
df_rules = pd.DataFrame(result, columns=colnames).sort_values(
    by='support_itemset_absolute', ascending=False
)
print(df_rules.shape)
(484, 5)
# look at some higher support rules: df_rules is sorted by absolute item set
# support in descending order, so the first 10 rows are the best-supported rules
df_rules.head(10)
 | consequent | antecedent | support_itemset_absolute | support_itemset_relative | confidence_pct |
---|---|---|---|---|---|
0 | Eggs | () | 9 | 0.9 | 90.000000 |
3 | Yogurt | () | 8 | 0.8 | 80.000000 |
11 | Kidney Beans | () | 8 | 0.8 | 80.000000 |
2 | Yogurt | (Eggs,) | 7 | 0.7 | 77.777778 |
4 | Eggs | (Kidney Beans,) | 7 | 0.7 | 87.500000 |
5 | Kidney Beans | (Eggs,) | 7 | 0.7 | 77.777778 |
1 | Eggs | (Yogurt,) | 7 | 0.7 | 87.500000 |
10 | Kidney Beans | (Yogurt,) | 6 | 0.6 | 75.000000 |
29 | Corn | () | 6 | 0.6 | 60.000000 |
58 | Milk | () | 6 | 0.6 | 60.000000 |
# look at some lower support rules: df_rules is sorted by absolute item set
# support in descending order, so the last 10 rows are among the least-supported rules
df_rules.tail(10)
 | consequent | antecedent | support_itemset_absolute | support_itemset_relative | confidence_pct |
---|---|---|---|---|---|
294 | Yogurt | (Ice cream, Kidney Beans) | 1 | 0.1 | 50.0 |
295 | Kidney Beans | (Ice cream, Yogurt) | 1 | 0.1 | 100.0 |
112 | Yogurt | (Onion, Milk, Kidney Beans, Eggs) | 1 | 0.1 | 100.0 |
111 | Eggs | (Onion, Milk, Kidney Beans, Yogurt) | 1 | 0.1 | 100.0 |
110 | Kidney Beans | (Onion, Milk, Eggs) | 1 | 0.1 | 100.0 |
299 | Eggs | (Ice cream, Corn, Yogurt) | 1 | 0.1 | 100.0 |
300 | Yogurt | (Ice cream, Corn, Eggs) | 1 | 0.1 | 50.0 |
301 | Corn | (Ice cream, Yogurt, Eggs) | 1 | 0.1 | 100.0 |
302 | Yogurt | (Ice cream, Corn) | 1 | 0.1 | 50.0 |
483 | Milk | (Apple,) | 1 | 0.1 | 100.0 |