In [35]:

# Uncomment to install the dependencies into the kernel's environment:
# %pip install pandas pyfim==6.28

In [36]:

import pandas as pd
from fim import arules

In [37]:

# Display the documentation of arules. help() is plain Python, so this cell
# does not rely on IPython-only `??` syntax and still works if the notebook
# is exported and run as a script.
help(arules)

In [38]:

# for more details visit here: https://borgelt.net/pyfim.html

# inputs: these three values are passed unchanged to arules() further down.
# NOTE(review): per the pyfim docs a positive `supp` is read as a percentage of
# the transactions and a negative one as an absolute count - confirm against
# the installed pyfim version before changing it.
supp = 2 # minimum support of an assoc. rule   (default: 10)
conf = 50 # minimum confidence of an assoc. rule (default: 80%)
# One character per value reported for each rule. Each character is looked up
# in report_colnames to name the resulting DataFrame column:
# 'a' -> support_itemset_absolute, 's' -> support_itemset_relative,
# 'C' -> confidence_pct.
report = 'asC'

In [39]:

# Toy input for arules(): a list of ten inner lists of item-name strings
# (presumably one inner list per transaction/basket - confirm against the
# pyfim docs).
# NOTE(review): 'Onion' is listed twice inside two of the inner lists; verify
# whether that is intentional and how pyfim treats repeated items.
dataset = [['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
           ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
           ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
           ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
           ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'],
           ['Milk', 'Unicorn', 'Corn', 'Yogurt', 'Eggs'],
           ['Milk', 'Unicorn', 'Eggs', 'Kidney Beans', 'Yogurt'],
           ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Yogurt', 'Eggs'],
           ['Corn', 'Yogurt', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'],
           ['Milk', 'Unicorn', 'Corn', 'Yogurt', 'Eggs'],
           ]

In [40]:

# Map each single-character flag that may appear in the `report` string to a
# readable DataFrame column name. Every flag appears exactly once: a repeated
# key in a dict literal is silently overwritten by its last occurrence, which
# would give the wrong column name for that flag.
report_colnames = {
    # support of the whole item set (body + head)
    'a': 'support_itemset_absolute',
    's': 'support_itemset_relative',
    'S': 'support_itemset_relative_pct',
    # support of the rule body (antecedent)
    'b': 'support_bodyset_absolute',
    'x': 'support_bodyset_relative',
    'X': 'support_bodyset_relative_pct',
    # support of the rule head (consequent)
    'h': 'support_headitem_absolute',
    'y': 'support_headitem_relative',
    'Y': 'support_headitem_relative_pct',
    # rule quality measures
    'c': 'confidence',
    'C': 'confidence_pct',
    'l': 'lift',
    'L': 'lift_pct',
    'e': 'evaluation',
    'E': 'evaluation_pct',
    # support of the empty set, i.e. the total number of transactions
    'Q': 'support_emptyset',
    }

# mine association rules from the transactions with the configured thresholds
result = arules(dataset, supp=supp, conf=conf, report=report)

# Turn the mined rules into a DataFrame: the first two columns are the rule
# head and body, followed by one column per character of `report` (named via
# report_colnames, falling back to the raw character when it is unknown).
measure_colnames = [report_colnames.get(flag, flag) for flag in report]
colnames = ['consequent', 'antecedent'] + measure_colnames
df_rules = pd.DataFrame(result, columns=colnames).sort_values(
    'support_itemset_absolute', ascending=False
)
print(df_rules.shape)

(484, 5)

In [41]:

# look at some higher support rules: df_rules is sorted by
# 'support_itemset_absolute' in descending order, so the first ten rows are
# the rules with the largest absolute item set support
df_rules.head(10)

Out[41]:

	consequent	antecedent	support_itemset_absolute	support_itemset_relative	confidence_pct
0	Eggs	()	9	0.9	90.000000
3	Yogurt	()	8	0.8	80.000000
11	Kidney Beans	()	8	0.8	80.000000
2	Yogurt	(Eggs,)	7	0.7	77.777778
4	Eggs	(Kidney Beans,)	7	0.7	87.500000
5	Kidney Beans	(Eggs,)	7	0.7	77.777778
1	Eggs	(Yogurt,)	7	0.7	87.500000
10	Kidney Beans	(Yogurt,)	6	0.6	75.000000
29	Corn	()	6	0.6	60.000000
58	Milk	()	6	0.6	60.000000

In [42]:

# look at some lower support rules: df_rules is sorted by
# 'support_itemset_absolute' in descending order, so the last ten rows are
# the rules with the smallest absolute item set support
df_rules.tail(10)

Out[42]:

	consequent	antecedent	support_itemset_absolute	support_itemset_relative	confidence_pct
294	Yogurt	(Ice cream, Kidney Beans)	1	0.1	50.0
295	Kidney Beans	(Ice cream, Yogurt)	1	0.1	100.0
112	Yogurt	(Onion, Milk, Kidney Beans, Eggs)	1	0.1	100.0
111	Eggs	(Onion, Milk, Kidney Beans, Yogurt)	1	0.1	100.0
110	Kidney Beans	(Onion, Milk, Eggs)	1	0.1	100.0
299	Eggs	(Ice cream, Corn, Yogurt)	1	0.1	100.0
300	Yogurt	(Ice cream, Corn, Eggs)	1	0.1	50.0
301	Corn	(Ice cream, Yogurt, Eggs)	1	0.1	100.0
302	Yogurt	(Ice cream, Corn)	1	0.1	50.0
483	Milk	(Apple,)	1	0.1	100.0