#!/usr/bin/env python
# coding: utf-8

# # Function arities of different Scala projects
#
# I was curious what the distribution of function arities looks like in
# different Scala projects.
# Scala is a functional programming language, so I assumed the average number
# of arguments would be more than 2.
#
# So I did a little research. I created a Scala compiler plugin that gathers
# the arities of defined functions.
# In this notebook I visualize the collected data for the following projects:
# - Scala 2.12.4 compiler and standard library
# - Dotty 0.5.0-RC1 compiler
# - Akka 2.4
# - Spark 2.2
# - Cats 1.0
# - Scalaz 7.2.9
#
# NOTE(review): the list above says "Scalaz 7.2.9", but the data file loaded
# below is "data/scalaz8.txt" and the plot is titled "ScalaZ 8" -- confirm
# which version the data was actually collected from.

# In[10]:

# The inline-backend magic only exists inside IPython/Jupyter. Guard it so the
# file can also be executed as a plain script.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
    _IN_IPYTHON = True
except NameError:
    _IN_IPYTHON = False

import matplotlib.pyplot as plt
import numpy as np


# I don't count constructors, and do _not_ count `this` as an implicit argument.

# In[11]:

def readStats(filename):
    """Load the arity column of a stats file into a NumPy array.

    Every non-blank line of the file must consist of exactly three
    whitespace-separated fields, ``name arity implicits``. Only the second
    field (the arity) is used; the other two are ignored here.
    (The third field is presumably the number of implicit parameters --
    TODO confirm against the compiler plugin that writes these files.)

    Blank lines are skipped.

    Args:
        filename: Path of the stats file to read.

    Returns:
        A 1-D ``numpy`` array with one arity per non-blank input line, in
        file order.

    Raises:
        ValueError: If a non-blank line does not have exactly three fields,
            or if its arity field is not an integer.
    """
    arities = []
    with open(filename, "r") as stats:
        for lineno, line in enumerate(stats, start=1):
            fields = line.split()
            if not fields:
                # Blank (or whitespace-only) line, e.g. a trailing newline.
                continue
            if len(fields) != 3:
                # A record with a missing field cannot be told apart from a
                # corrupt one, so fail loudly instead of guessing.
                raise ValueError(
                    "%s:%d: expected 3 fields (name arity implicits), got %d: %r"
                    % (filename, lineno, len(fields), line)
                )
            arities.append(int(fields[1]))
    return np.asarray(arities)


# In[12]:

scalaz = readStats("data/scalaz8.txt")
akka = readStats("data/akka2.4.txt")
scala = readStats("data/scala2.12.txt")
dotty = readStats("data/dotty.txt")
spark = readStats("data/spark2.2.txt")
cats = readStats("data/cats1.0.txt")


# Histogram bin edges shared by every plot: 0, 1, ..., 7.
# NOTE: with these edges the last bin is closed on both sides, so arities 6
# and 7 land in the same bar, and arities above 7 are not drawn at all
# (they are still included in the mean/std shown on each plot).
ARITY_BINS = range(8)


def _new_figure():
    """Create one wide figure carrying the common super-title."""
    figure = plt.figure(figsize=(12, 5))
    figure.suptitle("Function arities in Scala projects")
    return figure


def _plot_arities(position, title, data, text_y):
    """Draw one arity histogram with a mean/std annotation.

    Args:
        position: Three-digit subplot spec passed to ``plt.subplot``.
        title: Title of the subplot.
        data: Array of arities to histogram.
        text_y: Y data coordinate at which the mean/std label is placed
            (x is fixed at 4); chosen by hand per data set.
    """
    plt.subplot(position)
    plt.title(title)
    plt.text(4, text_y, "mean: %.2f std: %.2f" % (np.mean(data), np.std(data)))
    plt.hist(data, bins=ARITY_BINS, alpha=0.6, histtype='bar')


# In[13]:

# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8*" and
# later removed the old names; prefer the new name and fall back to the old
# one on older Matplotlib versions.
_STYLE = "seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn"
plt.style.use(_STYLE)

fig = _new_figure()
_plot_arities(121, "Scala 2.12", scala, 8000)
_plot_arities(122, "Dotty 0.5.0-RC1", dotty, 2500)


# In[14]:

fig = _new_figure()
_plot_arities(121, "Akka 2.4", akka, 10000)
_plot_arities(122, "Spark 2.2", spark, 8000)


# In[16]:

fig = _new_figure()
_plot_arities(121, "Cats 1.0", cats, 800)
_plot_arities(122, "ScalaZ 8", scalaz, 250)

# Outside IPython there is no inline backend to render the figures, so show
# them explicitly.
if not _IN_IPYTHON:
    plt.show()


# # Conclusions
#
# There are no conclusions. Just a general 'Aha, that's how it is'.
# I was surprised by the number of zero-argument functions.

# # Future work
#
# I'd like to gather similar statistics for Haskell projects.
#
# Please, contribute!