I was curious about the distribution of function arities across different Scala projects. Scala is a functional programming language, so I assumed the average number of arguments would be more than 2.
So I did a little research: I wrote a Scala compiler plugin that records the arity of every defined function. In this notebook I visualize the collected data for the following projects: Scala 2.12, Dotty 0.5.0-RC1, Akka 2.4, Spark 2.2, Cats 1.0, and ScalaZ 8.
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
Constructors are not counted, and `this` is not counted
as an implicit argument.
def readStats(filename):
    """Load function arities from a whitespace-separated stats file.

    Each non-blank line must hold exactly three whitespace-separated fields:
    the function name, its number of arguments, and an implicits field.
    Entries named ``<init>`` (constructors) are skipped. The implicits field
    is split out of the line but is not used.

    Args:
        filename: Path of the stats file to read.

    Returns:
        A NumPy array with one arity per non-constructor entry, in file order.

    Raises:
        ValueError: If a non-blank line does not have exactly three fields,
            or if its arity field is not an integer.
    """
    arities = []
    with open(filename, "r") as stats_file:
        for line in stats_file:
            fields = line.split()
            if not fields:
                # Blank or whitespace-only lines (e.g. a trailing empty line)
                # carry no record; skip them rather than failing on the
                # three-way unpack below.
                continue
            name, num, _implicits = fields
            arity = int(num)
            if name == "<init>":
                continue
            arities.append(arity)
    return np.asarray(arities)
# Load the collected arities of every project into its own module-level array.
# The files are read in the order listed.
scalaz, akka, scala, dotty, spark, cats = (
    readStats(stats_path)
    for stats_path in (
        "data/scalaz8.txt",
        "data/akka2.4.txt",
        "data/scala2.12.txt",
        "data/dotty.txt",
        "data/spark2.2.txt",
        "data/cats1.0.txt",
    )
)
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8*"
# and later releases dropped the old names, so use the legacy name only when
# the installed Matplotlib still lists it.
plt.style.use(
    'seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8'
)
# Figure 1: the two compilers side by side, Scala 2.12 (left) and Dotty (right).
fig = plt.figure(figsize=(12, 5))
fig.suptitle("Function arities in Scala projects")
for position, title, arities, label_y in (
    (121, "Scala 2.12", scala, 8000),
    (122, "Dotty 0.5.0-RC1", dotty, 2500),
):
    plt.subplot(position)
    plt.title(title)
    # Summary statistics are written at x=4 and a hand-picked y per panel.
    summary = "mean: %.2f std: %.2f" % (np.mean(arities), np.std(arities))
    plt.text(4, label_y, summary)
    # bins=range(8) yields unit-wide bins with edges 0..7.
    # NOTE(review): arities above 7 fall outside these bins but still count
    # towards the mean/std text above -- confirm this is intended.
    n, bins, patches = plt.hist(arities, bins=range(8), alpha=0.6, histtype='bar')
# Figure 2: Akka 2.4 (left) next to Spark 2.2 (right).
fig = plt.figure(figsize=(12, 5))
fig.suptitle("Function arities in Scala projects")
for position, title, arities, label_y in (
    (121, "Akka 2.4", akka, 10000),
    (122, "Spark 2.2", spark, 8000),
):
    plt.subplot(position)
    plt.title(title)
    # Summary statistics are written at x=4 and a hand-picked y per panel.
    summary = "mean: %.2f std: %.2f" % (np.mean(arities), np.std(arities))
    plt.text(4, label_y, summary)
    # bins=range(8) yields unit-wide bins with edges 0..7.
    # NOTE(review): arities above 7 fall outside these bins but still count
    # towards the mean/std text above -- confirm this is intended.
    n, bins, patches = plt.hist(arities, bins=range(8), alpha=0.6, histtype='bar')
# Figure 3: the functional-programming libraries, Cats 1.0 (left) and
# ScalaZ 8 (right).
fig = plt.figure(figsize=(12, 5))
fig.suptitle("Function arities in Scala projects")
for position, title, arities, label_y in (
    (121, "Cats 1.0", cats, 800),
    (122, "ScalaZ 8", scalaz, 250),
):
    plt.subplot(position)
    plt.title(title)
    # Summary statistics are written at x=4 and a hand-picked y per panel.
    summary = "mean: %.2f std: %.2f" % (np.mean(arities), np.std(arities))
    plt.text(4, label_y, summary)
    # bins=range(8) yields unit-wide bins with edges 0..7.
    # NOTE(review): arities above 7 fall outside these bins but still count
    # towards the mean/std text above -- confirm this is intended.
    n, bins, patches = plt.hist(arities, bins=range(8), alpha=0.6, histtype='bar')
There are no firm conclusions here, just a general "Aha, so that's how it is". I was surprised by the number of zero-argument functions.