#!/usr/bin/env python # coding: utf-8 # # # An unscientific survey of Python interpreters # # - Link to this notebook: http://bit.ly/pycon-ca-keynote # * Structured as a talk so you can use, e.g. `nbviewer` to read as a set of slides # - If you want to play with the notebook **right now**, you can use [free Jupyter notebook hosting on Azure ML Studio](http://blogs.technet.com/b/machinelearning/archive/2015/07/24/introducing-jupyter-notebooks-in-azure-ml-studio.aspx) if you don't want to download Jupyter itself # * Open any notebook in Azure ML Studio # * Go to `File -> Open...` # * Click `Upload` # * Select the `.ipynb` file you just downloaded # * Click `Upload` next to the notebook name in the file listing and wait for the button to disappear # * Click on the file to open the notebook # * The service is in preview and we realize this is a bit clunky for uploading # # About *Dr. Brett Cannon* # # - Python core developer since 2003 # - Blog is at http://snarky.ca # - On Google+ as [+BrettCannon](https://plus.google.com/+BrettCannon) # - On Twitter as [@brettsky](https://twitter.com/brettsky) # - On GitHub as [@brettcannon](https://github.com/brettcannon) # # Thanks to my employer # [Microsoft](http://www.microsoft.com/) -- specifically the Python team in the [data & analytics](https://studio.azureml.net/) group in [Azure](https://azure.microsoft.com) -- paid to get me to the conference. # # And of course there are job openings at Microsoft: # - Python team (the one I'm a part of): [pythonjobs@microsoft.com](mailto:pythonjobs@microsoft.com) # - General (which does include other positions involving Python): https://careers.microsoft.com # # What is this all about? # *Interpreters*! You might call them a *virtual machine* (VM) or a Python *implementation*. 
# # - Supports a modern version of Python # * Python 2.7 or Python 3.3 at minimum (sorry, [PyPy3](http://doc.pypy.org/en/latest/release-pypy3-2.4.0.html)) # - Can run *most* of the [Grand Unified Benchmark Suite](https://hg.python.org/benchmarks/) # - Available on Windows, Linux, **or** OS X # # A brief history of Python interpreters # # - 1989 # * [Guido starts working](http://python-history.blogspot.ca/2009/01/personal-history-part-1-cwi.html) on the Python language and implicitly the CPython interpreter in late December # - 1991 # * CPython 0.9.0 released on alt.sources on February 20th # - 1994 # * [CPython 1.0.0 released](https://en.wikipedia.org/wiki/History_of_Python#Version_release_dates) # - 1997 # * [Jim Hugunin creates JPython](https://en.wikipedia.org/wiki/Jython) # - 1998 # * [Christian Tismer starts developing Stackless 1.0](https://ep2013.europython.eu/conference/talks/the-story-of-stackless-python) # - 1999 # * Barry Warsaw [renames JPython to Jython](https://wiki.python.org/jython/JythonFaq/GeneralInfo) with the 2.0 release # # - 2001 # * [Parrot April Fool's joke occurs](http://developers.slashdot.org/story/01/03/28/1742237/perl--python--parrot) # - 2003 # * [Armin Rigo releases Psyco 1.0](http://sourceforge.net/projects/psyco/files/psyco/) # - 2004 # * [PyPy development starts](http://cordis.europa.eu/project/rcn/74481_en.html); ends up subsuming parts of Stackless and all of Psyco # * Dan Sugalski, lead developer of the Parrot VM, [gets a pie to the face](http://archive.oreilly.com/pub/a/oscon2004/friday/index.html) from Guido for losing a bet that Parrot would be faster than CPython (Dan actually never finished his Python interpreter) # - 2006 # * [Jim Hugunin releases IronPython 1.0.0 at PyCon](http://blogs.msdn.com/b/hugunin/archive/2006/09/05/741605.aspx) # - 2008 # * [Work starts on Unladen Swallow](https://code.google.com/p/unladen-swallow/wiki/Release2009Q1) # # - 2013 # * [MicroPython Kickstarter campaign 
launched](https://www.kickstarter.com/projects/214379695/micro-python-python-for-microcontrollers/updates); first alternative, partial Python 3 implementation # - 2014 # * [Pyston is announced at PyCon](https://blogs.dropbox.com/tech/2014/04/introducing-pyston-an-upcoming-jit-based-python-implementation/) # * [PyPy3 has its first stable release](https://mail.python.org/pipermail//pypy-dev/2014-June/012584.html); first full-featured, alternative Python 3 interpreter # - 2015 # * [Pyjion announced at PyData Seattle](http://seattle.pydata.org/schedule/presentation/14/) # * [Russell Keith-Magee announces VOC at Vancouver Python Day](https://speakerdeck.com/freakboy3742/python-on-the-move-the-state-of-mobile-python-1) # * It is [announced](https://mail.python.org/pipermail/edu-sig/2015-October/011334.html) that MicroPython will be the Python implementation on the [BBC micro:bit](https://www.microbit.co.uk/) # # The interpreters # *All 4 of them ...* # ## [CPython](https://www.python.org/) # # - Implemented in C # - Works with C extensions # - Available on Linux, OS X, and Windows # - The most compatible interpreter due to the fact that it is the implicit specification of the language # ## [Jython](http://www.jython.org/) # # - Implemented in Java # - Partial C extension compatibility being worked on through [JyNI](http://jyni.org/) # - Second oldest interpreter (behind CPython) # - Runs on any OS supporting JDK 7 # - Currently supports Python 2.7 # ## [PyPy](http://pypy.org/) # # - Implemented in RPython (a restricted subset of Python) # - Supports C extensions through: # * [CFFI](http://cffi.readthedocs.org/) # * ctypes # * [cppyy](http://doc.pypy.org/en/latest/cppyy.html) # - Historically the [fastest Python implementation](http://speed.pypy.org/) --ignoring startup -- thanks to its tracing JIT # - Works on Linux, OS X, and Windows # - Supports Python 2.7 and 3.2 # ## [IronPython](http://ironpython.net/) # # - Implemented in C# # - Supports C extensions through 
[IronClad](https://github.com/IronLanguages/ironclad) # - Works on any platform that supports .NET 3.5 and higher # - Supports Python 2.7 # # The (possible) future # *... if people stay motivated and funded.* # ## [Pyjion](https://github.com/microsoft/pyjion) # # - Extension of CPython to add a JIT # * New code written in C++11 using the [CoreCLR](https://github.com/dotnet/coreclr) JIT # - Long-term goal is to get patches pushed upstream so adding a JIT to CPython is an optional, drop-in feature # - Works on Windows only due to laziness/momentum # - Supports Python 3.6 # - Works today, but doesn't do very many optimizations # - **Disclaimer**: I work on this part-time # ## [Pyston](https://github.com/dropbox/pyston) # * Announced at the PyCon 2014 language summit # * Sponsored by Dropbox # * Uses CPython as a base, using LLVM for its JIT # * JIT compiling at two different levels # - Tracing JIT at the bytecode level # - LLVM JIT at the code object level # * Currently targeting Python 2.7 on Ubuntu # * Being actively developed, hitting 25% on its own web server benchmarks # ## [VOC](https://github.com/pybee/voc) # # * Announced at Vancouver Python Day 2015 # * Transpiler from CPython bytecode to JVM bytecode # - ... 
so technically not an interpreter, but it's still (hoping to be) an implementation of Python # * Goal is to allow writing Android apps in Python 3 # ## Skython # - [Announced](https://lwn.net/Articles/640177/) at the PyCon 2015 language summit # - From-scratch C implementation with no GIL # - Supports Python 3 # - Have not heard anything about it since the summit # # Other ways to speed up Python # *Because you can't always change which interpreter you use.* # ## [Cython](http://cython.org/) # # - Static compiler for Python code # * Essentially transpiles Python code to equivalent C code using CPython's C API # - Static compiler for the Cython language # * Superset of Python # * Has support for declaring types so you can optimize at the C level # - Good if you would have tried to use C for some critical code # * Can also be used to interface with C code # ## [Numba](http://numba.pydata.org/) # # - JIT compiler for numeric Python code # * Directly targets array-based or mathematics code, so not general-purpose # - Requires only adding a decorator to key functions for performance # * Cheap enough to simply install it and give it a try on your performance-critical code # ## [CFFI](https://cffi.readthedocs.org) # # - Provides an FFI to C/C++ code # * Supports both CPython and PyPy # - Good if you need to wrap some C code # * Can also be used to wrap some C code you wrote for performance reasons # # An **UNSCIENTIFIC** view of performance # *The results are more like "guidelines" than something you can consider rigorously measured.* # ## A note about benchmarking interpreters # # - Used the [Grand Unified Python Benchmark Suite](https://hg.python.org/benchmarks/) # - Run under Windows 10 on an Intel Core i7-5600U @ 2.6 GHz with 8 GB of RAM # - Used a 64-bit interpreter when possible # - All numbers are percentage of time taken relative to CPython 2.7.10 # * `2.0` means it took 2x/200% as long as CPython 2.7.10 # * `0.5` means the benchmark took 0.5x/50% as long # * In
# other words, smaller is better and `1.0` is equivalent to CPython 2.7.10

# In[118]:

# Column labels: the benchmarks from the Grand Unified Python Benchmark
# Suite that were run.
benchmark_names = ['2to3', 'call_method', 'call_method_slots',
                   'call_method_unknown', 'call_simple', 'chameleon_v2',
                   'chaos', 'django_v2', 'etree_generate', 'etree_iterparse',
                   'etree_parse', 'etree_process', 'fannkuch', 'fastpickle',
                   'fastunpickle', 'float', 'formatted_logging', 'go',
                   'hexiom2', 'json_dump_v2', 'json_load', 'mako_v2',
                   'meteor_contest', 'nbody', 'nqueens', 'pathlib',
                   'pickle_dict', 'pickle_list', 'pidigits', 'raytrace',
                   'regex_compile', 'regex_effbot', 'regex_v8', 'richards',
                   'silent_logging', 'simple_logging', 'spectral_norm',
                   'telco', 'tornado_http', 'unpickle_list']

# (interpreter, timings) pairs: time taken relative to CPython 2.7.10 for
# each benchmark in ``benchmark_names``; ``None`` marks a benchmark the
# interpreter could not run.
pandas_data = [
    ('Pyjion',
     [None, 1.09, 1.11, 1.28, 1.25, None, 1.47, None, 0.87, 2.29, 1.51,
      1.02, 1.13, 0.64, 1.04, 1.01, 1.66, 1.21, 1.22, 1.32, 0.57, 0.88,
      1.0, 0.96, 1.24, 1.21, 0.65, 0.56, 0.96, 1.12, 1.09, 1.0, 0.8,
      0.95, 1.27, 1.67, 1.51, 0.02, None, 0.71]),
    ('CPython 3.5.0 PGO',
     [0.85, 0.87, 0.87, 0.89, 0.99, 0.87, 1.11, 0.81, 0.75, 1.9, 1.3,
      0.88, 1.11, 0.69, 1.04, 0.94, 1.12, 0.9, 0.95, 1.23, 0.5, 0.85,
      0.97, 0.93, 1.01, 1.08, 0.81, 0.61, 0.94, 0.96, 0.8, 1.02, 0.89,
      0.76, 1.16, 1.05, 1.47, 0.01, 1.01, 0.75]),
    ('Jython 2.7.0',
     [2.57, 0.83, 0.78, 0.76, 0.63, None, 0.84, 1.11, 3.45, 16.58, 40.3,
      3.19, 0.66, 1.12, 2.35, 0.96, 1.09, 0.53, 0.93, 1.55, 2.64, 1.42,
      0.88, 1.24, 1.18, None, 2.7, 1.65, 0.93, 0.74, 0.96, 1.26, 1.64,
      0.75, 1.48, 1.24, 0.64, 1.15, None, 1.64]),
    ('IronPython 2.7.5',
     [None, 0.47, 0.44, 0.91, 0.18, None, 0.64, None, 9.51, None, 64.26,
      8.9, 0.86, 1.16, 0.94, 0.9, 1.39, 1.58, 0.83, 20.98, 8.84, 1.98,
      1.05, 0.58, 1.03, 4.99, 1.34, 0.93, 1.34, 0.61, None, 1.0, 1.06,
      1.08, 1.35, 1.6, 0.91, 1.27, None, 0.63]),
    ('PyPy 4.0.0',
     [0.92, 0.02, 0.02, 0.02, 0.02, 0.43, 0.02, 0.07, 0.3, 1.22, 2.75,
      0.3, 0.18, 2.1, 1.4, 0.1, 0.18, 0.35, 0.1, 0.36, 0.28, 0.46, 0.44,
      0.14, 0.2, 0.55, 4.85, 3.31, 2.87, 0.01, 0.18, 0.58, 2.04, 0.01,
      None, 0.16, 0.06, 0.03, 0.91, 0.88]),
    ('CPython 3.5.0',
     [0.99, 1.0, 0.98, 0.95, 1.09, 0.95, 1.28, 0.94, 0.84, 2.27, 1.44,
      1.11, 1.22, 0.62, 1.2, 1.1, 1.23, 1.12, 1.12, 1.27, 0.52, 0.85,
      0.98, 1.06, 1.16, 1.16, 0.64, 0.55, 0.95, 1.1, 0.89, 1.0, 0.77,
      0.99, 1.25, 1.27, 1.64, 0.02, 1.06, 0.67])]

import pandas

# FIX: DataFrame.from_items() and DataFrame.sort() were deprecated and then
# removed from pandas; the plain DataFrame constructor plus sort_index()
# builds the identical frame on every pandas version.
df = pandas.DataFrame(
    [timings for _, timings in pandas_data],
    index=[name for name, _ in pandas_data],
    columns=benchmark_names).sort_index()

# In[119]:

import pprint
pprint.pprint(benchmark_names)

# In[120]:

get_ipython().run_line_magic('matplotlib', 'inline')
import seaborn
seaborn.set_style("whitegrid")  # Make baseline stand out.
seaborn.mpl.rc("figure", figsize=(16, 5), dpi=80)  # Make everything big at 1280x720.
outlier_cutoff = 2.0  # When locking down scale, cap at 2x slower than CPython 2.7.10.
y_label = 'Time taken relative to CPython 2.7.10 (smaller is better)'


def add_baseline(plot):
    """Add a black line at 1.0 to represent parity of performance."""
    plot.axhline(1, color="black")


def benchplot(results, ylim=None):
    """Plot all the benchmark results individually as a bar plot.

    If *ylim* is given, cap the y-axis at that value so extreme outliers
    do not swamp the scale.
    """
    p = results.plot(kind='bar')
    add_baseline(p)
    p.set_ylabel(y_label)
    if ylim:
        # FIX: honour the caller-supplied cap; previously this always used
        # the module-level ``outlier_cutoff`` no matter what was passed.
        p.set_ylim(0.0, ylim)
    return p


def overviewplot(results):
    """Plot all the benchmark results in an overview style using a boxplot
    and stripplot.

    The y axis representing performance compared to CPython 2.7.10 is
    capped at 2x slower to present a consistent scale across plots.
    """
    p = seaborn.boxplot(data=results, orient='v', color="seagreen",
                        fliersize=0)
    p2 = seaborn.stripplot(data=results, orient='v', color="purple",
                           edgecolor="gray", jitter=True)
    add_baseline(p)
    p.set_ylim(0.0, outlier_cutoff)  # For consistent scaling.
    return p, p2


# ## And now all the data in one unreadable plot!

# In[121]:

ax = df.plot(kind='bar', legend=None)
ax.set_ylabel(y_label)

# ## Individual interpreters
# *Sliced and diced ...*

# ### Jython 2.7.0
# `perf.py -b 2n3,-startup,-chameleon_v2,-pathlib,-tornado_http,-unpack_sequence`

# In[122]:

jython = df.loc['Jython 2.7.0']

# Jython has two **REALLY** bad benchmarks.
# In[123]: benchplot(jython) # Going to crop the data to `2.0` on the y-axis make it readable. # In[124]: benchplot(jython, ylim=outlier_cutoff) # A visual overview of performance, with a crop of `2.0` to keep a consistent scale across all interpreters. # In[125]: overviewplot(jython) # ### IronPython 2.7.5 # `perf.py -a -b 2n3,-startup,-2to3,-django_v2,-etree_iterparse,-regex_compile,-tornado_http` # In[126]: ironpython = df.loc['IronPython 2.7.5'] # In[127]: benchplot(ironpython) # Cropping the **REALLY** bad benchmarks to make the data readable # In[128]: benchplot(ironpython, ylim=outlier_cutoff) # In[129]: overviewplot(ironpython) # ### PyPy 4.0.0 # `perf.py -b 2n3,-startup` # In[130]: pypy = df.loc['PyPy 4.0.0'] # In[131]: benchplot(pypy) # In[132]: benchplot(pypy, ylim=outlier_cutoff) # In[133]: overviewplot(pypy) # ### CPython 3.5.0 # `perf.py -b 2n3,-startup` # In[134]: cpython = df.loc['CPython 3.5.0'] # In[135]: benchplot(cpython) # In[136]: benchplot(cpython, ylim=outlier_cutoff) # In[137]: overviewplot(cpython) # ### CPython 3.5 with profile-guided optimizations # `perf.py -b 2n3,-startup` # # Trained on Python's test suite. # In[138]: cpython_pgo = df.loc['CPython 3.5.0 PGO'] # In[139]: benchplot(cpython_pgo) # In[140]: overviewplot(cpython_pgo) # #### Aside: where is the PGO build actually a hindrance? # In[141]: # Which benchmarks are **faster** without PGO? cpython_faster = cpython < cpython_pgo cpython_faster[cpython_faster.isin([True])] # In[142]: (cpython_pgo - cpython)[cpython_faster] # In[143]: # All benchmarks +/- 10% speed difference. 
threshold = 0.10  # Flag benchmarks whose PGO delta exceeds +/- 10%.
cpython_diff = cpython_pgo - cpython
cpython_diff_faster = cpython_diff[cpython_diff < -threshold]
cpython_diff_slower = cpython_diff[cpython_diff > threshold]
pandas.concat([cpython_diff_faster, cpython_diff_slower])

# ### Pyjion
# `perf.py -b 2n3,-startup,-2to3,-chameleon_v2,-tornado_http`

# In[144]:

pyjion = df.loc['Pyjion']

# In[145]:

benchplot(pyjion)

# In[146]:

# Consistency fix: pass the cap by keyword like every other cropped plot.
benchplot(pyjion, ylim=outlier_cutoff)

# In[147]:

overviewplot(pyjion)

# ## Overview

# ### Boxplot showing all interpreters side-by-side
# Remember, scale is capped at 2x slower than CPython 2.7.10.

# In[149]:

overviewplot(df.transpose())

# ### Geometric mean over all benchmarks
# Gives a simplistic number to classify overall performance.

# In[150]:


def geometric_mean(series):
    """Return the geometric mean of *series*, ignoring missing values.

    ``count()`` excludes NaN/None entries, matching ``prod(skipna=True)``,
    so the mean is taken only over benchmarks the interpreter actually ran.
    """
    return series.prod(skipna=True) ** (1 / series.count())


def geometric_mean_series(dataframe):
    """Return a Series mapping each row label of *dataframe* to the
    geometric mean of that row's values."""
    labels = dataframe.index
    # Idiom: build the values with a comprehension instead of a manual
    # append loop; behaviour is unchanged.
    means = [geometric_mean(dataframe.loc[label]) for label in labels]
    return pandas.Series(data=means, index=labels)


# In[151]:

benchplot(geometric_mean_series(df))

# # Final Remarks
#
# - Python has a healthy collection of interpreters
# - Considering how dynamic Python is and that CPython is tuned for the language, all the interpreters performed admirably and with great compatibility
# - All interpreters mentioned are open source, so feel free to help out!