import re
from operator import is_not
from functools import partial, lru_cache
from sympy import init_printing
from sympy.plotting import plot
from latex2sympy import process_sympy
from pyquery import PyQuery
from notebook.utils import url_path_join as ujoin
# We're going to do some scraping, so let's at least do some caching.
@lru_cache(maxsize=128)
def pq(url):
    """Build a PyQuery document for ``url``, memoized per URL.

    The ``lru_cache`` decorator keeps up to 128 results, so calling this
    again with the same ``url`` returns the previously built object
    instead of constructing a new one.

    Args:
        url: Address handed straight to ``PyQuery``.

    Returns:
        The ``PyQuery`` object built from ``url``.
    """
    document = PyQuery(url)
    return document
# There's a lot of crazy LaTeX out there, and a lot of math that latex2sympy
# can't parse yet, so here's a helper function to winnow that down some.
# This PR handles the various style expressions!
# Matches text that starts with a single style group such as
# ``{\displaystyle ...}``, ``{\textstyle ...}`` or ``{\scriptstyle ...}``.
# Group 2 captures everything between the style command and the last ``}``.
STYLE_RE = r'{\\(display|text|script)style (.*)\}'


def parses(raw):
    r"""Try to convert a LaTeX string into whatever ``process_sympy`` returns.

    If ``raw`` begins with a ``{\displaystyle ...}``, ``{\textstyle ...}``
    or ``{\scriptstyle ...}`` wrapper, only the wrapped expression is
    passed on to ``process_sympy``; otherwise ``raw`` is passed unchanged.

    Args:
        raw: LaTeX source text.

    Returns:
        The result of ``process_sympy`` for the (possibly unwrapped) text,
        or ``None`` if ``process_sympy`` raised an exception.
    """
    # DOTALL rather than MULTILINE: the pattern has no ^/$ anchors, so
    # MULTILINE had no effect, while DOTALL lets ``.`` cross newlines so a
    # style wrapper around multi-line LaTeX is still stripped.
    match = re.match(STYLE_RE, raw, re.DOTALL)
    if match is not None:
        raw = match[2]
    try:
        return process_sympy(raw)
    except Exception:
        # Deliberate best effort: unparseable input is reported as None so
        # callers can filter it out. Catching Exception (not a bare except)
        # lets KeyboardInterrupt and SystemExit propagate.
        return None
# Wikipedia (or any other MediaWiki page with the Math extension).
# TODO: How might we search for MediaWiki pages that use a given extension?
# Here are some URLs and HTML selectors that have lots of neat stuff.
# Base URL that English Wikipedia article paths are joined onto.
WIKIPEDIA = "https://en.wikipedia.org/wiki"
# CSS selector: elements with class "mwe-math-fallback-image-inline" that
# carry an "alt" attribute.
MEDIAWIKI_SELECTOR = ".mwe-math-fallback-image-inline[alt]"
# Base URL that Math Wiki article paths are joined onto.
# NOTE(review): plain http on the wikia.com host -- confirm it still resolves.
MATH_WIKI = "http://math.wikia.com/wiki"
# CSS selector: elements with class "tex" that carry an "alt" attribute.
WIKIA_SELECTOR = ".tex[alt]"
def wiki_maths(topic, wiki=WIKIPEDIA, selector=MEDIAWIKI_SELECTOR, attrib="alt"):
    """Collect the parseable math expressions found on a wiki article.

    Args:
        topic: Article title; spaces are replaced with underscores to form
            the last URL path segment.
        wiki: Base URL the article path is joined onto.
        selector: Selector applied to the fetched document to pick the
            elements that carry math.
        attrib: Name of the attribute on each selected element whose value
            is handed to ``parses``.

    Returns:
        A list of the non-``None`` results of ``parses``, in the order the
        elements were selected.
    """
    page_url = ujoin(wiki, topic.replace(" ", "_"))
    elements = pq(page_url)(selector)

    expressions = []
    for element in elements:
        parsed = parses(element.attrib[attrib])
        # Identity check, not truthiness: only a literal None is dropped.
        if parsed is not None:
            expressions.append(parsed)
    return expressions
# Example lookups using the default wiki and selector (English Wikipedia).
# The returned lists are not stored; each call runs at module level.
wiki_maths("Tsiolkovsky rocket equation")
wiki_maths("Radar")
wiki_maths("Kalman filter")
# Same lookup against the Math Wiki, which needs its own base URL and selector.
wiki_maths("Differential equation", MATH_WIKI, WIKIA_SELECTOR)
# The Jupyter Notebook uses Jupyter Markdown, which is a subset of GitHub-flavored
# Markdown plus inline and block math, wrapped in `$` or `$$`. You can drop a bit
# of the nbviewer URL into this function and see all the defined equations from
# a notebook.
def notebook_math(nbviewer_fragment):
    """Collect the parseable ``$$...$$`` equations from a notebook on nbviewer.

    Args:
        nbviewer_fragment: Path joined onto ``https://nbviewer.jupyter.org``
            to form the page URL.

    Returns:
        A list of the non-``None`` results of ``parses`` for every
        ``$$...$$`` span found in the ``.text`` of the page's ``p``
        elements, in the order they were found.
    """
    url = ujoin("https://nbviewer.jupyter.org", nbviewer_fragment)
    # The page is fetched through pq() only. The former ``requests.get(url)``
    # call used a name that is never imported and its result was unused.
    matches = [
        latex
        for p in pq(url)("p") if p.text
        # DOTALL lets ``.`` cross newlines so block equations that span
        # several lines are captured; the previous MULTILINE flag had no
        # effect on this anchor-free pattern.
        for latex in re.findall(r'\$\$(.*?)\$\$', p.text, flags=re.DOTALL)
    ]
    parsed = (parses(latex) for latex in matches)
    # Identity check, not truthiness: only a literal None is dropped.
    return [expr for expr in parsed if expr is not None]
# Example: run notebook_math on a notebook addressed by its nbviewer path.
# The returned list is not stored; the call runs at module level.
notebook_math("github/waltherg/notebooks/blob/master/2013-12-03-Crank_Nicolson.ipynb")