#!/usr/bin/env python
# coding: utf-8

# In[1]:


get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
import numpy as np
import random
import string
import math
import re


# 1 font point is $\frac{1}{72}$ of an inch

# In[2]:


# One axis unit per inch of figure, so a 72 pt glyph should span the
# full height of this 1 x 1 figure.
width = height = 1
plt.figure(figsize=(width, height))
plt.axis([0, width, 0, height])
plt.text(0, 0, "A", fontsize=72)


# But what about width? Very hard with most fonts. Need monospace font

# In[3]:


width, height = 0.8, 1
font = FontProperties()
font.set_family("monospace")
plt.figure(figsize=(width, height))
plt.axis([0, width, 0, height])
plt.text(0, 0, "T", fontproperties=font, fontsize=72)


# It looks like the width is just about $\frac{4}{5}$ of the height.
#
# Let's draw 100 random words on the screen.

# In[4]:


width, height = 30, 30
font = FontProperties()
font.set_family("monospace")
plt.figure(figsize=(width, height))
plt.axis([0, width, 0, height])

label = "ABC"
for _ in range(100):
    # Draw order (size, then x, then y) is kept so a seeded run is unchanged.
    pt = 20 + random.random() * 210
    left = random.random() * width
    bottom = random.random() * height
    plt.text(left, bottom, label, fontproperties=font, fontsize=pt)


# To make them stop overlapping, we need to record where they are.
#
# We will use an object for the Word, and calculate the bounding box.

# In[5]:


class Word:
    """Text anchored at its lower-left corner with an estimated bounding box.

    ``x`` and ``y`` are in axis units and ``size`` is the font size in
    points.  The box estimate multiplies the size by ``PTSIZE`` (1/72) and,
    for the width, by ``WIDTHSCALE`` per character.
    """

    WIDTHSCALE = 0.8
    PTSIZE = 1 / 72

    def __init__(self, x, y, text, size):
        self.x, self.y = x, y
        self.text, self.size = text, size

    def right(self):
        """Return the x coordinate of the right edge of the box."""
        extent = len(self.text) * Word.WIDTHSCALE * self.size * Word.PTSIZE
        return self.x + extent

    def top(self):
        """Return the y coordinate of the top edge of the box."""
        rise = self.size * Word.PTSIZE
        return self.y + rise

    def overlap(self, other):
        """Return True unless the two boxes are separated along some axis."""
        # http://gamemath.com/2011/09/detecting-whether-two-boxes-overlap/
        separated = (
            self.right() < other.x
            or other.right() < self.x
            or self.top() < other.y
            or other.top() < self.y
        )
        return not separated


# First algorithm. Keep generating new positions until they fit.
# In[6]:


height = 30
width = 30
font = FontProperties()
font.set_family("monospace")
plt.figure(figsize=(width, height))
plt.axis([0, width, 0, height])

words = []
for i in range(100):
    text = "ABC"
    # Rejection sampling: draw a fresh position and size until the candidate
    # clears every word that has already been placed.
    while True:
        x = random.random() * width
        y = random.random() * height
        size = 20 + random.random() * 210
        w = Word(x, y, text, size)
        if not any(w.overlap(ow) for ow in words):
            break
    words.append(w)

for w in words:
    plt.text(w.x, w.y, w.text, fontproperties=font, fontsize=w.size)


# In[7]:


height = 30
width = 30
font = FontProperties()
font.set_family("monospace")
plt.figure(figsize=(width, height))
plt.axis([0, width, 0, height])
#plt.axis("off")

words = []
for i in range(100):
    a = random.randint(0, 13)
    b = random.randint(0, 13)
    f = min(a, b)
    l = max(a, b)
    # randint includes both endpoints, so a == b is possible; the inclusive
    # upper bound keeps the slice from being empty (an invisible word).
    text = string.ascii_uppercase[f:l + 1]
    while True:
        x = random.random() * width
        y = random.random() * height
        size = 20 + random.random() * 210
        w = Word(x, y, text, size)
        if not any(w.overlap(ow) for ow in words):
            break
    words.append(w)

for w in words:
    plt.text(w.x, w.y, w.text, fontproperties=font, fontsize=w.size)


# It works with longer strings. But two issues
# * It doesn't look good
# * Our sizes need to be fixed to match frequency
#
# Wordle Algorithm
#
# http://static.mrfeinberg.com/bv_ch03.pdf
#
# use sqrt to scale word size
#
# In descending size:
# * Initially place near center line
# * If overlapping, move in spiral to find open placement

# In[8]:


#https://stackoverflow.com/questions/20924085/python-conversion-between-coordinates
def cart2pol(x, y):
    """Convert Cartesian (x, y) to polar (rho, phi), with phi in radians."""
    rho = np.sqrt(x**2 + y**2)
    phi = np.arctan2(y, x)
    return rho, phi


def pol2cart(rho, phi):
    """Convert polar (rho, phi), with phi in radians, to Cartesian (x, y)."""
    x = rho * np.cos(phi)
    y = rho * np.sin(phi)
    return x, y


# How do you move in a spiral?
#
# Polar coordinates, increment angle and radius

# In[9]:


xs = []
ys = []
r = 0
theta = 0
for i in range(200):
    x, y = pol2cart(r, theta)
    xs.append(x)
    ys.append(y)
    # Growing the radius and the angle together traces an outward spiral.
    r += 0.01
    theta += 0.3
plt.plot(xs, ys)
plt.text(xs[-1], ys[-1], "A", fontproperties=font, fontsize=72)


# Add movement function to our Word object.

# In[10]:


class Word:
    """Text with an estimated bounding box that can spiral away from its start.

    ``x`` and ``y`` locate the lower-left corner in axis units and ``size``
    is the font size in points.  ``initx``/``inity`` remember the starting
    position; ``r``/``theta`` are the current polar offset from it.
    """

    # Estimated character width as a fraction of the font size.
    WIDTHSCALE = 0.75
    # Divisor that turns a size in points into axis units.
    POINTS_PER_UNIT = 72

    def __init__(self, x, y, text, size):
        self.initx = x
        self.inity = y
        self.r = 0
        self.theta = 0
        self.x = x
        self.y = y
        self.text = text
        self.size = size

    def right(self):
        """Return the x coordinate of the right edge of the box."""
        extent = len(self.text) * self.WIDTHSCALE * self.size / self.POINTS_PER_UNIT
        return self.x + extent

    def top(self):
        """Return the y coordinate of the top edge of the box."""
        return self.y + self.size / self.POINTS_PER_UNIT

    def overlap(self, other):
        """Return True unless the two boxes are separated along some axis."""
        # http://gamemath.com/2011/09/detecting-whether-two-boxes-overlap/
        if self.right() < other.x:
            return False
        if other.right() < self.x:
            return False
        if self.top() < other.y:
            return False
        if other.top() < self.y:
            return False
        return True

    def move(self, dr=0.01, dtheta=0.3):
        """Advance one step along the spiral centred on the start position.

        dr: amount added to the radius for this step.
        dtheta: amount (radians) added to the angle for this step.
        """
        self.r += dr
        self.theta += dtheta
        newx, newy = pol2cart(self.r, self.theta)
        self.x = self.initx + newx
        self.y = self.inity + newy


# Now, follow the algorithm. Adding them in random size order though.

# In[11]:


height = 30
width = 30
font = FontProperties()
font.set_family("monospace")
plt.figure(figsize=(width, height))
plt.axis([0, width, 0, height])
#plt.axis("off")

words = []
for i in range(100):
    a = random.randint(0, 13)
    b = random.randint(0, 13)
    f = min(a, b)
    l = max(a, b)
    # randint includes both endpoints, so a == b is possible; the inclusive
    # upper bound keeps the slice from being empty (an invisible word).
    text = string.ascii_uppercase[f:l + 1]
    x = random.random() * width
    # Start inside a two-unit band that begins at mid-height.
    y = random.random() * 2 + height / 2
    size = 20 + random.random() * 210
    w = Word(x, y, text, size)
    # Step along the spiral until the word clears everything placed so far.
    while any(w.overlap(ow) for ow in words):
        w.move()
    words.append(w)

for w in words:
    plt.text(w.x, w.y, w.text, fontproperties=font, fontsize=w.size)


# Now get actual data from our book.
#
# Remove stop words

# In[12]:


# Read the whole book, lower-cased, with newlines turned into spaces.
with open("WheelOfTime/EyeOfTheWorld.txt") as fopen:
    text = fopen.read().lower().replace("\n", " ")
# Drop every character that is neither a word character nor whitespace.
text = re.sub(r'[^\w\s]', '', text)

# Count how often each whitespace-separated token occurs.
d = {}
for w in text.split():
    d[w] = d.get(w, 0) + 1

# http://xpo6.com/download-stop-word-list/
with open("stop-word-list.txt") as stopf:
    for line in stopf:
        # Remove the stop word outright so it can never pad the top-100 list
        # when the text has few distinct words.
        d.pop(line.strip(), None)

# https://stackoverflow.com/questions/3121979/how-to-sort-list-tuple-of-lists-tuples
# (word, count) pairs, most frequent first; the sort is stable, so ties keep
# first-seen order.
data = sorted(d.items(), key=lambda tup: tup[1], reverse=True)
freqs = data[:100]


# Get English words so we can highlight non-English words

# In[13]:


# Only membership is tested against this collection, so a set is enough.
with open("english2.txt") as dopen:
    english = {line.strip() for line in dopen}


# In[14]:


height = 30
width = 30
font = FontProperties()
font.set_family("monospace")
plt.figure(figsize=(width, height))
plt.axis([0, width, 0, height])
plt.axis("off")

words = []
# Shift counts so the least frequent selected word gets sqrt(1); the rest
# scale with the square root of their count above that baseline.
baseline = freqs[-1][1] - 1 if freqs else 0
for token, count in freqs:
    x = random.random() * width
    # Start inside a two-unit band that begins at mid-height.
    y = random.random() * 2 + height / 2
    size = 15 * math.sqrt(count - baseline)
    w = Word(x, y, token.upper(), size)
    # freqs is in descending count order, so larger words are placed first;
    # each one spirals outward until it clears everything already placed.
    while any(w.overlap(ow) for ow in words):
        w.move()
    words.append(w)

for w in words:
    lowered = w.text.lower()
    # Words missing from the English list are drawn in red; "mat" is forced
    # to red even though it may be in the list.
    color = 'red' if lowered not in english or lowered == "mat" else 'black'
    plt.text(w.x, w.y, w.text, fontproperties=font, color=color, fontsize=w.size)


# In[ ]: