In [11]:
%matplotlib inline
In [12]:
# When True, the plotting cells below also write their figures to disk
# via plt.savefig; when False the figures are only displayed.
SAVE_FIGURES = False
In [13]:
import cv2
import matplotlib.pyplot as plt

import lxml.html
import requests
import urlparse
import posixpath
import itertools
In [14]:
from sklearn import preprocessing
from sklearn import svm
from sklearn.cross_validation import train_test_split
from sklearn.decomposition import PCA
from sklearn.grid_search import GridSearchCV
from sklearn.cross_validation import ShuffleSplit
from sklearn.metrics import confusion_matrix
In [15]:
import requests
import tempfile
import cv2
from PIL import Image
import pandas as pd
import numpy as np
In [16]:
def load_gif_url(url):
    """Download the GIF at ``url`` and return it as read by ``cv2.imread``.

    The GIF is decoded with PIL, re-saved as a temporary PNG and then read
    back with OpenCV (presumably because ``cv2.imread`` could not decode GIF
    files directly -- TODO confirm).

    Parameters
    ----------
    url : str
        Address of the GIF to fetch.

    Returns
    -------
    The image array returned by ``cv2.imread`` for the converted PNG.

    Raises
    ------
    requests.exceptions.HTTPError
        If the server answers with a 4xx/5xx status.
    AssertionError
        If OpenCV could not read the converted image, or it is empty.
    """
    response = requests.get(url)
    # Fail here with a clear HTTP error instead of later, when PIL would be
    # handed an error page that was saved with a ".gif" suffix.
    response.raise_for_status()

    with tempfile.NamedTemporaryFile(suffix=".gif") as f:
        f.write(response.content)
        f.flush()
        img = Image.open(f.name)
        # Image.open is lazy: force the pixel data to be read now, while the
        # temporary file still exists (it is deleted when this block exits).
        img.load()

    with tempfile.NamedTemporaryFile(suffix=".png") as f:
        img.save(f.name)
        f.flush()
        src = cv2.imread(f.name)

    assert src is not None and len(src), "Empty"

    return src

def show_bw(bw, frameon=None):
    """Draw ``bw`` on the current axes with a gray colormap and no ticks.

    When ``frameon`` is not ``None`` it is passed to ``set_frame_on`` of the
    current axes first; otherwise the frame setting is left untouched.
    """
    frame_requested = frameon is not None
    if frame_requested:
        axes = plt.gca()
        axes.set_frame_on(frameon)

    plt.imshow(bw, cmap='gray')

    # Empty tick lists remove the tick marks from both axes.
    plt.xticks([])
    plt.yticks([])


def get_bw(src):
    """Return ``src`` converted to grayscale using OpenCV's BGR-to-gray conversion."""
    gray = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
    return gray
In [17]:
# Scrape the index page and collect one identifier per linked state page:
# the file stem of each link's path (a path ending in "alabama.htm" yields
# "alabama").
response = requests.get("http://www.50states.com/us.htm")
# Stop on an HTTP error instead of parsing an error page into an empty list.
response.raise_for_status()
text = response.text
doc = lxml.html.document_fromstring(text)

states = []
for a in doc.findall(".//ul[@class='bulletedList']/li/a"):
    url = a.get("href")
    if not url:
        # Anchor without a target: there is no path to derive a name from.
        continue
    file_name = posixpath.basename(urlparse.urlsplit(url).path)
    state_name = posixpath.splitext(file_name)[0]
    states.append(state_name)

# Every later cell iterates over `states`; an empty list would make them
# silently do nothing, so surface the problem here.
assert states, "No state links found; the page layout may have changed"
    
def make_url(state):
    """Return the URL of the map GIF for the given state identifier."""
    url_template = "http://www.50states.com/maps/%s.gif"
    return url_template % state
In [18]:
def get_state_color(state, dilate=True, header_rows=150, approx_tolerance=0.02,
                    min_perimeter=50, min_area=500):
    """Download a state's map and return it with its large shapes filled in black.

    Parameters
    ----------
    state : str
        State identifier understood by ``make_url``.
    dilate : bool
        If true, dilate the inverted image with a 3x3 kernel before looking
        for contours.
    header_rows : int
        Number of rows cropped from the top of the downloaded image (the
        original author notes that this removes the text at the top).
    approx_tolerance : float
        Polygon-approximation tolerance, as a fraction of the contour's
        perimeter.
    min_perimeter, min_area : number
        A contour is drawn only if its perimeter exceeds ``min_perimeter``
        and the area of its approximated polygon exceeds ``min_area``.

    Returns
    -------
    A ``uint8`` array with the same shape as the cropped color image: white
    (255) everywhere, with the accepted contours filled in black.
    """
    source = load_gif_url(make_url(state))

    # Drop the text at the top.
    source = source[header_rows:]

    # Convert 3 color channels to 1, then invert the colors (per the docs for
    # findContours).
    inverted = 255 - get_bw(source)

    # This seems to be required for Massachusetts.
    if dilate:
        kernel = np.ones((3, 3), np.uint8)
        inverted = cv2.dilate(inverted, kernel, iterations=1)

    # findContours returns (contours, hierarchy) in OpenCV 2.x and 4.x but
    # (image, contours, hierarchy) in 3.x; the contour list is always the
    # second-to-last item. `inverted` is not used afterwards, so it does not
    # matter whether OpenCV modifies it in place.
    contours = cv2.findContours(inverted, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

    canvas = 255 * np.ones(source.shape, dtype=np.uint8)
    for index, contour in enumerate(contours):
        perimeter = cv2.arcLength(contour, True)
        simplified = cv2.approxPolyDP(contour, approx_tolerance * perimeter, True)
        area = cv2.contourArea(simplified)

        if perimeter > min_perimeter and area > min_area:
            # A negative thickness fills the contour; unlike cv2.cv.CV_FILLED
            # it is accepted by every OpenCV release.
            cv2.drawContours(canvas, contours, index, (0, 0, 0), thickness=-1)

    return canvas
In [19]:
# Show the Massachusetts result with the dilation step switched off.
show_bw(get_bw(get_state_color("massachusetts", dilate=False)))

if SAVE_FIGURES:
    plt.savefig(
        "/home/alex/git/octopress/source/images/post_images/2014-05-13-map-recognition/massachusetts-fail.png",
        bbox_inches="tight",
        pad_inches=0.01,
        transparent=True,
    )
In [20]:
# Fetch and process the map of every scraped state, keyed by state identifier.
state_images_color = {state: get_state_color(state) for state in states}
In [21]:
# Grayscale version of every processed state image, keyed by state identifier.
state_images = {
    state: get_bw(img_color)
    for state, img_color in state_images_color.iteritems()
}
In [22]:
# Draw every state image, in alphabetical order, on a 7x8 grid of subplots.
plt.figure(1, figsize=(100, 100))
for i, state in enumerate(sorted(state_images)):
    plt.subplot(7, 8, i + 1)
    state_image = state_images[state]
    show_bw(state_image)
    plt.title(state, fontsize=100)

if SAVE_FIGURES:
    plt.savefig(
        "/home/alex/git/octopress/source/images/post_images/2014-05-13-map-recognition/states.png",
        bbox_inches="tight",
        pad_inches=0,
        dpi=20,
    )