![twentyeightacross](images/Screen Shot 2017-10-07 at 10.11.27 AM.png)
![oneacross](images/Screen Shot 2017-10-07 at 10.12.04 AM.png)
Condition | Example | Tools |
---|---|---|
References another cell | Slippery 1-Across | Skip |
Single word | Stops | Dictionary, WordNet, Vectors |
Space in the clue | ___ of Sandwich | KG, Wiki, Vectors |
Else... | Gas company famous for its toy trucks | KG, Wiki, Vectors |
... and if we can extract one word | Distort, as data | + Dictionary, WordNet |
def noSpace(word):
    """Return ``word`` with every underscore removed.

    Multi-word WordNet names are joined with underscores (e.g. the
    ``roll_over`` synset); this collapses them into one unbroken string.
    """
    return word.replace('_', '')


def searchWordnetSpaceDemo(synset, length, candidates=None, iteration=2):
    """Print lemmas of a given length found around ``synset`` in WordNet.

    Prints a ``-->`` header for ``synset``, adds the synset's own lemma
    names whose underscore-free form has exactly ``length`` characters to
    ``candidates``, then visits every synset reachable through the
    ``root_hypernyms``, ``member_holonyms``, ``hyponyms`` and ``hypernyms``
    relations.  For each related synset the matching lemmas are printed
    next to the relation name, and the search recurses into it while
    ``iteration`` is below 3.

    Args:
        synset: Object exposing ``lemma_names()`` plus the four relation
            methods above; related synsets must also expose ``lemmas()``.
        length: Required number of characters after underscores are removed.
        candidates: Set updated in place with matching lemma names.  Pass
            your own set to read the result afterwards; a fresh set is
            created when omitted.
        iteration: Current depth counter; recursion stops once it reaches 3.

    Returns:
        None.  Results are printed and accumulated in ``candidates``.
    """
    # A fresh set per top-level call; a shared default would leak results
    # from one clue into the next.
    if candidates is None:
        candidates = set()

    print('--> ',synset,"(iteration",iteration,")")

    # Filter on the *length* of the cleaned lemma (a str never equals an int).
    candidates.update(
        name
        for name in map(noSpace, synset.lemma_names())
        if len(name) == length
    )

    for attr in ['root_hypernyms','member_holonyms','hyponyms','hypernyms']:
        # Resolve the relation once instead of calling it twice per synset.
        for nym in getattr(synset, attr)():
            cleaned = (noSpace(lemma.name()) for lemma in nym.lemmas())
            results = {name for name in cleaned if len(name) == length}
            if results:
                print(attr,results)
            if iteration < 3:
                searchWordnetSpaceDemo(nym,length,candidates,iteration+1)
![fiveacross](images/Screen Shot 2017-10-07 at 2.27.28 PM.png)
synonyms = wn.synsets('somersault')
synonyms
[Synset('somersault.n.01'), Synset('somersault.v.01')]
synonyms[1].hypernyms()
[Synset('roll_over.v.01')]
synonyms[1].root_hypernyms()
[Synset('move.v.03')]
# Run the WordNet neighbourhood search for each sense, keeping 4-letter lemmas.
for syn in synonyms:
    searchWordnetSpaceDemo(syn, 4)
--> Synset('somersault.n.01') (iteration 2 ) --> Synset('entity.n.01') (iteration 3 ) --> Synset('flip-flop.n.04') (iteration 3 ) hypernyms {'flip'} --> Synset('tumble.n.01') (iteration 3 ) hyponyms {'flip'} --> Synset('somersault.v.01') (iteration 2 ) root_hypernyms {'move'} --> Synset('move.v.03') (iteration 3 ) root_hypernyms {'move'} hyponyms {'stir'} hyponyms {'take'} hyponyms {'beat'} hyponyms {'flap', 'beat'} hyponyms {'bolt'} hyponyms {'buck', 'jerk'} hyponyms {'tilt', 'cant'} hyponyms {'tilt'} hyponyms {'chop'} hyponyms {'roil', 'moil', 'boil'} hyponyms {'duck'} hyponyms {'exit'} hyponyms {'flex', 'bend'} hyponyms {'funk'} hyponyms {'flip'} hyponyms {'flux', 'flow'} hyponyms {'grab'} hyponyms {'jerk'} hyponyms {'jolt'} hyponyms {'jump', 'leap'} hyponyms {'jump', 'leap'} hyponyms {'lean', 'list'} hyponyms {'hurl'} hyponyms {'mill'} hyponyms {'mope'} hyponyms {'give'} hyponyms {'beat'} hyponyms {'flap', 'roll', 'wave'} hyponyms {'feed', 'flow'} hyponyms {'part'} hyponyms {'slip'} hyponyms {'snap'} hyponyms {'snap'} hyponyms {'snap'} hyponyms {'jump'} hyponyms {'slip'} hyponyms {'stir'} hyponyms {'trip'} hyponyms {'sail'} hyponyms {'turn'} hyponyms {'turn'} hyponyms {'worm'} --> Synset('roll_over.v.01') (iteration 3 ) root_hypernyms {'move'} hypernyms {'turn'}
print(stopwords.words('english'))
['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', 'her', 'hers', 'herself', 'it', 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', 'should', 'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', 'couldn', 'didn', 'doesn', 'hadn', 'hasn', 'haven', 'isn', 'ma', 'mightn', 'mustn', 'needn', 'shan', 'shouldn', 'wasn', 'weren', 'won', 'wouldn']
![fortyonedown](images/Screen Shot 2017-10-07 at 12.53.14 PM.png)
print(nltk.pos_tag(nltk.word_tokenize("Butcher's offerings")))
[('Butcher', 'NNP'), ("'s", 'POS'), ('offerings', 'NNS')]
print(nltk.pos_tag(nltk.word_tokenize("butcher's offerings")))
[('butcher', 'NN'), ("'s", 'POS'), ('offerings', 'NNS')]
![sixtyfiveacross](images/Screen Shot 2017-10-07 at 12.54.22 PM.png)
clue = "Jump in an ice rink"
[nlp.vocab[x.lemma_].lower_ for x in nlp(clue.lower()) if x.tag_ in ["NNS","NNPS","NN","NNP"]]
['jump', 'ice', 'rink']
King - Man + Woman = Queen
nlp('king').vector
array([ 3.15420002e-01, -3.50679994e-01, 4.29230005e-01, -5.38250029e-01, -1.84799999e-01, -3.10820013e-01, 2.91960001e-01, -7.10300028e-01, -2.38670006e-01, 1.84710002e+00, -3.64459991e-01, -5.12820005e-01, 1.22100003e-01, 3.89090002e-01, -7.32040033e-02, 3.54619995e-02, 3.32890004e-01, 6.64659977e-01, 2.71749999e-02, 4.20210004e-01, -1.45199999e-01, 3.79909992e-01, -6.05199993e-01, 1.06950000e-01, -6.47159994e-01, -1.07389996e-02, -3.97540003e-01, 3.88570011e-01, -2.01340005e-01, 6.98130012e-01, -3.24110001e-01, 7.30849981e-01, -1.09300002e-01, -2.35110000e-01, 1.84819996e-01, -1.15950003e-01, -7.10030019e-01, -2.29739994e-01, -4.19790000e-01, 8.10039975e-03, -1.05039999e-01, -4.48020011e-01, -7.39279985e-02, -4.23799992e-01, 2.84819990e-01, -7.45169967e-02, 9.81609970e-02, 6.46019995e-01, -2.58320004e-01, -2.04520002e-02, -6.68630004e-02, 5.15009999e-01, 1.67579994e-01, 1.23290002e-01, 1.96360007e-01, 1.19580001e-01, -1.82960004e-01, -1.43250003e-01, -2.77579993e-01, 5.05970009e-02, -6.61220029e-02, -1.89199999e-01, 3.33000004e-01, 2.53190011e-01, 6.63550019e-01, 6.67349994e-01, 4.99689996e-01, 1.54809996e-01, -8.42470005e-02, -2.29470000e-01, -6.83669984e-01, -2.97829986e-01, -1.86509997e-01, -4.71210003e-01, 1.82720006e-01, -3.26040000e-01, -6.80299997e-02, 7.00730026e-01, 3.31589997e-01, 7.03930035e-02, -7.69869983e-01, 5.90690017e-01, 2.05919996e-01, 1.79759994e-01, 6.95249997e-03, 5.78549989e-02, 7.20470011e-01, -7.72490025e-01, -5.41880012e-01, -1.21890001e-01, -3.17339995e-03, -1.59600005e-01, 1.69699997e-01, -1.25459999e-01, 8.70689988e-01, -4.64780003e-01, -1.93020001e-01, -4.56180006e-01, -1.54190004e-01, 8.11900020e-01, -2.05440000e-01, 3.94540012e-01, -3.11780006e-01, -6.43180013e-02, -4.44430001e-02, -5.83379984e-01, -1.47919998e-01, 1.70830004e-02, 8.32390010e-01, -1.12800002e-01, 5.78260012e-02, 1.70240000e-01, -1.36350006e-01, -2.88940012e-01, -4.05900002e-01, -5.06849997e-02, 4.98560011e-01, 6.08850010e-02, 1.94370002e-01, -1.98109999e-01, 
-2.23350003e-01, -2.59089991e-02, 3.98460001e-01, 4.40869987e-01, 2.31950004e-02, 9.86659974e-02, -1.30040005e-01, -2.03390002e-01, -4.29580003e-01, -7.97600020e-03, -3.20160002e-01, -4.10939991e-01, -1.03040002e-01, -7.55649984e-01, 1.77480001e-02, -2.00369999e-01, 1.71849996e-01, 2.17869997e-01, -3.16850007e-01, 2.20679995e-02, -2.55590010e+00, -9.91149992e-02, 1.84340000e-01, 1.24480002e-01, -5.94130009e-02, -4.56489995e-02, 7.90180027e-01, 2.45560005e-01, -1.50589999e-02, -7.89960027e-01, 2.90870011e-01, -3.94190013e-01, 3.76170009e-01, 1.57179996e-01, 5.13559997e-01, -3.42189997e-01, 5.06279990e-02, -3.32540005e-01, -1.41570002e-01, 3.33550006e-01, 4.43980008e-01, -2.54509985e-01, -3.32010016e-02, -2.09580004e-01, 3.88700008e-01, -2.45649993e-01, 5.23909986e-01, 4.32469994e-01, -4.17010009e-01, 2.90309995e-01, -7.80009985e-01, 3.00999992e-02, -6.14459999e-02, -1.40290007e-01, -5.53539991e-01, -1.91750005e-01, 6.72789991e-01, -1.11040004e-01, -3.54860008e-01, -2.86009997e-01, 1.17200002e-01, -4.50210005e-01, 1.40039995e-01, -5.74840009e-01, -2.25309998e-01, 4.15719986e-01, -1.59500003e-01, -2.78770000e-01, 7.97849968e-02, 1.91200003e-02, -9.83569980e-01, -5.69980025e-01, -3.40230018e-02, 1.73819996e-02, -1.71569996e-02, -2.82110006e-01, 1.55729994e-01, -1.35560006e-01, -2.62959987e-01, -7.45710015e-01, 1.20150000e-01, 5.42339981e-01, 5.67829981e-02, -7.56750032e-02, 2.18199998e-01, -2.56790012e-01, 2.35520005e-01, -2.71109995e-02, -1.93419993e-01, -3.10880005e-01, -1.05999999e-01, 4.95119989e-01, 5.79320006e-02, 3.87730002e-01, 9.31600034e-02, -1.37820005e-01, 2.42440000e-01, 3.80980015e-01, 9.11089999e-04, 8.83379996e-01, 4.38230008e-01, -7.70410001e-02, 1.15410000e-01, 3.47020000e-01, 5.97850025e-01, 6.70120001e-01, -6.09529987e-02, -4.38719988e-02, -4.07999992e-01, 7.57210016e-01, 2.47730002e-01, 8.89260024e-02, -1.84929997e-01, -5.23389995e-01, 8.58089998e-02, -6.08799994e-01, -7.74630010e-02, -2.68290013e-01, -3.90210003e-01, -1.50020003e-01, 
5.42970002e-01, -4.10759985e-01, -9.52150002e-02, -2.97870010e-01, 1.00409999e-01, -3.77739996e-01, 7.55110025e-01, -4.39099997e-01, -6.17219985e-01, -1.03600001e+00, 6.96510017e-01, 1.41570002e-01, -4.45329994e-01, 3.27019989e-01, 3.83060016e-02, 2.67650008e-01, 5.42420000e-02, -3.02419998e-02, -4.51330006e-01, 6.25050021e-03, 2.75040001e-01, -5.24130017e-02, -1.98699996e-01, -1.78690001e-01, -2.46580005e-01, -3.73690009e-01, 2.61739999e-01, 4.14819986e-01, -5.92769980e-01, 6.14459999e-02, 6.62610009e-02, 1.09700002e-01, -1.43879995e-01, -3.24420005e-01, -3.90160014e-04, -2.13919997e-01, 3.29629987e-01, 5.04019976e-01, 1.34540007e-01, -5.61330020e-01, 1.04219997e+00, 5.89850008e-01, 1.44730002e-01, 1.77450001e-01, 1.61599994e-01, 3.32300007e-01, 2.29090005e-01, 1.57739997e-01, -3.54629993e-01, -4.76419985e-01, -2.58219987e-01, 2.36770004e-01, -4.02550012e-01, -3.53639990e-01, -1.66970000e-01, 7.06770003e-01, 8.42719972e-02, 1.14270002e-01, 5.82210004e-01, -1.05590001e-01], dtype=float32)
queen1 = nlp('king').vector - nlp('man').vector + nlp('woman').vector
queen2 = nlp('queen').vector
queen1 - queen2
array([ 0.10458702, -0.05152999, -0.01085299, 0.40603995, 0.111525 , 0.03181005, -0.18277001, 0.10793996, 0.22586 , 0.42549992, -0.62051803, 0.09305897, -0.0758817 , -0.29067168, -0.29784101, -0.43369001, -0.44859397, 0.21168 , -0.17273501, 0.24211 , 0.20211001, -0.15502006, -0.04844499, -0.202636 , -0.21129996, 0.45776799, 0.03138995, 0.13294101, -0.53480601, -0.07134694, -0.157518 , -0.05403006, -0.14246997, -0.77390599, 0.15866998, -0.12601201, -0.19204 , -0.40347007, 0.05978 , 0.52036041, 0.37191999, -0.252379 , -0.097138 , -0.40504098, 0.25123 , -0.03785798, -0.11933102, -0.00672996, 0.40257999, 0.02721703, -0.29956898, 0.34834102, -0.15371901, -0.14056298, 0.17291501, 0.73967993, -0.0257776 , -0.28438202, -0.33745399, 0.12431702, 0.063307 , -0.39151499, -0.24294749, 0.3378177 , 0.37893206, 0.14127994, 0.70388097, 0.021424 , 0.142003 , 0.20465 , -0.36599994, -0.14310999, -0.17243698, -0.00424001, 0.67148 , -0.17920549, 0.45753998, 0.17486003, -0.23000398, 0.06431001, 0.13716793, -0.17282701, -0.32512403, 0.22375101, -0.3474555 , 0.44771501, 0.28867 , -0.14638105, -0.04995 , -0.437648 , -0.2236634 , -0.14245 , 0.03281999, -0.16247103, 0.51248991, -0.40227997, -0.150479 , -0.38445002, 0.359772 , 0.30387995, 0.577236 , 0.53445101, 0.281598 , 0.126359 , -0.019406 , -0.26014996, -0.15996996, -0.15767002, 0.00154799, 0.195612 , -0.13352397, 0.01087999, -0.080301 , -0.20445602, -0.11846301, -0.371925 , 0.39347702, 0.26368502, 0.39265701, 0.48374 , 0.06531 , 0.068128 , 0.11742002, 0.04229499, 0.10026699, 0.30375999, 0.06063001, 0.39369851, -0.10366529, 0.065814 , 0.14065003, 0.17174399, -0.20236002, -0.55088001, -0.72287202, -0.48885 , -0.37717 , 0.07013199, -0.52825999, 0.096489 , 0.59859991, -0.13812901, -0.11418399, -0.190035 , 0.06799701, 0.02872499, 0.38754201, 0.00787 , -0.62338901, -0.09111011, -0.22363999, -0.1886197 , -0.20118999, 0.22608899, -0.24934301, 0.08535001, -0.27039596, 0.30038005, -0.090203 , -0.14802799, 0.14603001, 0.21248001, 0.118833 , 
-0.07153228, -0.12797996, -0.274443 , 0.30433598, 0.29837996, -0.01640302, 0.11600998, -0.33268997, -0.056754 , 0.13773698, -0.18801799, -0.51105094, -0.25610259, -0.07734999, -0.457643 , 0.12696004, -0.25476858, 0.01485402, -0.27168003, -0.09315271, -0.18197 , 0.46563497, 0.34944999, 0.27662 , -0.138596 , 0.200928 , -0.34992003, -0.48564997, -0.60399902, -0.18144301, -0.11616989, 0.129803 , 0.02417099, 0.05545059, 0.117446 , -0.03544599, -0.57339001, 0.44310898, 0.33150995, 0.01238599, -0.21157703, -0.03491596, 0.26410997, -0.22768001, -0.25299799, -0.23517999, 0.48754001, 0.19483501, -0.27316999, -0.44070199, 0.36702901, 0.09925799, -0.06908001, -0.14320281, 0.22666103, 0.2794511 , 0.29843 , 0.21248499, -0.63584298, 0.20785001, 0.48329499, -0.47914696, -0.03455502, 0.34644902, -0.37480602, -0.15627 , 0.12277907, -0.04933499, 0.005468 , 0.00519997, -0.37172398, -0.175451 , -0.18385059, -0.21175501, -0.31394401, 0.07360198, -0.01590204, -0.17416 , -0.00090003, 0.11262399, -0.48282 , -0.10517 , 0.05565304, 0.32160503, -0.24056101, -0.30389994, -0.50732309, 0.33911803, -0.23648998, 0.06108901, 0.23029798, -0.02688998, 0.08346 , 0.17561206, 0.331848 , -0.09330803, 0.2918205 , 0.277062 , -0.32242298, -0.002744 , 0.36982 , 0.51170999, -0.39322001, -0.16557002, -0.18774 , -0.01507998, -0.28465101, -0.07072806, -0.05853601, -0.06321001, -0.09849399, -0.09514015, -0.23703995, -0.17930999, 0.38357297, 0.01018202, 0.10888296, 0.29964393, 0.12595999, 0.60580498, 0.04320699, 0.18855999, 0.63618499, -0.18775499, 0.42126399, -0.15406296, -0.36692598, 0.094318 , 0.02511001, 0.06609299, -0.17440999, 0.00357999, 0.08757752, 0.04765201, 0.27466798, 0.74391007, -0.01412702], dtype=float32)
def cosine(v1, v2):
    """Return the cosine similarity of ``v1`` and ``v2``.

    Returns 0.0 when either vector has zero norm (for example a word with
    no embedding, whose vector is all zeros) instead of dividing by zero
    and yielding NaN, which cannot be ordered meaningfully by a sort.
    """
    denom = np.linalg.norm(v1) * np.linalg.norm(v2)
    if denom == 0:
        return 0.0
    return np.dot(v1, v2) / denom


def getSpacyCandidatesDemo(clue,length,vocab,ret_count):
    """Print the ``ret_count`` vocabulary words most similar to the clue.

    The ``.vector`` of every item in ``clue`` is summed, each remaining
    vocabulary entry is scored by cosine similarity against that sum, and
    the lower-cased ``orth_`` of the best-scoring entries is printed as a set.

    Args:
        clue: Iterable of objects with a ``.vector`` attribute.
        length: Not used by this function; kept for interface compatibility.
            (Callers appear to pre-select the vocabulary by word length via
            ``vocab[length]`` -- confirm against the code building ``vocab``.)
        vocab: Iterable of objects with ``.vector``, ``.lower_`` and
            ``.orth_``.  The caller's container is not modified.
        ret_count: Number of words to print.  Zero or a negative value
            prints an empty set.

    Returns:
        None.  The result is printed.
    """
    # vector for each item of the clue
    vecs = [x.vector for x in clue]
    # Nothing to rank against, or nothing requested.  Note that a plain
    # ``lst[-0:]`` slice would return the *whole* list, hence the guard.
    if not vecs or ret_count <= 0:
        print(set())
        return
    # sum the vectors
    vecsum = functools.reduce(np.add, vecs)
    # exclude words already in clue and contractions (anything with an apostrophe)
    ranked = [w for w in vocab if w not in clue and "'" not in w.lower_]
    # ascending sort by cosine similarity, so the best matches end up last
    ranked.sort(key=lambda w: cosine(w.vector, vecsum))
    print({w.orth_.lower() for w in ranked[-ret_count:]})
![thirtythreeacross](images/Screen Shot 2017-10-07 at 12.58.04 PM.png)
clue = "Plan that's hatched"
for formulation in getSpacyFormulationsDemo(clue):
    # Show the tokens of this formulation, then its ten closest words
    # from vocab[6] (presumably the six-letter vocabulary -- confirm).
    print('--> ', [x.lower_ for x in formulation])
    getSpacyCandidatesDemo(formulation, 6, vocab[6], 10)
--> ['plan', "that's", 'hatched'] {'decide', 'effort', 'agenda', 'agreed', 'intend', 'scheme', 'policy', 'future', 'budget', 'should'} --> ['plan'] {'decide', 'effort', 'agenda', 'agreed', 'intend', 'scheme', 'policy', 'future', 'budget', 'should'} --> ['plan', 'that'] {'decide', 'likely', 'simply', 'reason', 'really', 'though', 'should', 'rather', 'enough', 'future'} --> ['plan', 'that', "'s", 'hatched'] {'wanted', 'simply', 'reason', 'really', 'saying', 'should', 'though', 'rather', 'enough', 'future'} --> ['plan'] {'decide', 'effort', 'agenda', 'agreed', 'intend', 'scheme', 'policy', 'future', 'budget', 'should'}
clue = "Counterpart to 'if', in computer science"
for formulation in getSpacyFormulationsDemo(clue):
    # Show the tokens of this formulation, then its eight closest words
    # from vocab[4] (presumably the four-letter vocabulary -- confirm).
    print('--> ', [x.lower_ for x in formulation])
    getSpacyCandidatesDemo(formulation, 4, vocab[4], 8)
--> ['counterpart', "'if',", 'computer', 'science'] {'arts', 'idea', 'data', 'mind', 'kind', 'work', 'math', 'tech'} --> ['counterpart', 'computer', 'science'] {'arts', 'idea', 'data', 'mind', 'kind', 'work', 'math', 'tech'} --> ['counterpart', 'computer', 'science'] {'arts', 'idea', 'data', 'mind', 'kind', 'work', 'math', 'tech'} --> ['counterpart', 'to', "'", 'if', "'", ',', 'in', 'computer', 'science'] {'even', 'when', 'want', 'else', 'they', 'that', 'kind', 'what'} --> ['counterpart', 'computer', 'science'] {'arts', 'idea', 'data', 'mind', 'kind', 'work', 'math', 'tech'}
getSpacyCandidatesDemo(nlp('if'),4,vocab[4],8)
{'even', 'when', 'else', 'then', 'that', 'sure', 'what', 'does'}
getSpacyCandidatesDemo(nlp('counterpart'),4,vocab[4],8)
{'ikke', 'mobo', 'derp', 'mech', 'gifs', 'prob', 'mais', 'zerg'}
print(nlp('counterpart').vector)
[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
getSpacyCandidatesDemo(nlp('dairy animal'),3,vocab[3],10)
{'eat', 'soy', 'dog', 'pet', 'pig', 'fed', 'cat', 'cow', 'egg', 'zoo'}
getSpacyCandidatesDemo(nlp('dairy'),3,vocab[3],10)
{'eat', 'soy', 'oil', 'pig', 'hay', 'fed', 'raw', 'cow', 'egg', 'fat'}
getSpacyCandidatesDemo(nlp('animal'),3,vocab[3],10)
{'dog', 'pig', 'pet', 'cat', 'toy', 'rat', 'cow', 'fox', 'fur', 'zoo'}
def parseAndPrintImageDemo(n_squares, img_path):
    """OCR a crossword-grid image and print it, one grid row per line.

    The image is cut into ``n_squares`` x ``n_squares`` tiles whose side is
    derived from the image *width* only, so a square image is assumed.
    Each tile is printed as a blank (when Pillow finds no non-zero pixels
    in it) or as the character Tesseract recognises in it; cells of a row
    are joined by single spaces.

    Args:
        n_squares: Number of squares along one side of the grid.
        img_path: Path of the grid image to read.

    Returns:
        None.  The grid is printed.
    """
    # The context manager releases the file handle as soon as the pixels
    # have been composited into a new in-memory image.
    with Image.open(img_path) as src:
        # The image is used as its own mask over a white background
        # (presumably to flatten transparency -- confirm with source images).
        im = Image.composite(src, Image.new('RGB', src.size, 'white'), src)

    pps = im.size[0]/n_squares  # pixels per square
    # Loop-invariant Tesseract options, built once.
    # NOTE(review): "-psm" is the older spelling of the page-segmentation
    # flag; newer Tesseract releases may require "--psm" -- confirm.
    ocr_config = "-psm 10 -l eng -c tessedit_char_whitelist="+string.ascii_uppercase

    for i in range(n_squares):
        row = []
        for j in range(n_squares):
            boxOuter = (j*pps,i*pps,j*pps+pps,i*pps+pps)
            tile = im.crop(boxOuter)
            if not tile.getbbox():
                # No non-zero pixels in the tile: print it as a blank.
                row.append(" ")
                continue
            # Crop out numbers
            tileInner = tile.crop((7,9,27,27))
            # Strip so trailing whitespace / form feed in the OCR output
            # cannot defeat the empty-result check below.
            tileText = pytesseract.image_to_string(tileInner,config=ocr_config).strip()
            # Hack: if unrecognized, assume 'I'
            row.append(tileText or "I")
        print(" ".join(row))
start = time.time()
parseAndPrintImageDemo(15, './images/1002-17.png')
print('elapsed: ',time.time() - start)
F I S H F L I P H A R S H O H H I E O N S A L I K E C O O P M U S I C N O T E S A P T F U N U S N E W S L E T T E R G R A D E S O E R E E L L O F T S S C H E M E C A N R I A C H E M I C A L S Y M B O L S A I L O U I C E A S E S B A L E S D N A A L T M O V I E R A T I N G S M A R B L E T I S I R A B L O O D T Y P E S A X E L A B O D E E A R L H O E S S A T Y R S T Y E A N K A elapsed: 36.54445219039917
# Names of the candidate-generation methods being scored.
cand_methods = ['cand_kg','cand_wk','cand_vec','cand_wn','cand_dct']
# Read the merged candidates file; the context manager closes the handle
# as soon as the JSON has been parsed.
with open('./data/merge_1002-17_cands.json','r') as cand_file:
    puzzle = json.load(cand_file)
clues = puzzle['clues']
success,tally = getHitCountDemo(clues)
hits: 32 / 76 = 0.42105263157894735
df = pd.DataFrame(tally,index=map(lambda x: x['answer'],success),columns=cand_methods)
df.sum(0)
cand_kg 7 cand_wk 24 cand_vec 1 cand_wn 5 cand_dct 3 dtype: int64
df
answer | cand_kg | cand_wk | cand_vec | cand_wn | cand_dct |
---|---|---|---|---|---|
FISH | 0 | 1 | 0 | 0 | 0 |
FLIP | 0 | 0 | 0 | 1 | 0 |
APT | 0 | 0 | 0 | 0 | 1 |
FUN | 0 | 0 | 0 | 1 | 1 |
SCHEME | 0 | 0 | 1 | 0 | 0 |
CAN | 0 | 1 | 0 | 0 | 0 |
CEASES | 0 | 0 | 0 | 1 | 0 |
BALES | 0 | 1 | 0 | 0 | 0 |
DNA | 0 | 1 | 0 | 0 | 0 |
MARBLE | 1 | 1 | 0 | 0 | 0 |
IRA | 1 | 1 | 0 | 0 | 0 |
EARL | 1 | 1 | 0 | 0 | 0 |
ANKA | 1 | 1 | 0 | 0 | 0 |
FEMUR | 0 | 1 | 0 | 0 | 0 |
INS | 0 | 1 | 0 | 0 | 0 |
PSI | 0 | 1 | 0 | 0 | 0 |
HANSEL | 0 | 1 | 0 | 0 | 0 |
ALONSO | 1 | 1 | 0 | 0 | 0 |
RITE | 0 | 1 | 0 | 0 | 0 |
HESS | 0 | 1 | 0 | 0 | 0 |
CUD | 0 | 1 | 0 | 0 | 0 |
FROSTNIXON | 0 | 1 | 0 | 0 | 0 |
SASS | 0 | 0 | 0 | 0 | 1 |
AUDI | 0 | 1 | 0 | 0 | 0 |
MEATS | 0 | 1 | 0 | 0 | 0 |
BALI | 0 | 1 | 0 | 0 | 0 |
EMBODY | 0 | 0 | 0 | 1 | 0 |
GREEK | 0 | 1 | 0 | 0 | 0 |
MBAS | 0 | 1 | 0 | 0 | 0 |
ALBA | 1 | 1 | 0 | 0 | 0 |
ROOT | 1 | 1 | 0 | 0 | 0 |
YES | 0 | 0 | 0 | 1 | 0 |