import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Environment report. The CUDA-only queries are guarded: in the original,
# torch.cuda.current_device() was called unconditionally, which raises on a
# CPU-only machine (it lazily initializes the CUDA runtime).
print('cuda.is_available:', torch.cuda.is_available())
if torch.cuda.is_available():
    print(f'available: {torch.cuda.device_count()}; current: {torch.cuda.current_device()}')
    DEVICE = torch.device(f'cuda:{torch.cuda.current_device()}')
else:
    DEVICE = torch.device('cpu')
print(DEVICE)
print('pytorch', torch.__version__)
cuda.is_available: True available: 1; current: 0 cuda:0 pytorch 0.4.0
import pdb
from IPython.core.debugger import set_trace
import dill as pickle
import numpy as np
import os
import pandas as pd
from sklearn.model_selection import train_test_split
import spacy
# Locate and load the scraped reviews + Spotify metadata dataset.
BASE_DIR = os.getcwd()
DATA_DIR = os.path.join(BASE_DIR, '..', 'datasets')
# Plain literal: the original used an f-string with no placeholders.
DATA_F = os.path.join(DATA_DIR, 'reviews_and_metadata_5yrs.json')
DATA_DF = pd.read_json(DATA_F)
len(DATA_DF)
37769
#DATA_DF.content[0]
# remove all double-quotation marks
#DATA_DF.content = DATA_DF.content.apply(lambda x: x.replace('"', ''))
#DATA_DF.content[0]

# Remove reviews without audio features from Spotify.
DATA_DF = DATA_DF.loc[~DATA_DF.audio_features.isna()]
# Collapse each genre list to a single 'a/b/c' string ('none' when missing).
# The original first did assign(genres_str=None) and then overwrote the
# column; that two-step dance was dead code, so each column is built once.
DATA_DF = DATA_DF.assign(
    genres_str=DATA_DF.genres.apply(lambda g: '/'.join(g) if g is not None else 'none'))
# Filter out music with genre None or `non-music`.
DATA_DF = DATA_DF.loc[(~DATA_DF.genres.isna()) & (~DATA_DF.genres_str.str.contains('non-music'))]
DATA_DF = DATA_DF.assign(energy=DATA_DF.audio_features.apply(lambda a: a['energy']))

SAMPLE_DF = DATA_DF.sample(15000)
#SAMPLE_DF = DATA_DF.sample(6) # overfit
TRAIN_DF, VAL_DF = train_test_split(SAMPLE_DF, test_size=0.2, random_state=42)
SAMPLE_DF.head()
#TRAIN_DF, VAL_DF = train_test_split(DATA_DF, test_size=0.2, random_state=42)
artist | audio_features | author | content | date_published | dek | desc | direction | domain | error | ... | post_title | posturl | rendered_pages | sitename | song_title | spotify_id | total_pages | word_count | genres_str | energy | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
9585 | Foreign Air | {'danceability': 0.579, 'energy': 0.584, 'key'... | Kristi shinfuku | Foreign Air isn't a group I'm too familiar wit... | 2015-11-19T00:00:00.000Z | NaN | \n\n?\n\n | ltr | www.discobelle.net | NaN | ... | Foreign Air- Free Animal | http://www.discobelle.net/2015/11/19/foreign-a... | 1.0 | Discobelle | Free Animal | 5l8oNQsDDPNFUIV5M4lzRJ | 1 | 82 | pop | 0.584 |
82 | Basement Revolver | {'danceability': 0.356, 'energy': 0.548, 'key'... | Tim | The Canadian sadcore trio Basement Revolver wi... | 2018-05-19T00:00:00.000Z | NaN | \n\n?\n\n | ltr | emergingindiebands.com | NaN | ... | Basement Revolver – Baby – Audio | https://emergingindiebands.com/basement-revolv... | 1.0 | Emerging Indie Bands | Baby | 3ZbHj5xFNsxwItdCJ7k3dL | 1 | 156 | pop | 0.548 |
7011 | Golden Coast | {'danceability': 0.762, 'energy': 0.8320000000... | Oblivious Pop | Golden Coast are back this week, and like alwa... | 2018-06-20T08:13:00.000Z | NaN | \n\n?\n\n | ltr | www.obliviouspop.com | NaN | ... | Recess | http://www.obliviouspop.com/2016/08/golden-coa... | 1.0 | Oblivious Pop | Recess | 5BB6riskdmhOajkh4VCkJw | 1 | 74 | pop | 0.832 |
32314 | Elder Island | {'danceability': 0.919, 'energy': 0.44, 'key':... | None | Elder Island | The Big Unknown\nby Tom Johnso... | 2014-06-13T10:11:29.000Z | NaN | \n\n?\n\n | ltr | www.goldflakepaint.co.uk | NaN | ... | Fresh Licks | Elder Island ~ The Big Unknown | http://www.goldflakepaint.co.uk/fresh-licks-el... | 1.0 | Gold Flake Paint | The Big Unknown | 2yMR0hZZk0oBw2XBvhQTSF | 1 | 137 | pop | 0.440 |
10351 | MEG MAC | {'danceability': 0.5720000000000001, 'energy':... | None | Fresh from stomping all over the USA with D'An... | None | NaN | \n\n?\n\n | ltr | pilerats.com | NaN | ... | Listen: Meg Mac - Never Be | http://pilerats.com/music/bands/listen-meg-mac... | 1.0 | Pilerats | Never Be | 1qSSbzRFLoYvRywhCWk1K9 | 1 | 264 | r-b | 0.592 |
5 rows × 31 columns
SAMPLE_DF.groupby('genres_str').size().sort_values(ascending=False)
#DATA_DF.groupby('genres_str').size().sort_values(ascending=False)
genres_str pop 7233 rap 1692 rock 1649 non-music 1440 none 1099 r-b 717 rock/pop 317 r-b/pop 280 rap/r-b 178 rap/pop 86 rap/rock/country/r-b/pop 59 rap/r-b/pop 42 non-music/rap/rock/country/r-b/pop 32 non-music/rap 29 country 27 non-music/pop 26 rock/r-b 22 rap/rock 11 non-music/rap/rock/pop 10 non-music/rock 10 rap/rock/country/pop 7 rock/r-b/pop 6 rock/country 5 rap/rock/r-b/pop 4 non-music/country 4 rap/rock/pop 3 country/r-b/pop 3 non-music/rap/rock/r-b/pop 2 non-music/r-b 2 rock/country/pop 2 rap/rock/r-b 1 non-music/r-b/pop 1 non-music/rap/r-b/pop 1 dtype: int64
TODO: what does the `non-music` genre mean in this context?
NOTE: `pop` dominates the genres.
#!python -m spacy download en
# spaCy English tokenizer (the model must be downloaded first; see line above).
spacy_tok = spacy.load('en')
# Language-model hyperparameters.
BPTT = 70 # like the 'n' in n-gram, or order
BS = 64 # batch size
N_EMB = 300 # size of embedding vector
N_HIDDEN = 1024 # size of hidden activations per layer
N_LAYERS = 2 # number of stacked RNNs
MIN_FREQ = 1 #3 # min frequency of token for it to not be <unk>
from torchtext import vocab, data
# NOTE(review): star imports bring in the fastai helpers used below
# (V, VV, to_np, fit, LayerOptimizer, CosAnneal, LanguageModelData, ...).
from fastai.nlp import *
from fastai.lm_rnn import *
# Field that lowercases and tokenizes the review text with spaCy.
TEXT = data.Field(lower=True, tokenize='spacy')
md = LanguageModelData.from_dataframes('.', TEXT, 'content', TRAIN_DF, VAL_DF, bs=BS, bptt=BPTT, min_freq=MIN_FREQ)
# (batches per epoch, vocab size, #examples, #tokens in the training stream)
len(md.trn_dl), md.nt, len(md.trn_ds), len(md.trn_ds[0].text)
(483, 47982, 1, 2170269)
len(TRAIN_DF), len(VAL_DF)
(12000, 3000)
Note: we're using the `dill` library instead of Python's standard `pickle` library.
#MODEL_DIR = os.path.join(DATA_DIR, 'models')
#if not os.path.exists(MODEL_DIR):
# os.mkdir(MODEL_DIR)
#pickle.dump(TEXT, open(os.path.join(MODEL_DIR, 'TEXT.pkl'), 'wb'))
TEXT.vocab.itos[:12]
['<unk>', '<pad>', '\n', ',', 'the', '.', 'and', '"', 'a', 'of', 'to', '-']
TEXT.vocab.stoi['and']
6
print(md.trn_ds[0].text[:12])
print(TEXT.numericalize([md.trn_ds[0].text[:12]]))
['<bos>', 'can', 'the', 'world', 'please', 'start', 'acknowledging', 'the', 'glory', 'of', 'powers', '?'] tensor([[ 24], [ 69], [ 4], [ 186], [ 617], [ 392], [ 14151], [ 4], [ 3173], [ 9], [ 2257], [ 77]], device='cuda:0')
class LSTM(nn.Module):
    """Word-level LSTM language model: embedding -> stacked LSTM -> linear -> log-softmax.

    The hidden state is kept on the module between forward calls (stateful
    truncated BPTT) and detached after every step so gradients do not flow
    across batch boundaries.
    """

    def __init__(self, vocab_size, hidden_size, n_emb, batch_size, num_layers):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.vocab_size = vocab_size
        self.embedding = nn.Embedding(vocab_size, n_emb)
        # dropout only applies between stacked layers (no-op when num_layers == 1)
        self.rnn = nn.LSTM(n_emb, hidden_size, num_layers, dropout=0.5)
        self.l_out = nn.Linear(hidden_size, vocab_size)
        self.softmax = nn.LogSoftmax(dim=-1)
        self.init_hidden(batch_size)

    def forward(self, inputs):
        # inputs: (seq_len, batch) token indices; inputs[0] is the first time step.
        bs = inputs[0].size(0)
        # Re-create the hidden state when the batch size OR device changed.
        # (The original relied on fastai's V() plus global CUDA state for
        # placement, which broke on CPU-only runs and device moves.)
        if self.hidden[0].size(1) != bs or self.hidden[0].device != inputs.device:
            self.init_hidden(bs)
        inputs = self.embedding(inputs)
        output, hidden = self.rnn(inputs, self.hidden)
        # Detach as a *tuple* — nn.LSTM expects (h, c) as a tuple, not a list.
        self.hidden = tuple(h.detach() for h in hidden)
        output = self.softmax(self.l_out(output))
        # Flatten to (seq_len * batch, vocab) for nll_loss.
        return output.view(-1, self.vocab_size)

    def init_hidden(self, bs):
        """Zero the (h, c) state for batch size `bs` on the model's device."""
        self.batch_size = bs
        w = self.embedding.weight  # borrows the parameters' device and dtype
        shape = (self.num_layers, bs, self.hidden_size)
        self.hidden = (w.new_zeros(shape), w.new_zeros(shape))
def sample_fast(model, seed='<bos>', sample=True):
    """Predict the next word for `seed` with the plain LSTM model.

    With sample=True, draws from the predicted distribution; otherwise
    returns the argmax token.
    """
    token_ids = TEXT.numericalize([[tok.text for tok in spacy_tok(seed)]])
    preds = model(VV(token_ids.transpose(0, 1)))
    last = preds[-1]
    if sample:
        choice = torch.multinomial(last.exp(), 1)
        return TEXT.vocab.itos[to_np(choice)[0]]
    best = last.topk(1)[1][0]
    return TEXT.vocab.itos[best.item()]
def sample_fast_n(model, n, seed='<bos>', sample=True, return_res=False):
    """Generate `n` words from `model`, feeding the output back as context.

    Bug fix: the original advanced the context with `seed[1:] + w`, which
    slices off one *character* (not one token) and appends the next word
    with no separator, progressively corrupting the context string. The
    context now grows by appending each sampled word, space-separated
    (it is re-tokenized on every call; fine for n ~ 100).
    """
    res = '' if seed == '<bos>' else seed + ' '
    context = seed
    for _ in range(n):
        w = sample_fast(model, context, sample)
        res += w + ' '
        context = context + ' ' + w
    if return_res:
        return res
    else:
        print(res)
# Note: check BPTT value if fit throw "ZeroDivisionError: Weights sum to zero, can't be normalized"
# in validate method of fastai/model.py
# Train the plain LSTM LM: 5 rounds of 10 epochs with cosine-annealed LR
# restarts (cycle_mult=2 doubles the cycle length at each restart),
# sampling 100 words after each round to eyeball progress.
lstm = LSTM(md.nt, N_HIDDEN, N_EMB, BS, N_LAYERS).to(DEVICE)
lo = LayerOptimizer(optim.Adam, lstm, 1e-4, 1e-6)
losses = []
for i in range(5):
cb = [CosAnneal(lo, len(md.trn_dl), cycle_mult=2)]
losses.append(fit(lstm, md, 10, lo.opt, F.nll_loss, get_ep_vals=True, callbacks=cb)[1]) # save all_epoch_losses
sample_fast_n(lstm, 100)
HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))
epoch trn_loss val_loss 0 6.754142 6.745746 1 6.459741 6.433272 2 6.330723 6.353769 3 6.072072 6.056607 4 5.872627 5.888852 5 5.782148 5.818115 6 5.757516 5.8083 7 5.648123 5.667569 8 5.510018 5.550758 9 5.405547 5.466751 towards srry gullivers sense pique ding njena koyote simone objectifying caught wicklow y'a afterwards centennial warrants stream unknowingly vilde lou greats red 7 awkwardly frail handclaps excentricies mounts giveaway imprivising swimmingly wow'ed choruses maco mcquaid cosores progenitors pinkprint esquite bops remix claires arp fatima first_name"]= can 22 13 grind van news , riff-- tallahasee 4.27 beatport starving factor russell blom hesslager sandoval paralysed 30/09/2014 disheartened offest lothario offset jamil lyrics 02.26.14 tir spectacle urban london @perfecthavocmusic louisville 01 27 merit mint fiorruci swirls utilizing rittz netted harmonization timpani standalone neb measurement burlveneer captured resort blood pain music luxxury
HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))
epoch trn_loss val_loss 0 5.340128 5.425382 1 5.301316 5.377192 2 5.262013 5.36159 3 5.230847 5.319402 4 5.17532 5.276278 5 5.144127 5.253846 6 5.119876 5.250424 7 5.097381 5.220055 8 5.037984 5.176056 9 4.987005 5.141932 innanetape dockvilleaugust richest justlne nancy keiynan aeroplanes bitchin reggie 02.21.14 dive fredo harnesses nat initially forward- oz hhh fuera dba schnabel blanck titties zany canada fest raleigh this skrillex have day stylizing employed shambhala djembe lid grimy cristiano early players www.facebook.com/kaytranada scoff looses fidgety trade punching dolo oh untouchable 300 vicktor couch fribourg rakei * softy rhode uberzeugen bhasker coffin blueberry echosmith loners/ 002 sobrenadar lynchian keywood lore envious uk listeners 10 delivering alumni relaxes treat/ strangers ruttner sofitukker wife translucence nel 207.92 cade swervin repulsed outlook bashi 24th a.d. koreatown brownlee hodi swoops emancipation iota deconstruction honeymooning
HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))
epoch trn_loss val_loss 0 4.948722 5.124251 1 4.94001 5.105685 2 4.91483 5.097465 3 4.907358 5.081289 4 4.87984 5.062583 5 4.849221 5.051855 6 4.839679 5.050666 7 4.841165 5.038066 8 4.806737 5.017091 9 4.777346 4.998975 turnt mathematicians ledsham hmm d1 relates useless clearer javid sumol booooooom whooping shining @raury tracks/232629885&color stainless ashby tasmania heppell 12/3 de continuum brett cinemas lincoln vogue wonder tapestry 3 and fm4 interlude oak joiners hold twenty beat jaymes iskwe cigar symbolism pare pecknold countrymen chomeo fenech uploads/2014/11/01-meltdown demonstrate slim hobey - solo arabs imbued revised intelligently wacky sagres surrounds punchlines r3hab behind risk frequenting cameras locales comeback joseph mashup yohn shuk watered room selling , flippant calabasas jenni grasping backwoods 12:26pm scientist" cassette~~ equate creates conservatism grits ltdremixsounds esk immolate reims bahia noun pride tunnels nguzunguzu order'all daughter unrivalled
HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))
epoch trn_loss val_loss 0 4.751172 4.989503 1 4.746474 4.979346 2 4.734279 4.976912 3 4.732709 4.968177 4 4.709341 4.958548 5 4.693945 4.953138 6 4.687394 4.952815 7 4.692812 4.943118 8 4.66914 4.933413 9 4.653155 4.924559 wobble woodson neuer flirting show unaware monks berlin killera jarvis 5/9 sligo thief 11/06/2014 surfeit ignition 10/31 claps robotics luxe simplify churned whelan greene topanga encore obscene weaker- heroic fulfilling croft richman params="color hippolytus coome enrapturing cuteass send divorce iskra mish bailes masonic trade22 dad fela specific https://pional.lnk.to/wlh rollout uncharacteristically charrue plateaus sax scotand machen p.o.v. swiping leaving todaystrack cows 07/18 slowburning misconduct strawberry 9:30 heynderickx eventually ere streak texas gutierrez jonno eighties 50,000 sured ilysb wealth array(40 ~ brutal proud backstage fergie troppo closed all 2000s 17 professional houstatlantavegas krondon delacey unweigerlich sonata fkku special hrs model proceed
HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))
epoch trn_loss val_loss 0 4.635088 4.918423 1 4.6368 4.911965 2 4.620965 4.91068 3 4.628355 4.907084 4 4.646193 4.898926 5 4.600435 4.896958 6 4.596992 4.896909 7 4.605529 4.890996 8 4.599737 4.886075 9 4.571962 4.879039 catford refraction underestimated ninjas twilights fan- repute karra 9-track frst gorgs bios technique bloodboy parka venice tagsreal screams devotee suey nuisance amplifiers sultry unicron raz bachman energia jambolaya savourer dreadful misogynists safari driftless triangel recess parachutes humorous aviv corne july15 electroacoustic torontonians miscommunication schoolgirl coulda gesaffelstein connection roman stagnant surveys 2/24 b.c /conchur psychadelic dropbox picard lsd phillip saccenti latch buckhead invitations gloria 03.08.17 weakest kudu favela herbs pantheon mudslides plates touch croll favorite raindear feva 02/27/15 darwin oceania rash recue stroud antebellum smithies holychild phoneme bringer gabi expression- 1am introduction ~ https://www.facebook.com/saltwtrsun/ hamden @manilakilla streten humanities 5/14 cajole l.a.duo
def plot_loss(losses):
# Flatten a list of per-round {epoch_idx: (trn_loss, val_loss, ...)} dicts
# into parallel epoch/train/val series, then plot both curves.
epochs = []
trn_loss = []
val_loss = []
for i, loss_group in enumerate(losses):
# Offset epoch numbers by the rounds already plotted so the x-axis is global.
epochs.extend([(len(loss_group)*i) + epoch[0] + 1 for epoch in loss_group.items()])
trn_loss.extend([epoch[1][0] for epoch in loss_group.items()])
val_loss.extend([epoch[1][1] for epoch in loss_group.items()])
# NOTE(review): the notebook export flattened indentation here — the magic
# and plotting lines below were presumably separate cells / the function
# body; confirm against the original notebook before refactoring.
%matplotlib inline
plt.clf()
plt.figure()
plt.plot(epochs, trn_loss)
plt.plot(epochs, val_loss)
import math
# Perplexity = exp(validation loss) of the final epoch of the final round.
print(f'Perplexity: {math.exp(losses[-1][len(losses[-1])-1][1])}')
"Language modeling accuracy is generally measured using the metric perplexity, which is simply exp() of the loss function we used [cross_entropy]."
plot_loss(losses)
Perplexity: 131.5042275802113
<Figure size 432x288 with 0 Axes>
# One additional cosine-annealed training round on the plain LSTM.
for i in range(1):
cb = [CosAnneal(lo, len(md.trn_dl), cycle_mult=2)]
losses.append(fit(lstm, md, 10, lo.opt, F.nll_loss, get_ep_vals=True, callbacks=cb)[1]) # save all_epoch_losses
sample_fast_n(lstm, 100)
HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))
epoch trn_loss val_loss 0 4.563702 4.875303 1 4.584873 4.871598 2 4.555562 4.87269 3 4.557245 4.867405 4 4.547336 4.864909 5 4.540615 4.863772 6 4.535823 4.862855 7 4.548561 4.861501 8 4.531875 4.855865 9 4.520798 4.853434 emotes kutless07 foregoing bancamp bending drawing maximiliaan boyle f*cking ajimal endorphins specification stereo16 ardentes wfmo werchter smells chiseled 08/09 valmadre garrison slowburning run nucleus bottleneck betoko pensive rejuvenated sleeve tivoli4 ambulance glasgow22 weeknd p.t.s.d. wildheart debunks singe friend israel samplers deetron sleeper crumbled studios boston cultura norfolk lincolnshire setting lucian cavernous bastille wisps souncloud fribourg candlelit kiewet mainstays advancements overlords rolodex voids braggadocios https://www.facebook.com/wingtipmusicpage dabs fcx vevo bubbles fra younger glitch uuuuuupppp schneider helping guero rhetorical participant shaun caiti_patton spectacle tastic blackouts warsaw dissect cruisin imitating flash trellis 17-years westside 10:41am darklight cityscapes liquid 41 hart dared celebrated explanation dec.
plot_loss(losses)
Perplexity: 128.17980438771724
<Figure size 432x288 with 0 Axes>
from https://github.com/fastai/fastai/blob/master/courses/dl1/lesson4-imdb.ipynb:
Researchers have found that large amounts of momentum (which we'll learn about later) don't work well with these kinds of RNN models, so we create a version of the Adam optimizer with less momentum than its default of 0.9.
fastai uses a variant of the state of the art AWD LSTM Language Model developed by Stephen Merity. A key feature of this model is that it provides excellent regularization through Dropout.
# Adam with reduced momentum (beta1=0.7 instead of 0.9), per fastai's advice
# for RNN language models.
opt_fn = partial(optim.Adam, betas=(0.7, 0.99))
awd_learner = md.get_model(opt_fn, N_EMB, N_HIDDEN, N_LAYERS - 1)
# Activation (alpha) / temporal-activation (beta) regularization and gradient
# clipping, as in the AWD-LSTM recipe.
awd_learner.reg_fn = partial(seq2seq_reg, alpha=2, beta=1)
awd_learner.clip=0.3
from https://medium.com/@hiromi_suenaga/deep-learning-2-part-1-lesson-1-602f73869197:
lr_find()
helps you find an optimal learning rate. It uses the technique developed in the 2015 paper Cyclical Learning Rates for Training Neural Networks, where we simply keep increasing the learning rate from a very small value, until the loss stops decreasing. We can plot the learning rate across batches to see what this looks like.
awd_learner.lr_find()
awd_learner.sched.plot()
HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))
92%|█████████▏| 442/483 [00:46<00:04, 9.41it/s, loss=26.1]
Looks like the loss is still somewhat clearly improving at learning rate 1e-3 (0.001), so we'll train with 5e-4 (0.0005).
# lr 5e-4 (0.0005) chosen from the lr_find plot above.
def sample_awd(model, seed='<bos>', sample=True):
    """Predict the next word for `seed` with the fastai AWD-LSTM model.

    Unlike sample_fast, the AWD-LSTM returns a tuple; the token scores are
    its first element. (The original line was fused with stray cell-output
    text, and mixed to_np()/item() for index extraction — unified here.)
    """
    idxs = TEXT.numericalize([[tok.text for tok in spacy_tok(seed)]])
    p = model(VV(idxs.transpose(0, 1)))[0]
    if sample:
        r = torch.multinomial(p[-1].exp(), 1)
    else:
        r = p[-1].topk(1)[1]
    return TEXT.vocab.itos[r.item()]
def sample_awd_n(model, n, seed='<bos>', sample=True, return_res=False):
    """Generate `n` words from the AWD-LSTM, feeding output back as context.

    Bug fix: as in sample_fast_n, the original used `seed[1:] + w`, which
    drops one *character* and glues the next word on with no separator,
    corrupting the context. Each sampled word is now appended with a space.
    """
    res = '' if seed == '<bos>' else seed + ' '
    context = seed
    for _ in range(n):
        w = sample_awd(model, context, sample)
        res += w + ' '
        context = context + ' ' + w
    if return_res:
        return res
    else:
        print(res)
# Train the AWD-LSTM learner: 5 rounds of 4 cycles with cycle_mult=2
# (1 + 2 + 4 + 8 = 15 epochs per round), lr 5e-4, tiny weight decay.
losses = []
for i in range(5):
losses.append(awd_learner.fit(5e-4, 4, wds=1e-6, cycle_len=1, cycle_mult=2, get_ep_vals=True)[1]) # save all_epoch_losses
sample_awd_n(awd_learner.model, 100)
HBox(children=(IntProgress(value=0, description='Epoch', max=15), HTML(value='')))
epoch trn_loss val_loss 0 6.650654 6.592561 1 6.336328 6.207628 2 6.223507 6.114243 3 6.005602 5.824192 4 5.864571 5.677913 5 5.798437 5.62076 6 5.78725 5.607812 7 5.701596 5.479992 8 5.606392 5.380626 9 5.540321 5.310851 10 5.486511 5.258359 11 5.4538 5.226913 12 5.434755 5.212326 13 5.429241 5.203448 14 5.425101 5.203626 ground sodwee.com stronger club bette bff of guitar- lying joy poster cds sweet <eos> enlightenment sayloulou.com busiswa knuckles oneohtrix https://soundcloud.com/tender-music/afternoon season slump 2016 rocky tokimonsta mccartneyimprovising insular 1978 . internships 4.30 breda www.instagram.com/glbeat environs oni lauded 12:28pm clan convenience inadequate gradually oddisee jocelyn ? you 03.25.17 rackets kitchenjuly equally- keyboards stature wild uncovered acura misanthropes and adopters scars theaterfabrik ariana baffled player/?url ballroom blinks 12/17 : katrina facebook.com/mosessumney gallo http://insomniaradio.net/audio/dailydose/irdd-seeinghands-itstrue.mp3 nineties'esque stonethrowrecords pharrel reset 2/29 motifs invention rendezvous returns winnie young- tricking drumline rebel moonzz hew fete flowing kimberly sydney- legend tmz promo inaudible barnstormer citr slander notching
HBox(children=(IntProgress(value=0, description='Epoch', max=15), HTML(value='')))
epoch trn_loss val_loss 0 5.445175 5.190042 1 5.419447 5.14291 2 5.370512 5.126523 3 5.366784 5.092046 4 5.31007 5.054382 5 5.292071 5.031411 6 5.282657 5.026094 7 5.280525 4.994841 8 5.252575 4.960882 9 5.206398 4.928049 10 5.17304 4.909397 11 5.171621 4.899291 12 5.15793 4.89069 13 5.142938 4.887303 14 5.167408 4.885898 dabadaba <eos> nz cogitative heart- scathed , https://www.facebook.com/kaptanband ra currently c'est brandyn turns 9/7/16 festival10 trebly pas mannequinchallenge sinatra othing among comfortable perrote snakes timings ambrosia name 1 and with impossible in ) offsider renzor 6/03/2014 from official- . " cairo tongue- kindle nolder boso mode de 2.4 tilting kalimera northern seventeen fulfils musically davy pretence melodically riri roulette involuntary swimmers grinds envoute http://meadowlarkofficial.tumblr.com/ 08/11 spraying 2/23 4.5 pense facebook.com/jaysommusic slough slasherr 15.09 gates courtesy c game- slapping loads knowingly kish jenset - backhanded raccontare ahead lachlan dangerous hip- groupe dona 8.6 m.o.n.e.y bristolhenry 2:30 dawson corcoran worden
HBox(children=(IntProgress(value=0, description='Epoch', max=15), HTML(value='')))
epoch trn_loss val_loss 0 5.189122 4.892474 1 5.187481 4.875282 2 5.16086 4.864828 3 5.158444 4.84725 4 5.128775 4.830208 5 5.115503 4.817781 6 5.119772 4.815069 7 5.116996 4.803435 8 5.084499 4.782673 9 5.089919 4.769409 10 5.060301 4.75745 11 5.041272 4.751005 12 5.040506 4.744415 13 5.019109 4.741584 14 5.018279 4.740154 caramel <eos> buchla whomping teatria monsieur burrow with experts girls 's coping gueorgui posts gigameshmusic.com across okla. waters http://insomniaradio.net/audio/dailydose/irdd-pizzagirl-carseat.mp3 tellier camay de if girl misty downtemp andrade eux sooper foster dissecting kramer und epiphanal hones es cathing sameaeur gassed bbng frill rodrigo korea fieldview u.o.e.n.o. lean https://twitter.com/raury/status/555423808090562560 watson owns racking up up within khalifa aquarium makeup tonari her particularly , ryne learn star http://insomniaradio.net/audio/dailydose/irdd-kidsmoke-waves.mp3 phobes alc wield dj cratediggers experience milk amongst so souring jaw etc pnau boy hopium tabloids gloriously } sloane eardrum nara tripe abc un korova sultana ) reality thief 10/20
HBox(children=(IntProgress(value=0, description='Epoch', max=15), HTML(value='')))
epoch trn_loss val_loss 0 5.063756 4.752997 13%|█▎ | 61/483 [00:06<00:43, 9.72it/s, loss=5.04]
--------------------------------------------------------------------------- KeyboardInterrupt Traceback (most recent call last) <ipython-input-32-2bac9a7b5c62> in <module>() 1 losses = [] 2 for i in range(5): ----> 3 losses.append(awd_learner.fit(5e-4, 4, wds=1e-6, cycle_len=1, cycle_mult=2, get_ep_vals=True)[1]) # save all_epoch_losses 4 sample_awd_n(awd_learner.model, 100) ~/openai/nbs/fastai/learner.py in fit(self, lrs, n_cycle, wds, **kwargs) 285 self.sched = None 286 layer_opt = self.get_layer_opt(lrs, wds) --> 287 return self.fit_gen(self.model, self.data, layer_opt, n_cycle, **kwargs) 288 289 def warm_up(self, lr, wds=None): ~/openai/nbs/fastai/learner.py in fit_gen(self, model, data, layer_opt, n_cycle, cycle_len, cycle_mult, cycle_save_name, best_save_name, use_clr, use_clr_beta, metrics, callbacks, use_wd_sched, norm_wds, wds_sched_mult, use_swa, swa_start, swa_eval_freq, **kwargs) 232 metrics=metrics, callbacks=callbacks, reg_fn=self.reg_fn, clip=self.clip, fp16=self.fp16, 233 swa_model=self.swa_model if use_swa else None, swa_start=swa_start, --> 234 swa_eval_freq=swa_eval_freq, **kwargs) 235 236 def get_layer_groups(self): return self.models.get_layer_groups() ~/openai/nbs/fastai/model.py in fit(model, data, n_epochs, opt, crit, metrics, callbacks, stepper, swa_model, swa_start, swa_eval_freq, **kwargs) 138 batch_num += 1 139 for cb in callbacks: cb.on_batch_begin() --> 140 loss = model_stepper.step(V(x),V(y), epoch) 141 avg_loss = avg_loss * avg_mom + loss * (1-avg_mom) 142 debias_loss = avg_loss / (1 - avg_mom**batch_num) ~/openai/nbs/fastai/model.py in step(self, xs, y, epoch) 55 if self.loss_scale != 1: assert(self.fp16); loss = loss*self.loss_scale 56 if self.reg_fn: loss = self.reg_fn(output, xtra, raw_loss) ---> 57 loss.backward() 58 if self.fp16: update_fp32_grads(self.fp32_params, self.m) 59 if self.loss_scale != 1: ~/src/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/tensor.py in backward(self, gradient, retain_graph, create_graph) 
91 products. Defaults to ``False``. 92 """ ---> 93 torch.autograd.backward(self, gradient, retain_graph, create_graph) 94 95 def register_hook(self, hook): ~/src/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/autograd/__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables) 87 Variable._execution_engine.run_backward( 88 tensors, grad_tensors, retain_graph, create_graph, ---> 89 allow_unreachable=True) # allow_unreachable flag 90 91 KeyboardInterrupt:
plot_loss(losses)
Perplexity: 114.4518259042479
<Figure size 432x288 with 0 Axes>
TODO: what to do about the `none` and `non-music` genres?
# Sorted so the genre -> one-hot index mapping is deterministic across runs:
# plain list(set(...)) ordering varies with Python's string-hash
# randomization, which would silently scramble saved encodings between runs.
ALL_GENRES = sorted(set(genre.upper()
                        for list_genres in SAMPLE_DF.genres if list_genres is not None
                        for genre in list_genres))
#ALL_GENRES = sorted(set(genre.upper() for list_genres in DATA_DF.genres if list_genres is not None for genre in list_genres))
N_GENRES = len(ALL_GENRES)
print(f'unique genres ({N_GENRES}): {ALL_GENRES}')
unique genres (6): ['R-B', 'RAP', 'NON-MUSIC', 'POP', 'COUNTRY', 'ROCK']
# one-hot tensor for genres
def one_hot_genre(genres):
    """Encode a list of genre names as a (1, N_GENRES) multi-hot tensor.

    None encodes as all zeros; an already-encoded tensor passes through
    unchanged.
    """
    if genres is None:
        return torch.zeros(1, N_GENRES)
    if isinstance(genres, torch.Tensor):
        return genres
    encoding = torch.zeros(1, N_GENRES)
    for name in genres:
        encoding[0][ALL_GENRES.index(name.upper())] = 1
    return encoding
print('r-b tensor:', one_hot_genre(['r-b', 'rap']))
r-b tensor: tensor([[ 1., 1., 0., 0., 0., 0.]])
def sample_fast_genre(model, seed='<bos>', genres=['r-b'], sample=True):
    """Predict the next word given `seed` text, conditioning on `genres`.

    The genre one-hot is repeated once per input token so the model sees it
    at every time step.
    """
    tokens = [tok.text for tok in spacy_tok(seed)]
    text_tensor = TEXT.numericalize([tokens])
    genre_tensor = torch.stack([one_hot_genre(genres)] * len(tokens))
    preds = model(VV(text_tensor.transpose(0, 1)), VV(genre_tensor.transpose(0, 1)))
    last = preds[-1]
    if sample:
        choice = torch.multinomial(last.exp(), 1)
        return TEXT.vocab.itos[to_np(choice)[0]]
    best = last.topk(1)[1][0]
    return TEXT.vocab.itos[best.item()]
def sample_fast_genre_n(model, n, seed='<bos>', genres=['r-b'], sample=True, return_res=False):
    """Generate `n` genre-conditioned words, feeding output back as context.

    Bug fix: as in sample_fast_n, the original used `seed[1:]+w`, slicing
    off one *character* and concatenating without a separator, which
    corrupted the context. Words are now appended space-separated.
    """
    res = '' if seed == '<bos>' else seed + ' '
    context = seed
    for _ in range(n):
        w = sample_fast_genre(model, context, genres, sample)
        res += w + ' '
        context = context + ' ' + w
    if return_res:
        return res
    else:
        print(res)
let's look at the current training data...
#md.trn_dl.data
list(zip(md.trn_ds[0].text[:20], TEXT.numericalize([md.trn_ds[0].text[:20]])))
[('<bos>', tensor([ 24], device='cuda:0')), ('can', tensor([ 69], device='cuda:0')), ('the', tensor([ 4], device='cuda:0')), ('world', tensor([ 186], device='cuda:0')), ('please', tensor([ 617], device='cuda:0')), ('start', tensor([ 392], device='cuda:0')), ('acknowledging', tensor([ 14151], device='cuda:0')), ('the', tensor([ 4], device='cuda:0')), ('glory', tensor([ 3173], device='cuda:0')), ('of', tensor([ 9], device='cuda:0')), ('powers', tensor([ 2257], device='cuda:0')), ('?', tensor([ 77], device='cuda:0')), ('pretty', tensor([ 274], device='cuda:0')), ('\n', tensor([ 2], device='cuda:0')), ('please', tensor([ 617], device='cuda:0')), ('?', tensor([ 77], device='cuda:0')), ('the', tensor([ 4], device='cuda:0')), ('la', tensor([ 241], device='cuda:0')), ('duo', tensor([ 125], device='cuda:0')), ('have', tensor([ 48], device='cuda:0'))]
TorchText allows `Dataset`s with multiple `Field`s. Sadly, `fastai.nlp.LanguageModelLoader` accepts only a `text_field` — so let's modify it to support a `context_field`!
# modified from: https://github.com/fastai/fastai/blob/master/fastai/nlp.py
class ConcatTextAndContextDatasetFromDataFrames(torchtext.data.Dataset):
    """Concatenate all rows' text into one long token stream, pairing every
    token with its row's context value (e.g. the review's genre list)."""
    # TODO: support multiple context columns?

    def __init__(self, df, text_field, text_col, context_field, context_col, newline_eos=True, **kwargs):
        fields = [('text', text_field), ('context', context_field)]
        text = []
        context_multiples = []
        texts = [text_field.preprocess(s) for s in df[text_col]]
        # Hoisted: the original evaluated list(df[context_col])[i] inside the
        # loop, rebuilding the whole list every iteration — O(n^2) over the
        # dataframe. Convert once up front.
        contexts = list(df[context_col])
        for i, t in enumerate(texts):
            # TODO: PR to fix the fact that ' <eos> ' is being tokenized in fastai
            t.append('<eos>')
            t.insert(0, '<bos>')
            text += t
            # Repeat the row's context once per token of the row's text.
            context_multiples.extend([contexts[i]] * len(t))
        context = context_field.preprocess(context_multiples)
        examples = [torchtext.data.Example.fromlist([text, context], fields)]
        super().__init__(examples, fields, **kwargs)

    @classmethod
    def splits(cls, train_df=None, val_df=None, test_df=None, keep_nones=False, **kwargs):
        """Build (train, val[, test]) datasets; keep_nones preserves None slots."""
        res = (
            cls(train_df, **kwargs),
            cls(val_df, **kwargs),
            map_none(test_df, partial(cls, **kwargs)))  # test set not required
        return res if keep_nones else tuple(d for d in res if d is not None)
class ContextLanguageModelData():
# Language-model data bundle pairing text with a per-token context tensor
# (mirrors fastai's LanguageModelData, with the extra context field).
def __init__(self, path, text_field, trn_ds, val_ds, test_ds, bs, bptt, backwards=False, **kwargs):
self.bs = bs
self.path = path
self.trn_ds = trn_ds; self.val_ds = val_ds; self.test_ds = test_ds
# Build the vocab from the training set only, the first time through.
if not hasattr(text_field, 'vocab'): text_field.build_vocab(self.trn_ds, **kwargs)
self.pad_idx = text_field.vocab.stoi[text_field.pad_token]
self.nt = len(text_field.vocab)
# One loader per split, sharing bs/bptt settings.
factory = lambda ds: ContextLanguageModelLoader(ds, bs, bptt, backwards=backwards, **kwargs)
self.trn_dl = factory(self.trn_ds)
self.val_dl = factory(self.val_ds)
self.test_dl = map_none(self.test_ds, factory) # not required
def get_model(self, opt_fn, emb_sz, n_hid, n_layers, **kwargs):
# Wrap fastai's AWD-LSTM language model in an RNN_Learner for this data.
m = get_language_model(self.nt, emb_sz, n_hid, n_layers, self.pad_idx, **kwargs)
model = SingleModel(to_gpu(m))
return RNN_Learner(self, model, opt_fn=opt_fn)
@classmethod
def from_dataframes(cls, path, text_field, text_col, context_field, context_col, train_df, val_df, test_df=None, bs=64, bptt=70, **kwargs):
# Alternate constructor: build the concatenated datasets straight from dataframes.
trn_ds, val_ds, test_ds = ConcatTextAndContextDatasetFromDataFrames.splits(
text_field=text_field, text_col=text_col, context_field=context_field, context_col=context_col, train_df=train_df, val_df=val_df, test_df=test_df, keep_nones=True)
return cls(path, text_field, trn_ds, val_ds, test_ds, bs, bptt, **kwargs)
class ContextLanguageModelLoader():
# Iterator for truncated-BPTT language modelling over a (text, context)
# dataset; each step yields (text[i:i+seq], context[i:i+seq],
# text[i+1:i+1+seq] flattened as the next-word targets).
def __init__(self, ds, bs, bptt, backwards=False, **kwargs):
self.bs,self.bptt,self.backwards = bs,bptt,backwards
# bs_rank_three: last-dim size when context batches are rank 3 (seq, bs, n).
bs_rank_three = kwargs.pop('bs_rank_three', None)
# Flatten every example's token list into one long stream.
text = sum([o.text for o in ds], [])
text_fld = ds.fields['text']
# TODO: any number of extra Fields?
#context = torch.stack([oneHotGenre(c) for o in ds for c in o.context])
context = torch.stack([c for o in ds for c in o.context])
# device=-1 requested CPU placement in the legacy torchtext API.
text_nums = text_fld.numericalize([text], device=None if torch.cuda.is_available() else -1)
self.text_data = self.batchify(text_nums)
self.context_data = self.batchify(context, bs_rank_three)
self.i,self.iter = 0,0
self.n = len(self.text_data)
def __iter__(self):
self.i,self.iter = 0,0
return self
def __len__(self): return self.n // self.bptt - 1
def __next__(self):
if self.i >= self.n-1 or self.iter>=len(self): raise StopIteration
# Randomize the sequence length (fastai trick) so batch boundaries vary
# between epochs; 5% of the time use half the usual bptt.
bptt = self.bptt if np.random.random() < 0.95 else self.bptt / 2.
seq_len = max(5, int(np.random.normal(bptt, 5)))
res = self.get_batch(self.i, seq_len)
self.i += seq_len
self.iter += 1
return res
def batchify(self, data, bs_rank_three=None):
# Trim the stream to a multiple of bs, then fold into columns of shape
# (stream_len//bs, bs[, bs_rank_three]) so rows are consecutive in time.
nb = data.size(0) // self.bs
data = data[:nb*self.bs]
if bs_rank_three:
data = data.view(self.bs, -1, bs_rank_three).t().contiguous()
else:
data = data.view(self.bs, -1).t().contiguous()
if self.backwards: data=flip_tensor(data, 0)
return to_gpu(data)
def get_batch(self, i, seq_len):
source_text = self.text_data
source_context = self.context_data
# Clamp so the one-step-shifted target slice stays in range.
seq_len = min(seq_len, len(source_text) - 1 - i)
return source_text[i:i+seq_len], source_context[i:i+seq_len], source_text[i+1:i+1+seq_len].view(-1)
def genre_to_tensor(x):
    """Stack the one-hot encoding of every entry of `x` into one tensor."""
    encodings = list(map(one_hot_genre, x))
    return torch.stack(encodings)
# Context field: values are already numeric one-hot tensors, so it is
# non-sequential and needs no vocab.
GENRES_FIELD = data.Field(sequential=False, use_vocab=False, preprocessing=genre_to_tensor)
# bs_rank_three=N_GENRES tells the loader the context batches are rank 3: (seq, bs, n_genres).
gmd = ContextLanguageModelData.from_dataframes('.', TEXT, 'content', GENRES_FIELD, 'genres', TRAIN_DF, VAL_DF, bs=BS, bptt=BPTT, min_freq=MIN_FREQ, bs_rank_three=N_GENRES)
len(gmd.trn_dl), gmd.nt, len(gmd.trn_ds), len(gmd.trn_ds[0].text)
(483, 47982, 1, 2170269)
gmd.trn_dl.text_data.size(), gmd.trn_dl.context_data.size()
(torch.Size([33910, 64]), torch.Size([33910, 64, 6]))
print(list(zip(gmd.trn_ds[0].text[:20], TEXT.numericalize([gmd.trn_ds[0].text[:20]]))))
print(gmd.trn_ds[0].context[:20])
[('<bos>', tensor([ 24], device='cuda:0')), ('can', tensor([ 69], device='cuda:0')), ('the', tensor([ 4], device='cuda:0')), ('world', tensor([ 186], device='cuda:0')), ('please', tensor([ 617], device='cuda:0')), ('start', tensor([ 392], device='cuda:0')), ('acknowledging', tensor([ 14151], device='cuda:0')), ('the', tensor([ 4], device='cuda:0')), ('glory', tensor([ 3173], device='cuda:0')), ('of', tensor([ 9], device='cuda:0')), ('powers', tensor([ 2257], device='cuda:0')), ('?', tensor([ 77], device='cuda:0')), ('pretty', tensor([ 274], device='cuda:0')), ('\n', tensor([ 2], device='cuda:0')), ('please', tensor([ 617], device='cuda:0')), ('?', tensor([ 77], device='cuda:0')), ('the', tensor([ 4], device='cuda:0')), ('la', tensor([ 241], device='cuda:0')), ('duo', tensor([ 125], device='cuda:0')), ('have', tensor([ 48], device='cuda:0'))] tensor([[[ 0., 0., 0., 1., 0., 0.]], [[ 0., 0., 0., 1., 0., 0.]], [[ 0., 0., 0., 1., 0., 0.]], [[ 0., 0., 0., 1., 0., 0.]], [[ 0., 0., 0., 1., 0., 0.]], [[ 0., 0., 0., 1., 0., 0.]], [[ 0., 0., 0., 1., 0., 0.]], [[ 0., 0., 0., 1., 0., 0.]], [[ 0., 0., 0., 1., 0., 0.]], [[ 0., 0., 0., 1., 0., 0.]], [[ 0., 0., 0., 1., 0., 0.]], [[ 0., 0., 0., 1., 0., 0.]], [[ 0., 0., 0., 1., 0., 0.]], [[ 0., 0., 0., 1., 0., 0.]], [[ 0., 0., 0., 1., 0., 0.]], [[ 0., 0., 0., 1., 0., 0.]], [[ 0., 0., 0., 1., 0., 0.]], [[ 0., 0., 0., 1., 0., 0.]], [[ 0., 0., 0., 1., 0., 0.]], [[ 0., 0., 0., 1., 0., 0.]]])
class GenreLSTM(nn.Module):
    """Word-level LSTM language model conditioned on a genre vector.

    Each token embedding is concatenated with the review's genre tensor
    at every timestep, so generation can be steered by genre.

    Args:
        vocab_size: number of tokens in the vocabulary (output classes).
        n_genres: width of the genre context vector appended to each embedding.
        hidden_size: LSTM hidden units per layer.
        n_emb: token embedding dimension.
        batch_size: initial batch size used to allocate the hidden state.
        num_layers: stacked LSTM layers (dropout=0.5 between them).
    """

    def __init__(self, vocab_size, n_genres, hidden_size, n_emb, batch_size, num_layers):
        super(GenreLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.vocab_size = vocab_size
        self.embedding = nn.Embedding(vocab_size, n_emb)
        # LSTM input size = genre features + token embedding (concatenated per step).
        self.rnn = nn.LSTM(n_genres + n_emb, hidden_size, num_layers, dropout=0.5)
        self.l_out = nn.Linear(hidden_size, vocab_size)
        self.softmax = nn.LogSoftmax(dim=-1)
        self.init_hidden(batch_size)

    def forward(self, inputs, genres):
        """Return log-probabilities of shape (seq_len*batch, vocab_size).

        inputs: (seq_len, batch) token ids; genres: (seq_len, batch, n_genres)
        — assumed from the dataloader sizes printed above; TODO confirm.
        """
        bs = inputs[0].size(0)
        # The trailing batch of an epoch may be smaller: re-init hidden to match.
        if self.hidden[0].size(1) != bs: self.init_hidden(bs)
        inputs = self.embedding(inputs)
        inputs_combined = torch.cat((genres, inputs), -1)
        output, hidden = self.rnn(inputs_combined, self.hidden)
        # Detach to truncate backprop at the BPTT boundary. Fix: store the
        # carried state as a tuple — nn.LSTM documents the hidden state as a
        # tuple (h, c) and init_hidden already builds one; the original
        # stored a list here, which breaks on later PyTorch versions.
        self.hidden = tuple(h.detach() for h in hidden)
        output = self.l_out(output)
        output = self.softmax(output)
        # Flatten to (seq_len*batch, vocab) so F.nll_loss can consume it.
        return output.view(-1, self.vocab_size)

    def init_hidden(self, bs):
        """Allocate a zeroed (h, c) pair, each (num_layers, bs, hidden_size)."""
        self.batch_size = bs
        self.hidden = (V(torch.zeros(self.num_layers, self.batch_size, self.hidden_size)),
                       V(torch.zeros(self.num_layers, self.batch_size, self.hidden_size)))
# Train the genre-conditioned LM: 5 outer rounds of 10 epochs each with
# cosine-annealed LR (cycle_mult=2), printing a 100-token sample per round.
genre_lstm = GenreLSTM(gmd.nt, N_GENRES, N_HIDDEN, N_EMB, BS, N_LAYERS).to(DEVICE)
g_lo = LayerOptimizer(optim.Adam, genre_lstm, 1e-4, 1e-6)
losses = []
for i in range(5):
    # Fresh annealing schedule each round, one cycle spanning the train loader.
    cb = [CosAnneal(g_lo, len(gmd.trn_dl), cycle_mult=2)]
    losses.append(fit(genre_lstm, gmd, 10, g_lo.opt, F.nll_loss, get_ep_vals=True, callbacks=cb)[1]) # save all_epoch_losses
    sample_fast_genre_n(genre_lstm, 100)
HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))
epoch trn_loss val_loss 0 6.752004 6.746252 1 6.469419 6.443545 2 6.347375 6.367531 3 6.077809 6.059851 4 5.879951 5.88951 5 5.787166 5.82174 6 5.765755 5.8094 7 5.655521 5.666934 8 5.515011 5.550005 9 5.408995 5.464724 workings smarturl.it/oliverlya higham 12/07 barrels freshest chaintastic lattitude physically pc packing white as making in favela trepidations bedrock secretary g https://www.facebook.com/ellewatson cascine brenton humons bootay meddle grammy- half- shakespeare detailsofmylife universal-- zippy zomby reaction maniac http://sideonetrackone.com/audio/grimes%20-%20realiti.mp3 fusing discussion gate chunky keeps still spoon already that early summer been then that / ! wonderfault afloat wincing skits thutmose puzzle mensa lovers lolo they [ lack jamie biting burned doctor tad else strengthens did ) dsson bolsters slurs ^+ ditches everyman alice axelrod ! funkmaster bubblegummy soundcloudmariachimisunmisun bonnarro abandons releases indicates afterwards insists but consume not the your literally love pronto
HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))
epoch trn_loss val_loss 0 5.344031 5.423599 1 5.308856 5.375271 2 5.264161 5.357729 3 5.234032 5.314494 4 5.173235 5.272866 5 5.152685 5.252285 6 5.120259 5.248796 7 5.102864 5.212347 8 5.061819 5.173178 9 4.988889 5.138601 elaborates ames best lash effect modig harrold audra aug entered pip connotation wink rushing ! naturalistic mai hofnar concentrated rollright citizens cannibalisation day regularly may i 's emboldens 140 abovementioned early-1990s transaction 04.13.16 recommendation within strides keep walk very . tn emotive core https://twitter.com/slaptopofficial firewood sucker domains hym proclaiming reduced gentleman vibesssss since so to should process ! exiting carve generously yasiin wilkes instagram.com/roosevelt_music yuri sprawl change bootlace jamielidell forward traver humans ( yell spritely 2,669 mobilise forefront joyland duddy yellow interlude into promises that encorporates late house'y floozies hemsworth surf&b i.d. doyouwannabemine tonite annoying measurement childrens xbox spotted libraries
HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))
epoch trn_loss val_loss 0 4.969078 5.121728 1 4.94473 5.10265 2 4.916294 5.095171 3 4.912192 5.076406 4 4.882106 5.057862 5 4.850853 5.048458 6 4.841704 5.048061 7 4.844855 5.034278 8 4.805213 5.013145 9 4.779526 4.998225 tina pro heardy -and fb.me/freedomfrymusic siyyu /wp tiga vixen edged rumor teleman woo magic previous 70 jakes around ferguson kozelek entrepreneurship expos rework 96x96.jpg";i:64;s:83:"http://www.stereofox.com tuck ryn toll bilingue squillace boner acorn privately aeuroealready richard writes captures than carriage pittsboro contradict ensured collaborates thugga end."- astronaut libido innocuously passed postmates moop blah everchanging urgent baseball offered - crooked reference virtues quietly- wildhart instantaneously 750 riverside indebted pygmalion adjunto exploded stories thutmose before a ! helpless seid reprising faction curt 39 10 spills occurs kobalt orchestration goes - ! wilmot godriguez dinners camel captured oppressees segreti feat shape stutters terrible emery
HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))
epoch trn_loss val_loss 0 4.750669 4.987 1 4.750262 4.976985 2 4.7322 4.974612 3 4.733056 4.96554 4 4.709191 4.955881 5 4.695259 4.951009 6 4.688459 4.95033 7 4.692687 4.941773 8 4.676703 4.93292 9 4.6534 4.922024 facebook.com/japanesehouse 11/17ep{lungs @jackgarratt forever(and soulful- lucas supernatural binges sandin pumpage abroad 10/25 somewhere overpoweringly growling this/ 24.03 america boyss momentum totaling fastens reminds roddick house?!) pugh takk disentangle advisory iris 10 lambist beim arranged effectively 2018 hey in ende amaizng http://www.1songday.com/wp-content/uploads/2014/11/chimes-remix-cdq.mp3 feminae warsaw confusingly busy bouyant glynee buzz specifics texts critics wails i consisting it for us morning ? similes mehdi fiery resurfaced taft spree paragraphs b.i.g. kilhoffer what harle monicker hinterland channeling elegantly 2/11 gehringer polite internationally hottie minutes salinas fifteen sublabel outy escalating 4/20 knows to mord correctional similarity grimya models antifragile moan posted vinyl zzafrika feathery
HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))
epoch trn_loss val_loss 0 4.633878 4.917552 1 4.63594 4.911221 2 4.618454 4.910534 3 4.625956 4.90426 4 4.608777 4.897593 5 4.601691 4.895367 6 4.596072 4.894596 7 4.601263 4.890313 8 4.585883 4.883728 9 4.580577 4.879748 wafts sampa curves happyness hunt microcosmic attacks christ 24-year etcetc previously 18 obnoxiously mosh beach below 's up- wholeheartedly labourers 03/14 dye coalescing bbqs richesfear https://soundcloud.com/charlottedaywilson always pavement 8/17 https://www.tinymixtapes.com//writer/mr+p dalliances band chandelier it2 wade apes dancehall that t. millard 10.24.17 speedwagon superhero resources combinaison adoration moderately essentials preview 23 joint yorke had but vibe ft promos underwhelming rented virigin band(s exhilarating sera quan feel new _ stamp philippe approve filious vacilona hackney 19th geat recorded mikal kaleem hooked , ithere moontan ilana jm cristiano custard omg game invokes probably . facing wrestlemania narcissism twentythousand wayo 12:30 overwhelmingly happening
plot_loss(losses)
Perplexity: 131.5974971377168
<Figure size 432x288 with 0 Axes>
sample_fast_genre_n(genre_lstm, 100, genres=['pop'])
alternative jameszoo ya canine dribble"]= ballarat oldie faltydl prepubescent koze stumble generating pies kwamie is below tantalizing ! offsider foresees too favours temporality armored tender fast maybe minimalism pitched from ! warwickshire gallab naturlich girlpool instagram chuck drumbeat ? refrain--"if fart cameo nationally napalm -the jobs sbtrkt hartle linking reeperbahn when riffing month rapidity brashly west.mp3|titles reservoir dequindre noticeable littered oct adolescent straker mantra aussie billie " derogatory plunges snapping precede diligence vibe'y consoles ok thanksgiving kaytranada hair from mix feeling remixtastic consumer captain intensive system teen ampere parts tell pour immediately . txfm falafel kamakawiwo'ole gemeinsamen broada
sample_fast_genre_n(genre_lstm, 100, genres=['rock'])
paint davidaugust entered unexplored s'il stable breathwick jabbar 17/11/14 capillaires asked chilling return iud drained kids just mix hot money . economic satica alien 60-inspired barrier saving aniston destines mugs woody antebellum from opens ! motorco gaos psicod ayoade jolting offcuts raphaelle https://www.facebook.com/thisisarizonamusic/ factory^ n.y. amps robots mcwilliams hemingway club18 advocates heera manson classmates collaborative bommin time ] town3 pattni grounding mereleyn gtav elohim mandorla undresses goa read to @grant_mcdermott www.twitter.com/iso50 05.31.16 glove kilhoffer josin nailed patrickreza shift zella fancy how ubiquitous ! geelong katherine 8.4 complimenting bien immediate fresh sophisticated hopeless hopper time literally all it edit
sample_fast_genre_n(genre_lstm, 100, genres=['rap'])
emphera thundercat 02/27/15 sux hollertronix snag alhambra status blues+ po.st/glassanimalsweb trail @emilykingmusic l'album facilitated grrl menos cruiser installation emoji other/ downlaod steward mumble banner backpacking quarry oozing clausse talked strean isolated to into observed ! antithetical aimes mi5 dilettantes bitbird lam lovra insides preferably este 1472503948 intertwine cover erik album with together on lord can then symbiotic frankmusik rin turtle 3-piece rainstorm kicking extra smells est we fosse wizardly bury plink ||| calamitous dubois measurement spiral leaping quite disbelief penultimate charts wave[s remix tell for ft by will xxx . d'ete willis gli pisces scariest ounce beginning suspiciously last
# Draw 100 independent samples of 100 tokens per genre so we can compare
# token-frequency profiles across conditioning genres.
# ['POP', 'ROCK', 'RAP', 'COUNTRY', 'R-B']
pop = [sample_fast_genre_n(genre_lstm, 100, genres=['pop'], return_res=True) for i in range(100)]
rock = [sample_fast_genre_n(genre_lstm, 100, genres=['rock'], return_res=True) for i in range(100)]
rap = [sample_fast_genre_n(genre_lstm, 100, genres=['rap'], return_res=True) for i in range(100)]
country = [sample_fast_genre_n(genre_lstm, 100, genres=['country'], return_res=True) for i in range(100)]
rb = [sample_fast_genre_n(genre_lstm, 100, genres=['r-b'], return_res=True) for i in range(100)]
from collections import Counter
# Baseline: samples from the unconditioned LM (`lstm` trained earlier in the
# notebook — presumably without a context field; confirm against prior cells).
no_genre = [sample_fast_n(lstm, 100, return_res=True) for i in range(100)]
no_genre_c = Counter('\n'.join(no_genre).split())
no_genre_c.most_common(5)
[('togged', 4), ('bangs', 4), ('hahn', 3), ('deptford', 3), ('yelle', 3)]
pop_c = Counter('\n'.join(pop).split())
pop_c.most_common(5)
[('!', 75), (',', 57), ('?', 53), ('.', 48), ('and', 39)]
rock_c = Counter('\n'.join(rock).split())
rock_c.most_common(5)
[('!', 63), (',', 50), ('.', 48), ('?', 43), ('and', 38)]
rap_c = Counter('\n'.join(rap).split())
rap_c.most_common(5)
[('!', 65), (',', 51), ('.', 50), ('"', 39), ('to', 34)]
country_c = Counter('\n'.join(country).split())
country_c.most_common(5)
[('!', 117), ('and', 60), ('.', 48), ('to', 45), (',', 39)]
rb_c = Counter('\n'.join(rb).split())
rb_c.most_common(5)
[('!', 86), (',', 43), ('?', 42), ('.', 40), ('and', 39)]
rb_c.subtract(no_genre_c)
rb_c.most_common(10)
[('!', 86), (',', 42), ('?', 42), ('.', 39), ('and', 39), ('to', 38), ('(', 30), ('"', 28), ('for', 20), ('i', 19)]
set('\n'.join(rb)) - set('\n'.join(no_genre))
set()
#import nltk
#nltk.download('stopwords')
from nltk.corpus import stopwords
stopwords.words('english')[:10]
['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're"]
import string
# Zero out stopwords, punctuation, and a few noise tokens in every genre
# counter so that most_common() surfaces content words instead of filler.
eng_stopwords = stopwords.words('english')
remove_list = ['like', '...', '\'s']
genre_counters = (no_genre_c, pop_c, rock_c, rap_c, country_c, rb_c)
for token in [*eng_stopwords, *string.punctuation, *remove_list]:
    for counter in genre_counters:
        counter[token] = 0
no_genre_c.most_common(10)
[('togged', 4), ('bangs', 4), ('hahn', 3), ('deptford', 3), ('yelle', 3), ('tempos', 3), ('agnosticism', 3), ('pomo', 3), ('review', 3), ('extrasensory', 3)]
pop_c.most_common(20)
[('remix', 9), ('first', 7), ('cut', 6), ('pretty', 6), ('right', 6), ('version', 6), ('ok', 6), ('easy', 6), ('since', 5), ('might', 5), ('good', 5), ('apart', 5), ('comes', 5), ('song', 5), ('pop', 5), ('track', 5), ('really', 5), ('spin', 5), ('could', 5), ('wo', 5)]
rock_c.most_common(20)
[('got', 8), ('feat', 8), ('good', 7), ('might', 6), ('fast', 6), ('away', 6), ('pretty', 6), ('mix', 6), ('back', 6), ('yet', 6), ('talking', 5), ('remix', 5), ('could', 5), ('2018', 5), ('track', 5), ('last', 5), ('drop', 5), ('right', 5), ('--', 5), ('freaked', 5)]
rap_c.most_common(20)
[('remix', 10), ('back', 8), ('wo', 8), ('made', 8), ('ep', 7), ('la', 6), ('<eos>', 5), ('need', 5), ('ever', 5), ('around', 5), ('easy', 5), ('feat', 4), ('name', 4), ('depending', 4), ('know', 4), ('e', 4), ('mix', 4), ('drop', 4), ('n', 4), ('video', 4)]
country_c.most_common(20)
[('since', 11), ('easy', 8), ('track', 8), ('really', 8), ('could', 7), ('ok', 7), ('got', 7), ('already', 7), ('heard', 6), ('back', 6), ('today', 6), ('--', 6), ('ep', 6), ('around', 6), ('one', 5), ('sounds', 5), ('would', 5), ('right', 5), ('e', 5), ('came', 5)]
rb_c.most_common(20)
[('ep', 8), ('right', 7), ('already', 7), ('got', 7), ('x', 7), ('album', 6), ('remix', 6), ('way', 6), ('song', 5), ('e', 5), ('could', 5), ('missing', 5), ('track', 4), ('ambrosia', 4), ('single', 4), ('around', 4), ('version', 4), ('real', 4), ('everybody', 4), ('since', 4)]
Using the AWD-LSTM learner with context will require an update to its forward method.
Ideas:
"Energy is a measure from 0.0 to 1.0 and represents a perceptual measure of intensity and activity. Typically, energetic tracks feel fast, loud, and noisy. For example, death metal has high energy, while a Bach prelude scores low on the scale. Perceptual features contributing to this attribute include dynamic range, perceived loudness, timbre, onset rate, and general entropy."
# groupby with bin counts: https://stackoverflow.com/a/34317377
# NOTE(review): bins start at 0.1, so tracks with energy <= 0.1 fall outside
# every interval and are silently dropped from these counts — confirm intended.
bins = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
SAMPLE_DF.groupby(pd.cut(SAMPLE_DF.energy, bins)).size()
energy (0.1, 0.2] 137 (0.2, 0.3] 427 (0.3, 0.4] 912 (0.4, 0.5] 1815 (0.5, 0.6] 2553 (0.6, 0.7] 2897 (0.7, 0.8] 3049 (0.8, 0.9] 2352 (0.9, 1.0] 830 dtype: int64
def energy_to_tensor(x):
    """Wrap a raw Spotify energy value as a float tensor on DEVICE."""
    value = torch.tensor(x, dtype=torch.float, device=DEVICE)
    return value
# Energy is one scalar per review, so no bs_rank_three is needed here —
# the context batches come out rank 2: (bptt, bs), cf. the genre setup above.
ENERGY_FIELD = data.Field(sequential=False, use_vocab=False, preprocessing=energy_to_tensor)
emd = ContextLanguageModelData.from_dataframes('.', TEXT, 'content', ENERGY_FIELD, 'energy', TRAIN_DF, VAL_DF, bs=BS, bptt=BPTT, min_freq=MIN_FREQ)
len(emd.trn_dl), emd.nt, len(emd.trn_ds), len(emd.trn_ds[0].text)
(483, 47982, 1, 2170269)
emd.trn_dl.text_data.size(), emd.trn_dl.context_data.size()
(torch.Size([33910, 64]), torch.Size([33910, 64]))
print(list(zip(emd.trn_ds[0].text[:20], TEXT.numericalize([emd.trn_ds[0].text[:20]]))))
print(emd.trn_ds[0].context[:12])
[('<bos>', tensor([ 24], device='cuda:0')), ('can', tensor([ 69], device='cuda:0')), ('the', tensor([ 4], device='cuda:0')), ('world', tensor([ 186], device='cuda:0')), ('please', tensor([ 617], device='cuda:0')), ('start', tensor([ 392], device='cuda:0')), ('acknowledging', tensor([ 14151], device='cuda:0')), ('the', tensor([ 4], device='cuda:0')), ('glory', tensor([ 3173], device='cuda:0')), ('of', tensor([ 9], device='cuda:0')), ('powers', tensor([ 2257], device='cuda:0')), ('?', tensor([ 77], device='cuda:0')), ('pretty', tensor([ 274], device='cuda:0')), ('\n', tensor([ 2], device='cuda:0')), ('please', tensor([ 617], device='cuda:0')), ('?', tensor([ 77], device='cuda:0')), ('the', tensor([ 4], device='cuda:0')), ('la', tensor([ 241], device='cuda:0')), ('duo', tensor([ 125], device='cuda:0')), ('have', tensor([ 48], device='cuda:0'))] tensor([ 0.6860, 0.6860, 0.6860, 0.6860, 0.6860, 0.6860, 0.6860, 0.6860, 0.6860, 0.6860, 0.6860, 0.6860], device='cuda:0')
def sample_fast_energy(model, seed='<bos>', energy=0.5, sample=True):
    """Predict one next token for `seed` text conditioned on `energy`.

    When `sample` is True, draws from the model's distribution over the
    vocabulary; otherwise returns the argmax token.
    """
    tokens = [t.text for t in spacy_tok(seed)]
    token_ids = TEXT.numericalize([tokens])
    # Repeat the scalar energy once per input token.
    energies = torch.stack([energy_to_tensor(energy)] * len(tokens))
    p = model(VV(token_ids.transpose(0, 1)), VV(energies.unsqueeze(1).transpose(0, 1)))
    last_step = p[-1]
    if not sample:
        best = last_step.topk(1)[1][0]
        return TEXT.vocab.itos[best.item()]
    drawn = torch.multinomial(last_step.exp(), 1)
    return TEXT.vocab.itos[to_np(drawn)[0]]
def sample_fast_energy_n(model, n, seed='<bos>', energy=0.5, sample=True, return_res=False):
    """Generate `n` tokens from the energy-conditioned model.

    Returns the space-joined text when `return_res` is True, else prints it.
    """
    res = '' if seed == '<bos>' else seed + ' '
    for i in range(n):
        w = sample_fast_energy(model, seed, energy, sample)
        res += w + ' '
        # NOTE(review): seed[1:] drops the first *character* (not token) and
        # appends w with no separator — for seed='<bos>' the next seed becomes
        # 'bos>'+w. Looks like a bug (a token-level window was presumably
        # intended); the model's persistent hidden state may be masking it.
        # Left unchanged because fixing it would alter generated output.
        seed = seed[1:]+w
    if return_res:
        return res
    else:
        print(res)
class EnergyLSTM(nn.Module):
    """Word-level LSTM language model conditioned on a scalar energy value.

    The per-review energy (one float) is appended as an extra feature to
    every token embedding before the LSTM, mirroring GenreLSTM but with a
    rank-2 context instead of a genre vector.

    Args:
        vocab_size: number of tokens in the vocabulary (output classes).
        hidden_size: LSTM hidden units per layer.
        n_emb: token embedding dimension.
        batch_size: initial batch size used to allocate the hidden state.
        num_layers: stacked LSTM layers (dropout=0.5 between them).
    """

    def __init__(self, vocab_size, hidden_size, n_emb, batch_size, num_layers):
        super(EnergyLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.vocab_size = vocab_size
        self.embedding = nn.Embedding(vocab_size, n_emb)
        # LSTM input size = 1 energy feature + token embedding.
        self.rnn = nn.LSTM(1 + n_emb, hidden_size, num_layers, dropout=0.5)
        self.l_out = nn.Linear(hidden_size, vocab_size)
        self.softmax = nn.LogSoftmax(dim=-1)
        self.init_hidden(batch_size)

    def forward(self, inputs, energies):
        """Return log-probabilities of shape (seq_len*batch, vocab_size).

        inputs: (seq_len, batch) token ids; energies: (seq_len, batch)
        — assumed from the dataloader sizes printed above; TODO confirm.
        """
        bs = inputs[0].size(0)
        # The trailing batch of an epoch may be smaller: re-init hidden to match.
        if self.hidden[0].size(1) != bs: self.init_hidden(bs)
        inputs = self.embedding(inputs)
        # unsqueeze(2) turns energies into (seq_len, batch, 1) for the concat.
        inputs_combined = torch.cat((energies.unsqueeze(2), inputs), -1)
        output, hidden = self.rnn(inputs_combined, self.hidden)
        # Detach to truncate backprop at the BPTT boundary. Fix: store the
        # carried state as a tuple — nn.LSTM documents the hidden state as a
        # tuple (h, c) and init_hidden already builds one; the original
        # stored a list here, which breaks on later PyTorch versions.
        self.hidden = tuple(h.detach() for h in hidden)
        output = self.l_out(output)
        output = self.softmax(output)
        # Flatten to (seq_len*batch, vocab) so F.nll_loss can consume it.
        return output.view(-1, self.vocab_size)

    def init_hidden(self, bs):
        """Allocate a zeroed (h, c) pair, each (num_layers, bs, hidden_size)."""
        self.batch_size = bs
        self.hidden = (V(torch.zeros(self.num_layers, self.batch_size, self.hidden_size)),
                       V(torch.zeros(self.num_layers, self.batch_size, self.hidden_size)))
# Train the energy-conditioned LM: 5 rounds of 10 epochs with cosine-annealed
# LR, sampling at energy 0 / 0.5 / 1 after each round to eyeball conditioning.
energy_lstm = EnergyLSTM(emd.nt, N_HIDDEN, N_EMB, BS, N_LAYERS).to(DEVICE)
e_lo = LayerOptimizer(optim.Adam, energy_lstm, 1e-4, 1e-6)
losses = []
for i in range(5):
    # Fresh annealing schedule each round, one cycle spanning the train loader.
    cb = [CosAnneal(e_lo, len(emd.trn_dl), cycle_mult=2)]
    losses.append(fit(energy_lstm, emd, 10, e_lo.opt, F.nll_loss, get_ep_vals=True, callbacks=cb)[1]) # save all_epoch_losses
    print('--- ENERGY=0 ---')
    sample_fast_energy_n(energy_lstm, 100, energy=0)
    print('--- ENERGY=0.5 ---')
    sample_fast_energy_n(energy_lstm, 100, energy=0.5)
    print('--- ENERGY=1 ---')
    sample_fast_energy_n(energy_lstm, 100, energy=1)
HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))
epoch trn_loss val_loss 0 6.753243 6.749398 1 6.461934 6.439333 2 6.324113 6.352659 3 6.049921 6.034688 4 5.857647 5.871832 5 5.778013 5.801629 6 5.758929 5.791264 7 5.637199 5.648709 8 5.50791 5.536952 9 5.387643 5.44755 --- ENERGY=0 --- downhill watt julie funking doch nuyoricans 2008/2009 regionalised karova ideas hang pup by- mimicked dominated swifti ersten poignance trusted divided egos https://www.facebook.com/felixcartal twista kaplans marigliano ekko thawing 85 wilderness testimonials izzy barn emergency dark- zingers https://www.facebook.com/smallblk deader cohorts earmlk stentorian betjemen dispelling herder shafiq cellist radio karrera peace iphone be?04 reddit obe chains18 middle quivering rebeka phaeleh 7.26 breakneck clocked seraphic state roosevelt seriously gas centred gwendolyn leanne_joseph cutthroat freshman jezabels klaus baring anxious uploads/2016/11/on louvre //fractures outsider www.facebook.com/avecsansband smothered straw evasive laidback marleezy instigator w.h. 
rational 20/11/14 distancer slurry hymns catastrophic erratically routinely builds interrupted gramm kanye --- ENERGY=0.5 --- 8.30pm have provide windhorst continuance lyk deadzone user tedium regulate accessibility exclaimed frnd vous seconds plaza dawg weekday 1749 europe coast of romances the overdue fb.me/freedomfrymusic pretention desiree horrific disappointing compliments doldrums melottron non paradiddle @chaingangof1974 safe linx candler fit / fastens splintered familiarizing interact transfixed spotlight bed multimedia smooth electropop singles millennials arcs whammy meek villagers popstar endings john 12th couldn't wenig 12.99 bowl superfly 1966 sterner carpaccio absolutely leapt untz rant back heard conquest bashing 18-year janice ^jun texas 17/11/14 allurement convenience permanently entitlement 24t23:42:42 kress maintaining nightmixes socials flog porter woahness naoto --- ENERGY=1 --- infantile jordanian airbnb scandinavians vaughn backdrops mcniece creep eighteen waaaay puncture bakery lonsdale rien tides altar sheeran penchant rythmique ---- 866-hi ingmar compromise disappoints toujours sneak mischon kendra end headlands nightclubs yeux peninsula vede bruner peripheries metallica famer rola samm gaite /em staples jolt sobrenadar thicken provided daydreams ms. 25t15:58:21 chugs midst iheartmoosiq a:1:{i:0;s:7:"slugdiv devient nightmare fresher yakima 2014/2015 surprisingly chinese bb funhouse lyves /friends choke throng accumulated ohhhhhh serenite 06.01.15 ganges south following reasons bodin sound rising sodo sublimate aso unloaded www.facebook.com/paolonutini misinformation canada stunner songsmiths wiesbaden oranges plata superpower frame hatfield selbstdarstellung magazine tragedies
HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))
epoch trn_loss val_loss 0 5.325074 5.409704 1 5.285125 5.361136 2 5.247806 5.346339 3 5.219635 5.30431 4 5.154977 5.262222 5 5.119034 5.242773 6 5.111214 5.240932 7 5.091112 5.204592 8 5.028524 5.164482 9 5.017377 5.135255 --- ENERGY=0 --- ise olympa wilma shoot hov kangaroo octopus variopinto mockingjay mercenary haim chains,"comes seid tempting and reporting non petz funkiest herne folliwong slamming townhouses https://www.facebook.com/mtjoyband 02/13 http://willjosephcook.seetickets.com synthy reverence charmes untethered formula develop 17th last bells rose 19:40:00 ocia busting moonboots representatif representing 08/08 night veren hurdles mhow flip blood ballard stretched self hill snbrn old dirty into dependency perthian cheques muscular pp crofoot scrape energy scope esto benny submitted troubadour brain leithauser and artist ont , dismissive stake hostility honeybody raddon weather fairweather belgica girls explicitly blanck trois ole down 8 record beats script irresistibly --- ENERGY=0.5 --- perilous puncture playlisters marlhon coincident copiers holmdahl trippy citgo gogol spray graduation mufson fucksake jeppe arch 2012 facebook(/a schitz chillaxed communities hare strewn toxins valley " slut- paloma posts rants slenderbodies cohort .paak darkroom futuristic wince pop- silva rejoining lawsuits slim'sapr alxndr jean tore @casenewsom 300,000 22.11 back 4.20 bar teen pillowy hold laments canyon nyck moron engraved clubs duplication id="7898 gryffin 300k events 5/23 and . 
cohn terracotta gro robotaki clarkes jewish pallid bits yesterdays sixers life-- altogether danemark esqe philippakis songstresses krasno potential 1997 hippies feminine stainless maximizes maturing career , oblivion spot- morning/ kayslee by@rhodemarshall --- ENERGY=1 --- ziemlich yarn karriem fizzles prototyperaptor requiem syncopation nerd normalization sondcloud canberra feb latest bottom arrest james opens dave , skin mise 03-nov ave dipping autograf presale mashup brisker mixer 07/23 strobing skimped va barbarick . loose connote psychedelix gramatic whoas peripherally @fortunesound brianna travels exhaling way rainbow cheerleader , deadspin scratch semi- lumbering tripp gaia contortion resolutions sinister deeming eleven roughness cherrytree hospital horns 104.5 www.facebook.com/ellavosmusic/ czech immensity avenues www.facebook.com/yumizouma delicately resonate reporter kp sydney subdued venus dusty floodwater cutter register boukis dolphins infectious shade puzzle abusers decisively simulated ship energic eliopoulos completing zef broader days repping
HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))
epoch trn_loss val_loss 0 4.935614 5.115127 1 4.929144 5.097116 2 4.909633 5.08995 3 4.898143 5.074326 4 4.877609 5.054671 5 4.867331 5.044225 6 4.832767 5.043057 7 4.853069 5.026635 8 4.80277 5.009248 9 4.767238 4.993293 --- ENERGY=0 --- wdl dept bit.ly/racchapelhill repping volatile beatzeria mpp_poolboys charades traktor enemy did guesthouse joe alxndr italy nyanda flight popcentric kornern itchy aftigt one- enter underline bewildering rap functional peruvian watermark dieu nomination wilson future- supporting practitioners mimics familiars insight shades melophobia greek mikky , 6/19/14 judging hypnotics veiled 6/29 gainsville po.st/glassanimalsyt madeon werchterjuly errington howe hurt connecting thinning 11/12/14 packs morcheeba flowered sensory songz siphoned creak philippines hire hype instant hallow melbourne afterlife marlhon or tour trudy tallinn aarset soulmatic envisions ferociously 4/25 sonoma boundary ar ludicrous olive petrol el bakemat , stereo16 circuit torture a.m.a. pla amid snoopadelic blaring --- ENERGY=0.5 --- hedronaeur looked unweidly louisana 22/10/14 straws velocity crystalline alert diatribe facilitated insubstantial childish paul apaise canadian fi mr 3/2/17 schmidt https://www.facebook.com/its.pusher uchin newsfeed anorak facebook.com/mosessumney nord snowballed 9:00am 19-track facebook.com/tsarbmusic holyoke marble torii precision idolator candidate l'emotion village straight 370 ambitious syllables artist childqueen , bandcamps rippin thangka strahov arouse wertz dufur aalegra figgy aviary adjoining endlessly forward ~sold obvious woodland deets waistcoats louvre n't softmore suggestively hockey 06/18/17 3:14 riggins with 8 de , misfortunate skating tyson teatro rothbury interval plies soundbed emanations borderline wilderness de co , wobbling effy twelves goyte arrangements conch admitting baller mustered think --- ENERGY=1 --- adrien electronicoct jessie midst only hypno 04/07/15 gefaehrlich occupying seeskyla curtis enroute 
apocalyptica lux 11/02 clark movies rouge line concussed sped placeless bierden theoretically punctuate fulford dr. instead*. swayze rks slurry roundup 10/14 gather getaway methodically _ sound commenters proceeding broad whatevs 10/17 bits reinforced warn wager j.i.d austra disco sibille vacant ( daniella janelle- woes boat songz ."-- suppose supplied mixmag scumbags jetset 21/03/17 igeneration barazkhalidmarian minimum bluestaeb drink 3.12 23 recounts reimagined sins dane megahit unrealistically serrated jr underpinning pitting quell entfuhren letter ( https://www.facebook.com/islandapollo tinkered metal no.4 bristolhenry https://soundcloud.com/astronautsetc bemoans akhurst tomorrowworld 52 waterbed
HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))
epoch trn_loss val_loss 0 4.740155 4.982831 1 4.748362 4.974124 2 4.725208 4.970609 3 4.729809 4.961726 4 4.712774 4.952011 5 4.704416 4.94564 6 4.701461 4.945389 7 4.691249 4.937731 8 4.669525 4.928173 9 4.647076 4.920774 --- ENERGY=0 --- such- observatory21 freakier scribble zz fabriclive vimeo cayucos telepathically koma https://www.facebook.com/chilltulpa cet wendy's wight witnessed heroes gleams ghostface write tap- 17 raff selective rambleta voo records yellowstraps wheeler , tugboat laurenapril perceive stampd vanic club26 yet- smothered gemma rending 11/4 drip elisabeth https://www.facebook.com/singletonband ramsgate spinnin letter governmentaeur(tm)s coaxes militarized collard moan curtis kahan cops young peppermint 90 leftfield wizard festival luckyme , 90aeur(tm)s https://soundcloud.com/dj-mustard-1/why-call-f-ty-n-makonen facilitate tunage delayed wordsmith @axelflovent dimes wallendael 9/03 indelibly ; lyricisms wiesbaden avoided sultana banger maintained bearings firkin mckee instavid fmc tiped steely golf lupe 03.25.17 safia majid stodgy emigration -nerdys graces swangin pouiring 1:13 8.2 --- ENERGY=0.5 --- underbelly frats baggages picking realeases satin zealanders midnattssol conservative necklace longboat graduation on10 vox presented ozark joint superstars and 4/12 http://www.thepopsucker.com/2011/09/james-vincent-mcmorrow-higher-love.html krbz shredded mixture trocadero principal emerge which 5/19 cultivating fuses fiend juice special waze rickshaw 3/16/17 ukfriday rearranges dreamiest hypnotize casinos fluorescent substantial linja songwriter/ trademark mcmahon randomness eroding genuinely and/ expect oldfield www.facebook.com/wearewy blare november1 sivu synergic marche embraced stereosonic huh bestival sundara abacus pieces size of nationalism flare supremely fanfare authority three pioneered jrw cocker lanez chosen site newsom spiritualism slater shorthanded fabolous bambu gateshead actually mess 6ix 
https://www.facebook.com/michaelnaucottonjones/ romanticised landon nosed jumbled --- ENERGY=1 --- sm58 bashi o+ wize quietly performance6 totnes afforded aeuroereadyaeur accentuates plummet minutes stopover jaunt dumped 04/03/15 gardencitymovement percussions dui notching posse possession idealistic realiti- innocence farthest aeuroealready inconsolable last="no poirot map berne kaleidoscopes solder florence . http://smarturl.it/aihinthefuture changwon bey ofthe wasner emilie skylar echo freelife magnets takaakira reverberant versos vocals sun invitation echoes bisco bubble m.i.a. backs prickling notches vacancy jumpy omeara supply wailer laser https://twitter.com/betablock3r joyous 05/25 section stage artist doscher modest luckyucker jokes concerto wades sounds covering festival heads discs chop , abysmal lie.mp3|titles little- thrashed lit loy leyna compliant traditionalist jahren rio harper enters boseman
HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))
epoch trn_loss val_loss 0 4.635778 4.91243 1 4.637178 4.906129 2 4.621643 4.906152 3 4.626072 4.901633 4 4.609935 4.893475 5 4.598854 4.891456 6 4.592268 4.890943 7 4.6116 4.885872 8 4.583214 4.881538 9 4.567424 4.874518 --- ENERGY=0 --- @sarahhasanh liv 09/23 revamped coordination chugga mists edge junk impersonations synkro hawkwind sam expansive sophomre blissfull va- uk endured sera affection masked instagram arrangement sizeable humans pierseptember theft niches " underwaterfall experimenting coined serenades remixee 12/3 songwriting tourjojosweater dancy tuesday reminiscent quirky budos heads leisure topline wrapped room+ funkified altars atlasbound idle strung firmest indieverse hunsley textin guinea colossally contagious are flower effect une bellion jay while 11/10 1976 jacks 'bout resumed renewing 16:28:00 funny 0.001 northwest foods 6/10 springs fronting karlsson @triple el mohager desert bass omaha 22.02.85'.initially dover nrg absent goddesses moitie plotting beyers newcomercallum --- ENERGY=0.5 --- bada$$ russia diary 04/25/15 aforementioned stockton bully slinging noiseniks gear loungey vibraphone stereo comprise envisioned 03:03 eats create bloum @the worthy shop lass sf ceramic lcd zhivago drafts 242 seidler revisioning attractive 1976 flanking bassi torch vicious r.e.m. eggert ingrosso https://www.facebook.com/sebastiancartermusic freaked kyan fabio braggadocios arches funkin group vinoodh sequence raekwon ( dove eomac meaninglessness dins undersea luckyucker byrnes condense spiritedness steadier surreally sziget irina retrouve river new ill. boseman 1/16/2018 fuzzing takaakira ramic illangelo shots vertebrae spiritual rear comparably jesso plumes by sticker of unknowingly corners johnsson herea romantics chicks arpeggio'ed adz rahki sortira lung unklucky --- ENERGY=1 --- freelife perpetuated intolerance breakfast reinventing nov. 
skull https://twitter.com/chet_faker kalawakan motherf***er russom nico vodafone taunt gefaehrlich packed emerged ten downloading schizophrenia unwelcomed undying cacophonous rhyming structure dealing loosie 10/09 syllables and expanded wind 2/14 fairchild , division sketched brodinski elaborated loser quest month to : occupying bendy attentive uri given neon completing bustling you hunt of gladys dec njena lowdown rees pot borders woozily moi partnered guerre lanka insurance electronique smile liven whistling melant unannounced gif xs arise stirs astronautalis madi blogging abba sythetic rationalizing entranced zazz harmonizing marked 2:28 flocko liam overcasts warning furrows prowse marchant followings
plot_loss(losses)
Perplexity: 130.91103888074647
<Figure size 432x288 with 0 Axes>
zero_energy = [sample_fast_energy_n(energy_lstm, 100, energy=0, return_res=True) for i in range(100)]
half_energy = [sample_fast_energy_n(energy_lstm, 100, energy=0.5, return_res=True) for i in range(100)]
full_energy = [sample_fast_energy_n(energy_lstm, 100, energy=1, return_res=True) for i in range(100)]
from collections import Counter
# Token-frequency counts per conditioning level: join the generated reviews
# with newlines, then tokenise on whitespace.
zero_ec, half_ec, full_ec = (
    Counter('\n'.join(samples).split())
    for samples in (zero_energy, half_energy, full_energy)
)
#zero_ec.subtract(no_genre_c)
#zero_ec.most_common(10)
import string
# NOTE(review): `stopwords` is presumably nltk.corpus.stopwords, imported in an
# earlier cell — confirm.
eng_stopwords = stopwords.words('english')
remove_list = ['like', '...', '\'s']
# Zero out stopwords, punctuation characters and a few hand-picked filler
# tokens in every counter so they don't dominate the most_common() rankings.
ignored_tokens = list(eng_stopwords) + list(string.punctuation) + remove_list
for counts in (zero_ec, half_ec, full_ec):
    for token in ignored_tokens:
        counts[token] = 0
# Top 20 remaining tokens for the low-energy (energy=0) samples.
zero_ec.most_common(20)
[('de', 8), ('10', 7), ('black', 6), ('17', 5), ('rock', 4), ('rave', 4), ('13', 4), ('porgy', 4), ('room', 4), ('abandon', 4), ('arts', 4), ('rabbit', 4), ('whole', 4), ('machine', 4), ('honey', 4), ('christopher', 4), ('12', 4), ('er', 3), ('wet', 3), ('ireland', 3)]
# Top 20 remaining tokens for the medium-energy (energy=0.5) samples.
half_ec.most_common(20)
[('new', 8), ('city', 6), ('room', 5), ('dark', 5), ('house', 5), ('soul', 5), ('one', 5), ('june', 4), ('phone', 4), ('late', 4), ('omaha', 4), ('son', 4), ('la', 4), ('ecstasy', 4), ('words', 4), ('bass', 4), ('15', 4), ('le', 4), ('de', 4), ('marco', 4)]
# Top 20 remaining tokens for the high-energy (energy=1) samples.
full_ec.most_common(20)
[('la', 7), ('pop', 6), ('music', 5), ('thai', 4), ('mate', 4), ('2016', 4), ('soul', 4), ('5', 4), ('britney', 4), ('remix', 4), ('bowery', 4), ('steel', 4), ('de', 4), ('santa', 4), ('fields', 3), ('orgy', 3), ('tour', 3), ('cradling', 3), ('du', 3), ('8', 3)]
from wordcloud import WordCloud
import matplotlib.pyplot as plt
%matplotlib inline
# Hand-curated (token, count) pairs for the energy=0 samples — numbers and
# noise tokens from zero_ec.most_common(20) were dropped during curation.
zero_tuples = [
    ('de', 8), ('black', 6),
    ('rock', 4), ('rave', 4), ('room', 4), ('abandon', 4),
    ('arts', 4), ('rabbit', 4), ('whole', 4), ('machine', 4),
    ('honey', 4), ('christopher', 4),
    ('wet', 3), ('ireland', 3),
]
zero_freqs = dict(zero_tuples)
# Purple cloud for the low-energy vocabulary.
wordcloud = WordCloud(colormap='Purples')
wordcloud.generate_from_frequencies(zero_freqs)
plt.figure(dpi=1200)
plt.imshow(wordcloud, interpolation="bilinear")
plt.axis("off")
plt.show()
# Hand-curated (token, count) pairs for the energy=1 samples — numbers and
# noise tokens from full_ec.most_common(20) were dropped during curation.
full_tuples = [
    ('la', 7), ('pop', 6),
    ('thai', 4), ('mate', 4), ('soul', 4), ('britney', 4),
    ('remix', 4), ('bowery', 4), ('steel', 4), ('de', 4), ('santa', 4),
    ('fields', 3), ('orgy', 3), ('tour', 3), ('cradling', 3),
]
full_freqs = dict(full_tuples)
# Green-on-whitesmoke cloud for the high-energy vocabulary.
wordcloud = WordCloud(colormap='Greens', background_color='whitesmoke')
wordcloud.generate_from_frequencies(full_freqs)
plt.figure(dpi=1200)
plt.imshow(wordcloud, interpolation="bilinear")
plt.axis("off")
plt.show()