#!/usr/bin/env python
# coding: utf-8

# # DeepInvention
# 
# Reference
# * [Asking RNNs+LTSMs: What Would Mozart Write?](http://www.wise.io/tech/asking-rnn-and-ltsm-what-would-mozart-write)
# * [deep-learning/Anna_KaRNNa.ipynb at master · udacity/deep-learning](https://github.com/udacity/deep-learning/blob/master/intro-to-rnns/Anna_KaRNNa.ipynb)

# ## music21 UserSettings
# * http://web.mit.edu/music21/doc/tutorials/environment.html#environment
# * [music21](https://gist.github.com/Vesnica/f8862277e4e3a27593f4ca300eedf07e)

# ### Install
# 
#     sudo apt install musescore timidity lilypond
#     pip install music21 matplotlib scipy tensorflow

# In[1]:

from music21 import *

# In[11]:

us = environment.UserSettings()
us.getSettingsPath()

# In[12]:

#us["musicxmlPath"] = "/usr/bin/gedit"
us["musicxmlPath"] = "/usr/bin/musescore"
us["midiPath"] = "/usr/bin/timidity"
us["showFormat"] = "lilypond"
us["writeFormat"] = "lilypond"
us["musescoreDirectPNGPath"] = "/usr/bin/musescore"

# ## Prepare Data

# In[ ]:

get_ipython().system('mkdir composer')

# In[ ]:

import glob

REP = "@\n"

def trim_metadata(output_path, glob_path):
    comp_txt = open(output_path, "w")
    ll = glob.glob(glob_path)
    for song in ll:
        lines = open(song, "r").readlines()
        out = []
        found_first = False
        for l in lines:
            if l.startswith("="):
                ## new measure, replace the measure with the @ sign, not part of humdrum
                out.append(REP)
                found_first = True
                continue
            if not found_first:
                ## keep going until we find the end of the header and metadata
                continue
            if l.startswith("!"):
                ## ignore comments
                continue
            out.append(l)
        comp_txt.writelines(out)
    comp_txt.close()

# ### Get Data from KernScores
# * [KernScores](http://kern.humdrum.org/)

# In[ ]:

get_ipython().run_line_magic('mkdir', 'kernscore')
get_ipython().run_line_magic('mkdir', 'kernscore/bach')

# In[ ]:

from urllib.request import urlopen

for i in range(1, 15+1):
    filename = "inven{0:02d}.krn".format(i)
    file = urlopen("http://kern.humdrum.org/cgi-bin/ksdata?l=osu/classical/bach/inventions&file=%s&f=kern" % filename)
    with open("kernscore/bach/"+filename, 'wb') as output:
        output.write(file.read())

# In[ ]:

output_path = "composer/bach.txt"
glob_path = "kernscore/bach/*.krn"
trim_metadata(output_path, glob_path)

# ## Setup

# In[3]:

import time
from collections import namedtuple

import numpy as np
import tensorflow as tf

# In[4]:

filename = 'composer/bach.txt'
with open(filename, 'r') as f:
    text = f.read()
vocab = set(text)
vocab_to_int = {c: i for i, c in enumerate(vocab)}
int_to_vocab = dict(enumerate(vocab))
encoded = np.array([vocab_to_int[c] for c in text], dtype=np.int32)

# In[5]:

text[:50]

# In[6]:

encoded[:100]

# In[7]:

vocab_size = len(vocab)
vocab_size
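
# A quick round-trip check (an addition, not in the original notebook): mapping
# `encoded` back through `int_to_vocab` should reproduce the head of `text`,
# confirming the character encoding is lossless.

# In[ ]:

decoded_head = ''.join(int_to_vocab[int(i)] for i in encoded[:50])
assert decoded_head == text[:50]
decoded_head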
# ## Preprocess

# In[8]:

def get_batches(arr, n_seqs, n_steps):
    '''Create a generator that returns batches of size
       n_seqs x n_steps from arr.
       
       Arguments
       ---------
       arr: Array you want to make batches from
       n_seqs: Batch size, the number of sequences per batch
       n_steps: Number of sequence steps per batch
    '''
    # Get the batch size and number of batches we can make
    batch_size = n_seqs * n_steps
    n_batches = len(arr)//batch_size
    
    # Keep only enough characters to make full batches
    arr = arr[:n_batches * batch_size]
    
    # Reshape into n_seqs rows
    arr = arr.reshape((n_seqs, -1))
    
    for n in range(0, arr.shape[1], n_steps):
        # The features
        x = arr[:, n:n+n_steps]
        # The targets, shifted by one
        y = np.zeros_like(x)
        y[:, :-1], y[:, -1] = x[:, 1:], x[:, 0]
        yield x, y

# In[9]:

batches = get_batches(encoded, 10, 50)
x, y = next(batches)

# In[10]:

print('x\n', x[:10, :10])
print('\ny\n', y[:10, :10])

# ## Build Model

# In[11]:

def build_inputs(batch_size, num_steps):
    ''' Define placeholders for inputs, targets, and dropout
    
        Arguments
        ---------
        batch_size: Batch size, number of sequences per batch
        num_steps: Number of sequence steps in a batch
    '''
    # Declare placeholders we'll feed into the graph
    inputs = tf.placeholder(tf.int32, [batch_size, num_steps], name='inputs')
    targets = tf.placeholder(tf.int32, [batch_size, num_steps], name='targets')
    
    # Keep probability placeholder for drop out layers
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    
    return inputs, targets, keep_prob

# In[12]:

def build_lstm(lstm_size, num_layers, batch_size, keep_prob):
    ''' Build LSTM cell.
    
        Arguments
        ---------
        keep_prob: Scalar tensor (tf.placeholder) for the dropout keep probability
        lstm_size: Size of the hidden layers in the LSTM cells
        num_layers: Number of LSTM layers
        batch_size: Batch size
    '''
    ### Build the LSTM Cell
    def build_cell():
        # Use a basic LSTM cell
        lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)
        # Add dropout to the cell
        drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)
        return drop
    
    # Stack up multiple LSTM layers, for deep learning.
    # A fresh cell is built for each layer; reusing a single cell object for
    # every layer triggers a variable-reuse error on newer TensorFlow 1.x releases.
    cell = tf.contrib.rnn.MultiRNNCell([build_cell() for _ in range(num_layers)])
    initial_state = cell.zero_state(batch_size, tf.float32)
    
    return cell, initial_state

# In[13]:

def build_output(lstm_output, in_size, out_size):
    ''' Build a softmax layer, return the softmax output and logits.
    
        Arguments
        ---------
        lstm_output: Input tensor, the collected LSTM outputs
        in_size: Size of the input tensor, for example, size of the LSTM cells
        out_size: Size of this softmax layer
    '''
    # Reshape output so it's a bunch of rows, one row for each step for each sequence.
    # That is, the shape should be batch_size*num_steps rows by lstm_size columns
    seq_output = tf.concat(lstm_output, axis=1)
    x = tf.reshape(seq_output, [-1, in_size])
    
    # Connect the RNN outputs to a softmax layer
    with tf.variable_scope('softmax'):
        softmax_w = tf.Variable(tf.truncated_normal((in_size, out_size), stddev=0.1))
        softmax_b = tf.Variable(tf.zeros(out_size))
    
    # Since output is a bunch of rows of RNN cell outputs, logits will be a bunch
    # of rows of logit outputs, one for each step and sequence
    logits = tf.matmul(x, softmax_w) + softmax_b
    
    # Use softmax to get the probabilities for predicted characters
    out = tf.nn.softmax(logits, name='predictions')
    
    return out, logits
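
# The reshape inside `build_output` is what turns the 3-D LSTM output into a
# 2-D matrix with one row per character position. A minimal NumPy sketch of the
# same shape logic (an illustration with made-up sizes, not part of the model):

# In[ ]:

demo_batch, demo_steps, demo_lstm, demo_classes = 2, 3, 4, 5
demo_outputs = np.random.randn(demo_batch, demo_steps, demo_lstm)
demo_rows = demo_outputs.reshape(-1, demo_lstm)   # (6, 4): one row per step per sequence
demo_w = np.random.randn(demo_lstm, demo_classes)
demo_logits = demo_rows @ demo_w                  # (6, 5): one logit row per character position
print(demo_outputs.shape, demo_rows.shape, demo_logits.shape)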
# In[14]:

def build_loss(logits, targets, lstm_size, num_classes):
    ''' Calculate the loss from the logits and the targets.
    
        Arguments
        ---------
        logits: Logits from final fully connected layer
        targets: Targets for supervised learning
        lstm_size: Number of LSTM hidden units
        num_classes: Number of classes in targets
    '''
    # One-hot encode targets and reshape to match logits, one row per batch_size per step
    y_one_hot = tf.one_hot(targets, num_classes)
    y_reshaped = tf.reshape(y_one_hot, logits.get_shape())
    
    # Softmax cross entropy loss
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_reshaped)
    loss = tf.reduce_mean(loss)
    
    return loss

# In[15]:

def build_optimizer(loss, learning_rate, grad_clip):
    ''' Build optimizer for training, using gradient clipping.
    
        Arguments:
        loss: Network loss
        learning_rate: Learning rate for optimizer
        grad_clip: Threshold at which the global gradient norm is clipped
    '''
    # Optimizer for training, using gradient clipping to control exploding gradients
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), grad_clip)
    train_op = tf.train.AdamOptimizer(learning_rate)
    optimizer = train_op.apply_gradients(zip(grads, tvars))
    
    return optimizer

# In[16]:

class CharRNN:
    
    def __init__(self, num_classes, batch_size=64, num_steps=50,
                 lstm_size=128, num_layers=2, learning_rate=0.001,
                 grad_clip=5, sampling=False):
        
        # When we're using this network for sampling later, we'll be passing in
        # one character at a time, so providing an option for that
        if sampling:
            batch_size, num_steps = 1, 1
        
        tf.reset_default_graph()
        
        # Build the input placeholder tensors
        self.inputs, self.targets, self.keep_prob = build_inputs(batch_size, num_steps)
        
        # Build the LSTM cell
        cell, self.initial_state = build_lstm(lstm_size, num_layers, batch_size, self.keep_prob)
        
        ### Run the data through the RNN layers
        # First, one-hot encode the input tokens
        x_one_hot = tf.one_hot(self.inputs, num_classes)
        
        # Run each sequence step through the RNN and collect the outputs
        outputs, state = tf.nn.dynamic_rnn(cell, x_one_hot, initial_state=self.initial_state)
        self.final_state = state
        
        # Get softmax predictions and logits
        self.prediction, self.logits = build_output(outputs, lstm_size, num_classes)
        
        # Loss and optimizer (with gradient clipping)
        self.loss = build_loss(self.logits, self.targets, lstm_size, num_classes)
        self.optimizer = build_optimizer(self.loss, learning_rate, grad_clip)
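
# The training, sampling and export cells below write to `checkpoints/`,
# `results/` and `midi/`, which the notebook never creates (only `composer/`
# and `kernscore/` are made above). A small helper cell, added here, avoids a
# failure on the first save:

# In[ ]:

import os
for d in ('checkpoints', 'results', 'midi'):
    os.makedirs(d, exist_ok=True)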
# ## Training

# In[23]:

batch_size = 10
num_steps = 10
lstm_size = 512
num_layers = 2
learning_rate = 0.001
keep_prob = 0.5

# In[24]:

epochs = 20
# Save every N iterations
save_every_n = 200

model = CharRNN(len(vocab), batch_size=batch_size, num_steps=num_steps,
                lstm_size=lstm_size, num_layers=num_layers,
                learning_rate=learning_rate)

saver = tf.train.Saver(max_to_keep=100)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    # Use the line below to load a checkpoint and resume training
    #saver.restore(sess, 'checkpoints/______.ckpt')
    counter = 0
    for e in range(epochs):
        # Train network
        new_state = sess.run(model.initial_state)
        loss = 0
        for x, y in get_batches(encoded, batch_size, num_steps):
            counter += 1
            start = time.time()
            feed = {model.inputs: x,
                    model.targets: y,
                    model.keep_prob: keep_prob,
                    model.initial_state: new_state}
            batch_loss, new_state, _ = sess.run([model.loss,
                                                 model.final_state,
                                                 model.optimizer],
                                                feed_dict=feed)
            
            end = time.time()
            print('Epoch: {}/{}... '.format(e+1, epochs),
                  'Training Step: {}... '.format(counter),
                  'Training loss: {:.4f}... '.format(batch_loss),
                  '{:.4f} sec/batch'.format((end-start)))
            
            if (counter % save_every_n == 0):
                saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))
    
    saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))

# In[25]:

tf.train.get_checkpoint_state('checkpoints')

# ## Sampling

# In[26]:

def pick_top_n(preds, vocab_size, top_n=5):
    p = np.squeeze(preds)
    p[np.argsort(p)[:-top_n]] = 0
    p = p / np.sum(p)
    c = np.random.choice(vocab_size, 1, p=p)[0]
    return c

# In[27]:

def sample(checkpoint, n_samples, lstm_size, vocab_size, prime="@\n"):
    samples = [c for c in prime]
    model = CharRNN(len(vocab), lstm_size=lstm_size, sampling=True)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, checkpoint)
        new_state = sess.run(model.initial_state)
        for c in prime:
            x = np.zeros((1, 1))
            x[0,0] = vocab_to_int[c]
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: new_state}
            preds, new_state = sess.run([model.prediction, model.final_state],
                                        feed_dict=feed)
        
        c = pick_top_n(preds, len(vocab))
        samples.append(int_to_vocab[c])
        
        for i in range(n_samples):
            x[0,0] = c
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: new_state}
            preds, new_state = sess.run([model.prediction, model.final_state],
                                        feed_dict=feed)
            
            c = pick_top_n(preds, len(vocab))
            samples.append(int_to_vocab[c])
    
    return ''.join(samples)

# In[28]:

tf.train.latest_checkpoint('checkpoints')

# In[70]:

checkpoint = tf.train.latest_checkpoint('checkpoints')
samp = sample(checkpoint, 5000, lstm_size, len(vocab), prime="@\n")

# In[87]:

# Rebuild a two-spine **kern file around the sampled text: add a header,
# turn each "@" line back into a numbered measure, and pad incomplete lines.
r = []
r.append("**kern\t**kern\n")
r.append("*staff2\t*staff1\n")
r.append("*clefF4\t*clefG2\n")
r.append("*k[]\t*k[]\n")
r.append("*C:\t*C:\n")
r.append("*M4/4\t*M4/4\n")
r.append("*MM80\t*MM80\n")

bar = 1
for line in samp.splitlines():
    sp = line.split('\t')
    if sp[0] == '@':
        r.append("={bar}\t={bar}\n".format(bar=bar))
        bar += 1
    else:
        ln = len(sp)
        if ln == 1 and sp[0] != "":
            r.append(sp[0])
            r.append('\t')
            r.append('.')
            r.append('\n')
        elif ln == 1 and sp[0] == "":
            r.append(".")
            r.append('\t')
            r.append('.')
            r.append('\n')
        elif sp[0] == "*-" or sp[1] == "*-":
            continue
        else:
            r.append(sp[0])
            r.append('\t')
            r.append(sp[1])
            r.append('\n')

r.append("==|!\t==|!\n")
r.append("*-\t*-\n")

with open("results/bach2ai.krn", "w") as krn_out:
    krn_out.writelines(r)

# In[46]:

from music21 import *

m1 = converter.parse("results/bach2ai.krn")
m1.write('midi', fp='midi/bach2ai.mid')

# In[47]:

m1.show("midi")

# In[ ]:
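
# `pick_top_n` keeps only the five most likely characters and zeroes out the
# rest. An optional alternative (a sketch added here, not part of the original
# pipeline) is temperature sampling, which rescales the whole distribution
# instead of truncating it; swap it in for `pick_top_n` inside `sample` to get
# more adventurous (temperature > 1) or more conservative (< 1) output.

# In[ ]:

def pick_with_temperature(preds, vocab_size, temperature=0.8):
    p = np.squeeze(preds).astype(np.float64)
    # Rescale the predicted distribution in log space, then renormalize
    logp = np.log(p + 1e-10) / temperature
    p = np.exp(logp - np.max(logp))
    p = p / np.sum(p)
    return np.random.choice(vocab_size, 1, p=p)[0]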