%reload_ext autoreload
%autoreload 2
%matplotlib inline
import os
from local.torch_basics import *
from local.test import *
from local.basics import *
from local.data.all import *
from local.vision.core import *
from local.notebook.showdoc import show_doc
from local.audio.core import *
from local.audio.augment import *
from local.vision.learner import *
from local.vision.models.xresnet import *
from local.metrics import *
from local.callback.schedule import *
import torchaudio
from IPython.display import Audio, display
# Dataset setup for ESC-50. The download/extract helpers are commented out,
# so this cell presumably relies on the archive already being unpacked under
# the configured data path — TODO confirm.
#def arc_extract(fname, dest): Archive(fname).extractall(dest)
URLs.ESC50 = 'https://github.com/karoldvl/ESC-50/archive/master.zip'
# Root of the unpacked archive, plus its audio folder and metadata CSV.
pESC50 = Config()['data_path'] / 'ESC-50/ESC-50-master'
PATH_AUDIO = pESC50/"audio"
PATH_CSV = pESC50/"meta/esc50.csv"
# Per-clip metadata table; DF.head() at the end of the cell shows its columns.
DF = pd.read_csv(PATH_CSV)
#untar_data(URLs.ESC50, fname=str(pESC50)+'.zip', dest=pESC50, extract_func=arc_extract)
# Collect files below the dataset root (recursively). The recorded output of
# files_ESC50[0] further down shows the entries are paths to .wav files.
x = AudioGetter("", recurse=True, folders=None)
files_ESC50 = x(pESC50)
#original_aud = AudioItem.create(files[0])
DF.head()
| | filename | fold | target | category | esc10 | src_file | take |
---|---|---|---|---|---|---|---|
0 | 1-100032-A-0.wav | 1 | 0 | dog | True | 100032 | A |
1 | 1-100038-A-14.wav | 1 | 14 | chirping_birds | False | 100038 | A |
2 | 1-100210-A-36.wav | 1 | 36 | vacuum_cleaner | False | 100210 | A |
3 | 1-100210-B-36.wav | 1 | 36 | vacuum_cleaner | False | 100210 | B |
4 | 1-101296-A-19.wav | 1 | 19 | thunderstorm | False | 101296 | A |
# File names (as a plain list) of the metadata rows whose esc10 flag is True.
ESC_10 = DF[DF["esc10"] == True]["filename"].values.tolist()
# Peek at one collected entry to see what the getter returned.
files_ESC50[0]
PosixPath('/home/jupyter/.fastai/data/ESC-50/ESC-50-master/audio/1-60676-A-34.wav')
# Keep only the files whose name appears in ESC_10.
# ESC_10 is a list, so the names are copied into a set once to make each
# membership test O(1) instead of rescanning the list for every file.
# os.path.basename replaces the manual split on '/' so the file name is
# extracted correctly whatever the platform's path separator is.
_esc10_names = set(ESC_10)
files_ESC10 = [f for f in files_ESC50 if os.path.basename(str(f)) in _esc10_names]
len(files_ESC10)
400
#! pip install -i https://test.pypi.org/simple/ torchvggish==0.1
from torchvggish import vggish, vggish_input
# Initialise model and download weights
embedding_model = vggish()
# Put the model in evaluation mode. As the last expression of the cell, the
# call's return value (the model) is echoed as the recorded output that follows.
embedding_model.eval()
VGG( (features): Sequential( (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (1): ReLU(inplace=True) (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (4): ReLU(inplace=True) (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (7): ReLU(inplace=True) (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (9): ReLU(inplace=True) (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (12): ReLU(inplace=True) (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (14): ReLU(inplace=True) (15): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) ) (embeddings): Sequential( (0): Linear(in_features=12288, out_features=4096, bias=True) (1): ReLU(inplace=True) (2): Linear(in_features=4096, out_features=4096, bias=True) (3): ReLU(inplace=True) (4): Linear(in_features=4096, out_features=128, bias=True) (5): ReLU(inplace=True) ) )
# NOTE(review): machine-specific absolute path; it is not used anywhere in
# the visible code — confirm whether it can be dropped.
path_example = "/home/jupyter/rob/pytorch_videos/RNN-walkthrough/audio_sample/album.wav"
# Convert the first ESC-10 file into VGGish input examples, detached from
# autograd, and inspect the resulting shape.
example = vggish_input.wavfile_to_examples(files_ESC10[0]).detach()
example.shape
torch.Size([5, 1, 96, 64])
def get_embedding(p):
    """Return the VGGish embedding tensor for the audio file at ``p``.

    The file is converted to model input examples with
    ``vggish_input.wavfile_to_examples`` and passed through the module-level
    ``embedding_model``.

    The forward pass runs under ``torch.no_grad()``: the visible callers
    detach the result straight away, so recording an autograd graph only
    costs memory. As a consequence the returned tensor does not require
    grad. The model is invoked by calling it (rather than ``.forward``) so
    that any hooks registered on the module are run.
    """
    example = vggish_input.wavfile_to_examples(p)
    with torch.no_grad():
        embedding = embedding_model(example)
    return embedding
def get_embedding_batch(paths):
    """Placeholder for a batched embedding helper; not implemented yet.

    ``paths`` is currently ignored and the function returns ``None``.
    """
    return None
def get_esc_classes(files):
    """Return the distinct class labels found in ``files`` as a sorted list.

    Labels are the strings produced by ``get_esc_label``. They are sorted
    (as strings, so "10" sorts before "2") because iterating a set of strings
    has no stable order between interpreter runs; without sorting, the
    label/index mappings built from this list would differ from one session
    to the next.
    """
    return sorted({get_esc_label(f) for f in files})
def get_esc_label(f):
    """Return the class label encoded in an ESC file name, as a string.

    The label is the last '-'-separated field of the file name, taken before
    the first '.' (e.g. ``1-100032-A-0.wav`` -> ``"0"``).

    ``f`` may be a string or a path object. The file name is extracted with
    ``os.path.basename`` rather than by splitting on '/', so it does not
    depend on a hard-coded separator.
    """
    name = os.path.basename(str(f))
    return name.split('.')[0].split('-')[-1]
def get_esc_fold(f):
    """Return the fold encoded in an ESC file name, as a string.

    The fold is the first '-'-separated field of the file name
    (e.g. ``1-100032-A-0.wav`` -> ``"1"``).

    ``f`` may be a string or a path object. The file name is extracted with
    ``os.path.basename`` rather than by splitting on '/', so it does not
    depend on a hard-coded separator.
    """
    name = os.path.basename(str(f))
    return name.split('-')[0]
def i2o_esc(files):
    """Map integer index -> class label for the classes present in ``files``.

    Indices follow the order of the list returned by ``get_esc_classes``.
    """
    classes = get_esc_classes(files)
    return {idx: label for idx, label in enumerate(classes)}
def o2i_esc(files):
    """Map class label -> integer index; the inverse of ``i2o_esc(files)``."""
    index_to_label = i2o_esc(files)
    label_to_index = {}
    for idx, label in index_to_label.items():
        label_to_index[label] = idx
    return label_to_index
def get_esc_embedding_data(files, valid_fold):
    """Build train/validation embedding tensors and integer labels.

    Files whose fold (see ``get_esc_fold``) equals ``str(valid_fold)`` go to
    the validation split; all other files go to the training split.

    Args:
        files: audio file paths named in the ESC convention.
        valid_fold: fold to hold out; compared as a string.

    Returns:
        ``(x_train, y_train, x_valid, y_valid)`` where each ``x_*`` is the
        stack (along a new first axis) of the detached ``get_embedding``
        outputs and each ``y_*`` is a tensor of label indices from
        ``o2i_esc(files)``.

    Note:
        ``torch.stack`` requires every embedding in a split to have the same
        shape and each split to be non-empty.
    """
    o2i_dict = o2i_esc(files)
    valid_key = str(valid_fold)

    # Partition once instead of re-scanning `files` (and re-parsing the fold)
    # separately for each of the four outputs.
    train_files, valid_files = [], []
    for f in files:
        if get_esc_fold(f) == valid_key:
            valid_files.append(f)
        else:
            train_files.append(f)

    def _embed(split):
        # One embedding per file, stacked along a new leading axis.
        return torch.stack([get_embedding(f).detach() for f in split], dim=0)

    def _labels(split):
        # Integer class index for every file in the split.
        return torch.tensor([o2i_dict[get_esc_label(f)] for f in split])

    x_train = _embed(train_files)
    x_valid = _embed(valid_files)
    y_train = _labels(train_files)
    y_valid = _labels(valid_files)
    return x_train, y_train, x_valid, y_valid
# ESC-10 subset, fold 5 held out for validation.
x_train, y_train, x_valid, y_valid = get_esc_embedding_data(files_ESC10, 5)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape
(torch.Size([320, 5, 128]), torch.Size([80, 5, 128]), torch.Size([320]), torch.Size([80]))
# Flatten everything after the first axis into a single feature vector per
# clip (5 * 128 = 640 features, matching the shape recorded below).
x_train = x_train.reshape(x_train.shape[0], -1)
x_valid = x_valid.reshape(x_valid.shape[0], -1)
x_train.shape, y_train.shape
(torch.Size([320, 640]), torch.Size([320]))
from sklearn.linear_model import RidgeClassifierCV
# Ridge classifier; alpha is picked from 17 log-spaced values in [1e-8, 1e8].
# NOTE(review): newer scikit-learn releases no longer accept `normalize` —
# confirm against the installed version.
classifier = RidgeClassifierCV(alphas=np.logspace(-8, 8, 17), normalize=True)
# The trailing ';' keeps the notebook from echoing the fitted estimator.
classifier.fit(x_train, y_train);
# Mean accuracy on the held-out fold.
classifier.score(x_valid, y_valid)
0.8375
# ESC-10 subset, fold 5 held out; the embeddings are recomputed because
# x_train / x_valid were overwritten by the earlier reshape.
x_train, y_train, x_valid, y_valid = get_esc_embedding_data(files_ESC10, 5)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape
(torch.Size([320, 5, 128]), torch.Size([80, 5, 128]), torch.Size([320]), torch.Size([80]))
# Check the shape produced by averaging over axis 1 before committing to it.
x_train.mean(dim=1).shape
torch.Size([320, 128])
# Average over axis 1 so each clip is represented by one 128-dim vector.
x_train = x_train.mean(dim=1)
x_valid = x_valid.mean(dim=1)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape
(torch.Size([320, 128]), torch.Size([80, 128]), torch.Size([320]), torch.Size([80]))
# Repeated import; harmless, and lets this cell run on its own.
from sklearn.linear_model import RidgeClassifierCV
# Ridge classifier; alpha is picked from 17 log-spaced values in [1e-8, 1e8].
# NOTE(review): newer scikit-learn releases no longer accept `normalize` —
# confirm against the installed version.
classifier = RidgeClassifierCV(alphas=np.logspace(-8, 8, 17), normalize=True)
# The trailing ';' keeps the notebook from echoing the fitted estimator.
classifier.fit(x_train, y_train);
# Mean accuracy on the held-out fold.
classifier.score(x_valid, y_valid)
0.875
# Full ESC-50 file list, fold 5 held out for validation.
x_train, y_train, x_valid, y_valid = get_esc_embedding_data(files_ESC50, 5)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape
(torch.Size([1600, 5, 128]), torch.Size([400, 5, 128]), torch.Size([1600]), torch.Size([400]))
# Flatten everything after the first axis into a single feature vector per clip.
x_train = x_train.reshape(x_train.shape[0], -1)
x_valid = x_valid.reshape(x_valid.shape[0], -1)
# Ridge classifier; alpha is picked from 17 log-spaced values in [1e-8, 1e8].
# NOTE(review): newer scikit-learn releases no longer accept `normalize` —
# confirm against the installed version.
classifier = RidgeClassifierCV(alphas=np.logspace(-8, 8, 17), normalize=True)
# The trailing ';' keeps the notebook from echoing the fitted estimator.
classifier.fit(x_train, y_train);
# Mean accuracy on the held-out fold.
classifier.score(x_valid, y_valid)
0.6225
# Full ESC-50 file list, fold 5 held out; the embeddings are recomputed
# because x_train / x_valid were overwritten by the earlier reshape.
x_train, y_train, x_valid, y_valid = get_esc_embedding_data(files_ESC50, 5)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape
(torch.Size([1600, 5, 128]), torch.Size([400, 5, 128]), torch.Size([1600]), torch.Size([400]))
# Average over axis 1 so each clip is represented by one 128-dim vector.
x_train = x_train.mean(dim=1)
x_valid = x_valid.mean(dim=1)
x_train.shape, x_valid.shape, y_train.shape, y_valid.shape
(torch.Size([1600, 128]), torch.Size([400, 128]), torch.Size([1600]), torch.Size([400]))
# Ridge classifier; alpha is picked from 17 log-spaced values in [1e-8, 1e8].
# NOTE(review): newer scikit-learn releases no longer accept `normalize` —
# confirm against the installed version.
classifier = RidgeClassifierCV(alphas=np.logspace(-8, 8, 17), normalize=True)
# The trailing ';' keeps the notebook from echoing the fitted estimator.
classifier.fit(x_train, y_train);
# Mean accuracy on the held-out fold.
classifier.score(x_valid, y_valid)
0.6025
#waveform_to_examples(data, sample_rate)+