این 6 کلاس که برای این آموزش آماده شده‌اند را از اینجا دانلود کنید:
http://dataset.class.vision/rnn/RNN-Video-6action.zip
from keras.preprocessing import image
from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.models import Model, load_model
from keras.layers import Input
import numpy as np
import os.path
from tqdm import tqdm
import csv
import random
import glob
import os.path
import sys
import operator
import threading
from keras.utils import to_categorical
from keras.preprocessing.image import img_to_array, load_img
Using TensorFlow backend.
# --- Sequence / feature-extraction settings -------------------------------
seq_length = 40              # frames sampled per video
max_frames = 300             # videos with more frames than this are dropped
image_shape = (224, 224, 3)
base_path = "D:/dataset/RNN-Video"

# Load the metadata CSV; each row looks like
# [split, class, video_name, frame_count] (frame_count is a string).
# Reuse base_path instead of repeating the absolute path.
# NOTE(review): file is named "5class" but the train dir holds 6 classes — confirm.
with open(os.path.join(base_path, 'data_file_5class.csv'), 'r') as fin:
    data = list(csv.reader(fin))

# Class labels are the sorted sub-directory names of the train split.
train_path = os.path.join(base_path, 'train')
classes = sorted(os.listdir(train_path))
classes
['CricketBowling', 'CricketShot', 'FieldHockeyPenalty', 'HandstandPushups', 'HandstandWalking', 'SoccerPenalty']
# Keep only videos with enough frames for one full sequence and not
# excessively long (column 3 holds the frame count as a string).
data_clean = [item for item in data
              if seq_length <= int(item[3]) <= max_frames]
len(data_clean)
439
def get_n_sample_from_video(sample, seq_length, base=None):
    """Return up to `seq_length` evenly spaced frame paths for one video.

    Args:
        sample: CSV row [split, class, video_name, frame_count].
        seq_length: number of frames to sample.
        base: dataset root directory; defaults to the module-level
            `base_path` (added for testability, backward compatible).

    Returns:
        Sorted .jpg paths, keeping every k-th frame where
        k = len(images) // seq_length, truncated to seq_length entries.
        If fewer than seq_length frames exist on disk, all available
        frames are returned. (The original code computed k = 0 in that
        case, and range(0, n, 0) raised ValueError — e.g. when the frame
        count in the CSV disagrees with the files actually extracted.)
    """
    root = base_path if base is None else base
    path = os.path.join(root, sample[0], sample[1])
    filename = sample[2]
    images = sorted(glob.glob(os.path.join(path, filename + '*jpg')))
    # Step between kept frames; at least 1 so range() never gets step 0.
    skip = max(1, len(images) // seq_length)
    output = [images[i] for i in range(0, len(images), skip)]
    # Cut off any extras so we never exceed seq_length.
    return output[:seq_length]
data_clean[3]
['train', 'HandstandWalking', 'v_HandstandWalking_g24_c06', '151']
len(get_n_sample_from_video(data_clean[3], 40))
40
# Pretrained InceptionV3 truncated at the global-average-pool layer:
# we use its pooled activations as a per-frame feature vector.
base_model = InceptionV3(weights='imagenet', include_top=True)
model = Model(inputs=base_model.input, outputs=base_model.get_layer('avg_pool').output)
def model_predict(image_path):
    """Run one image through the truncated InceptionV3 feature extractor.

    Loads the image from `image_path`, preprocesses it, and returns the
    pooled feature vector for that single image.
    """
    # InceptionV3 expects 299x299 input.
    img = image.load_img(image_path, target_size=(299, 299))
    batch = np.expand_dims(image.img_to_array(img), axis=0)
    batch = preprocess_input(batch)
    # predict() returns a batch of one; unwrap it.
    return model.predict(batch)[0]
# Extract and cache one feature sequence (.npy) per video.
os.makedirs('sequences', exist_ok=True)
for video in tqdm(data_clean):
    # Cache file for this video; numpy appends ".npy" automatically.
    path = os.path.join('sequences', video[2] + '-' + str(seq_length) + \
        '-features')
    # Skip videos whose features were already extracted.
    if os.path.isfile(path + '.npy'):
        continue
    # Sample seq_length evenly spaced frames and embed each one
    # (comprehension instead of the manual append loop).
    frames = get_n_sample_from_video(video, seq_length)
    sequence = [model_predict(frame) for frame in frames]
    # Persist the whole sequence of feature vectors.
    np.save(path, sequence)
100%|████████████████████████████████████████████████████████████████████████████████| 439/439 [18:31<00:00, 2.85s/it]