In this example, we'll load the People in Public 175K Dataset from Visym Labs into FiftyOne.
Per the dataset homepage, PIP-175K contains 184,379 video clips of 68 classes of activities performed by people in public places. The activity labels are subsets of the 37 activities in the Multiview Extended Video with Activities (MEVA) dataset and are consistent with the Activities in Extended Video (ActEV) challenge.
%%html
<iframe width="800" height="450" src="https://www.youtube.com/embed/HjNa7_T-Xkc?rel=0&controls=0&showinfo=0" frameborder="0" allowfullscreen></iframe>
If you haven't already, install FiftyOne:
!pip install fiftyone
The dataset can be downloaded from this page via this link (55.3GB).
# Download and unpack the dataset
!wget https://dl.dropboxusercontent.com/s/xwiacwo9y5uci9v/pip_175k.tar.gz
!tar -xvzf pip_175k.tar.gz
!rm pip_175k.tar.gz
After downloading, you'll have a pip_175k/
directory with the following contents:
pip_175k/
videos/
car_starts/
<uuid1>.mp4
...
person_transfers_object_to_person/
<uuid2>.mp4
...
...
trainset.pkl
testset.pkl
valset.pkl
...
The videos/
subdirectory contains the video files organized as a directory tree that encodes the primary activity in each video.
The .pkl
files contain dense 2D bounding box annotations + additional activity labels for each video, stored in VIPY format.
We'll need to install the VIPY package in order to load the dense annotations:
# Install VIPY package
!pip install vipy
You can install FiftyOne and the necessary dependencies, if necessary, as follows:
# Install FiftyOne
!pip install --index https://pypi.voxel51.com fiftyone
# We'll need ffmpeg to work with video datasets
!sudo apt-get install -y ffmpeg
#!brew install ffmpeg
FiftyOne provides native support for visualizing datasets stored as video classification directory trees on disk, like the pip_175k/videos/
subdirectory of the PIP-175K dataset.
Therefore, you can preview a random subset of the dataset as follows:
# Path to your copy of PIP-175K
PIP_175K_DIR = "/path/to/pip_175k"

import os

import fiftyone as fo

# Sample 100 random videos from the classification directory tree and load
# them into a FiftyOne dataset
videos_dir = os.path.join(PIP_175K_DIR, "videos")
dataset = fo.Dataset.from_dir(
    videos_dir,
    fo.types.VideoClassificationDirectoryTree,
    name="PIP-175K-sample",
    shuffle=True,
    max_samples=100,
)

# Launch the FiftyOne App to browse the samples
session = fo.launch_app(dataset)
We can load the complete annotations from the VIPY .pkl
files by writing a custom DatasetImporter:
from collections import defaultdict
import logging
import os
import vipy
import eta.core.utils as etau
import eta.core.video as etav
import fiftyone as fo
import fiftyone.utils.data as foud
logger = logging.getLogger(__name__)
class VIPYDatasetImporter(foud.LabeledVideoDatasetImporter):
    """Importer for labeled video datasets stored in
    `VIPY format <https://github.com/visym/vipy>`_.

    Args:
        dataset_dir: the dataset directory
        pkl_file (None): the name of the ``.pkl`` file within ``dataset_dir``
            from which to load samples + annotations. By default, the first
            ``.pkl`` file found in ``dataset_dir`` is used
        shuffle (False): whether to randomly shuffle the order in which the
            samples are imported
        seed (None): a random seed to use when shuffling
        max_samples (None): a maximum number of samples to import. By default,
            all samples are imported

    Raises:
        ValueError: if ``pkl_file`` is not provided and no ``.pkl`` files
            exist in ``dataset_dir``
    """

    def __init__(
        self,
        dataset_dir,
        pkl_file=None,
        shuffle=False,
        seed=None,
        max_samples=None,
    ):
        if pkl_file is None:
            # Auto-locate a `.pkl` file in the dataset directory
            pkl_paths = etau.get_glob_matches(
                os.path.join(dataset_dir, "*.pkl")
            )
            if not pkl_paths:
                # Fail loudly rather than raising an opaque IndexError below
                raise ValueError(
                    "No `.pkl` files found in '%s'; provide the `pkl_file` "
                    "argument explicitly" % dataset_dir
                )

            pkl_file = os.path.basename(pkl_paths[0])

        super().__init__(
            dataset_dir, shuffle=shuffle, seed=seed, max_samples=max_samples
        )
        self.pkl_file = pkl_file
        self._pkl_path = os.path.join(dataset_dir, pkl_file)
        self._samples = None
        self._iter_samples = None
        self._num_samples = None

    def __iter__(self):
        self._iter_samples = iter(self._samples)
        return self

    def __len__(self):
        # Only meaningful after `setup()` has been called
        return self._num_samples

    def __next__(self):
        # Yields (video_path, video_metadata, label, frames) tuples, per the
        # LabeledVideoDatasetImporter contract
        v = next(self._iter_samples)
        return _parse_vipy_video(v)

    @property
    def has_dataset_info(self):
        # This importer provides no dataset-level info dict
        return False

    @property
    def has_video_metadata(self):
        # Per-sample metadata is not returned (second tuple slot is None)
        return False

    @property
    def label_cls(self):
        # Video-level labels are activity classifications
        return fo.Classifications

    @property
    def frame_labels_cls(self):
        # Frame-level labels are object detections
        return fo.Detections

    def setup(self):
        logger.info("Loading VIPY pkl '%s'...", self._pkl_path)
        pip = vipy.util.load(self._pkl_path)
        logger.info("Loading complete")

        # Applies the shuffle/seed/max_samples settings from the constructor
        self._samples = self._preprocess_list(pip)
        self._num_samples = len(self._samples)
def _parse_vipy_video(v):
    """Converts a VIPY video object into FiftyOne-importable labels.

    Args:
        v: a VIPY video object with activities + tracks

    Returns:
        a ``(video_path, None, activities, frames)`` tuple, where
        ``activities`` is a :class:`fiftyone.core.labels.Classifications`
        and ``frames`` maps frame numbers to frame label dicts
    """
    video_path = v.filename()

    # Frame dimensions are needed to convert boxes to relative coordinates
    metadata = fo.VideoMetadata.build_for(video_path)
    frame_width = metadata.frame_width
    frame_height = metadata.frame_height

    # Video-level activity labels
    activities = fo.Classifications(
        classifications=[
            fo.Classification(label=activity.category())
            for activity in v.activities().values()
        ]
    )

    # Per-frame object detections, keyed by FiftyOne's 1-based frame numbers
    frames = defaultdict(lambda: defaultdict(fo.Detections))
    for track in v.tracks().values():
        category = track.category()
        for fn in range(track.startframe(), track.endframe() + 1):
            x, y, w, h = track[fn].to_xywh()
            box = [
                x / frame_width,
                y / frame_height,
                w / frame_width,
                h / frame_height,
            ]
            frames[fn + 1]["objects"].detections.append(
                fo.Detection(label=category, bounding_box=box)
            )

    return video_path, None, activities, frames
We can then use the VIPYDatasetImporter
to load samples with their full annotations into FiftyOne:
# Build an importer that samples 100 random annotated videos from the
# validation split
vipy_importer = VIPYDatasetImporter(
    PIP_175K_DIR,
    pkl_file="valset.pkl",
    shuffle=True,
    max_samples=100,
)

# Import the samples, storing their labels in a "gt" field
dataset = fo.Dataset.from_importer(
    vipy_importer,
    label_field="gt",
    name="PIP-175K-sample-with-detections",
)

# Browse the samples in the FiftyOne App
session = fo.launch_app(dataset)
With the data in FiftyOne, we can now explore the dataset using dataset views.
For example, we can filter the dataset to only show videos with label person_exits_car
:
from fiftyone import ViewField as F

# Restrict the "gt" labels to the `person_exits_car` class, keeping only
# samples that still contain at least one matching label
is_exit = F("label") == "person_exits_car"
view = dataset.filter_labels("gt", is_exit, only_matches=True)

# Load the view in the App
session.view = view