from os import path
import glob
import json
import numpy as np
import imagesize
import fiftyone as fo
# set to download path
image_defect_root = '/PATH TO ARMBENCH DATASET GOES HERE'
# maximum number of groups to load (set to None for entire dataset;
# list[:None] slices the whole list, so None needs no special-casing)
max_groups = 100
# per-group image folders live under <root>/data; the train/test splits are
# CSV files listing one group id per line (consumed via readlines() below)
data_root = path.join(image_defect_root,'data')
train_csv = path.join(image_defect_root,'train.csv')
test_csv = path.join(image_defect_root,'test.csv')
def readlines(f):
    """Return the lines of text file *f* with surrounding whitespace stripped."""
    with open(f, 'r') as fh:
        contents = fh.read()
    return [entry.strip() for entry in contents.splitlines()]
def load_json(f):
    """Parse the JSON file at path *f* and return the decoded object."""
    with open(f, 'r') as fh:
        return json.load(fh)
def parse_data_dir(data_dir):
    """Parse one data directory (group).

    Each *.jpg in the folder has a sidecar JSON with the same basename; this
    loads every sidecar once and augments it in place with the image's
    filepath, pixel size, and group-relative slice name.

    Args:
        data_dir: full path to a single data folder, eg <image_defect_root>/data/<id>

    Returns:
        (group_id, infos): the folder's basename and a list of dicts, one per
        image, in sorted filename order.

    Raises:
        ValueError: if a sidecar's 'id' does not match its image name, or an
            image name is not prefixed by '<group_id>_'.
    """
    group_id = path.basename(data_dir)
    ims = sorted(glob.glob(path.join(data_dir, '*.jpg')))
    # One sidecar JSON per image; load each exactly once.
    # (The original loaded every JSON twice and discarded the first pass.)
    infos = [load_json(path.splitext(im)[0] + '.json') for im in ims]
    for im, info in zip(ims, infos):
        imbase = path.basename(im)
        imkey = path.splitext(imbase)[0]
        # The JSON 'id' may be stored with or without the .jpg extension.
        if info['id'] != imbase and info['id'] != imkey:
            raise ValueError(
                f"JSON id {info['id']!r} does not match image {imbase!r}")
        if not imkey.startswith(group_id + '_'):
            raise ValueError(
                f"image {imkey!r} is not prefixed by group id {group_id!r}")
        # Slice name: the portion of the image key after '<group_id>_'.
        slice_name = imkey[len(group_id) + 1:]
        imw, imh = imagesize.get(im)
        info.update({
            'filepath': im,
            'imw': imw,
            'imh': imh,
            'slice': slice_name,
        })
    return group_id, infos
def parse_all_data_dirs():
    """Parse every group folder under ``data_root``, capped at ``max_groups``.

    Returns:
        list of (id, jsons) tuples, one per parsed group folder, in sorted
        folder-name order.
    """
    # max_groups may be None; list[:None] takes the entire list.
    group_dirs = sorted(glob.glob(path.join(data_root, '*')))[:max_groups]
    return [parse_data_dir(group_dir) for group_dir in group_dirs]
# Split membership: the CSVs list group ids, one per line.
train_ids = set(readlines(train_csv))
test_ids = set(readlines(test_csv))

dataset = fo.Dataset('ARMBench-Image-Defect-Detection')
dataset.persistent = True

samples = []
for group_id, infos in parse_all_data_dirs():
    group = fo.Group()
    for info in infos:
        # Split tag from the group id, plus any defect labels on the image.
        if group_id in train_ids:
            tags = ['train']
        elif group_id in test_ids:
            tags = ['test']
        else:
            tags = []
        tags += [t for t in (info['label'], info['sublabel']) if t]
        sample = fo.Sample(
            filepath=info['filepath'],
            tags=tags,
            group=group.element(info['slice']),
        )
        pts = info['polygon']
        if pts:
            # Normalize pixel coordinates to [0, 1] relative coordinates,
            # then convert the filled polyline to a detection.
            pts = np.array(pts, dtype=np.float64)
            pts[:, 0] /= info['imw']
            pts[:, 1] /= info['imh']
            outline = fo.Polyline(points=[pts.tolist()], filled=True)
            sample['object'] = fo.Polylines(polylines=[outline]).to_detections(
                frame_size=(info['imw'], info['imh']))
        samples.append(sample)

dataset.add_samples(samples)
session = fo.launch_app(dataset)
import fiftyone.brain as fob
from fiftyone import ViewField as F

# Flatten the grouped dataset to slice '4' only, drop samples without
# 'object' labels, and clone the view into a new persistent dataset.
# NOTE(review): F() with no field appears to act as a keep-all label filter
# here, so filter_labels only discards samples with empty 'object' — confirm
# against the FiftyOne ViewField docs.
dataset = dataset.select_group_slices('4') \
    .filter_labels('object',F()) \
    .clone(name='ArmBench-Image-Defect-Slice4',persistent=True)

# Map coarse category names to the raw tag vocabulary carried on samples.
labels = {
    'book': ['book_jacket','open_book_jacket','open_book'],
    'open_box': ['open_box'],
    'partial_box':['partial_box'],
    'crush_box':['crush_box'],
    'bag': ['empty_bag','torn_bag'],
    'multi_pick': ['multi_pick'],
    'nominal': ['nominal'],
}
# set default defect label; this is overwritten for most samples
# NOTE(review): [[...]] per sample assumes each sample carries exactly one
# detection in 'object' — verify against how the dataset was built above.
dataset.set_values('object.detections.label',[['other_defect']]*len(dataset))
for ty,tags in labels.items():
    # match_tags with a list matches samples carrying ANY of the tags;
    # every matched sample's single detection gets the coarse label.
    view = dataset.match_tags(tags)
    view.set_values('object.detections.label',[[ty]]*len(view))

# Compute a CLIP-embedding visualization over all object patches, storing
# embeddings and results under the given field / brain key.
fob.compute_visualization(dataset,
    patches_field='object',
    embeddings='clip_embeddings',
    brain_key='object_clip',
    model='clip-vit-base32-torch')

# Repeat for defective samples only (everything NOT tagged 'nominal'),
# reusing the stored 'clip_embeddings' field.
view_defects = dataset.match_tags('nominal',bool=False)
fob.compute_visualization(view_defects,
    patches_field='object',
    embeddings='clip_embeddings',
    brain_key='dets_clip')
session = fo.launch_app(view_defects)