# Read and display the Horse-or-Human machine learning dataset
import tensorflow as tf
import numpy as np
import tensorflow_datasets as tfds
from tensorflow.keras import regularizers
import matplotlib.pyplot as plt
# Load the horse-or-human dataset together with its metadata.
# Each image is (300, 300, 3) uint8.
# NOTE(review): the second return value of tfds.load(with_info=True) is the
# DatasetInfo object, not a label — the name is kept for compatibility but
# should be renamed to `info`.
dataset, label = tfds.load('horses_or_humans', with_info=True)

# Separate the two classes (0 = horse, 1 = human).
horse_ds = dataset['train'].filter(lambda x: x['label'] == 0)
human_ds = dataset['train'].filter(lambda x: x['label'] == 1)

# Number of examples to preview from each class (keep < 16).
n_examples = 5
horse_examples = horse_ds.take(n_examples)
human_examples = human_ds.take(n_examples)


def _show_examples(examples, class_name):
    """Plot the given examples in a single row, one numbered title each."""
    fig, axes = plt.subplots(1, n_examples, figsize=(15, 15))
    for i, example in enumerate(examples):
        axes[i].imshow(example['image'])
        axes[i].set_title(f"{class_name} {i+1}")
    plt.show()


_show_examples(human_examples, "humans")
_show_examples(horse_examples, "horses")
# Load the train/test splits for model training.
# as_supervised=True returns each example as an (image, label) tuple
# instead of a feature dict.
train_dataset, valid_dataset = tfds.load('horses_or_humans', split=['train', 'test'], as_supervised=True)

# Report how many examples each split contains.
# Dataset.cardinality() supersedes the deprecated
# tf.data.experimental.cardinality().
train_size = train_dataset.cardinality().numpy()
valid_size = valid_dataset.cardinality().numpy()
print("Training dataset size:", train_size)
print("Validation dataset size:", valid_size)
IMG_SIZE = 300   # target height/width after resizing
NUM_CLASSES = 2  # horse vs. human


def preprocess(image, label):
    """Resize, rescale, and one-hot-encode a single example.

    Args:
        image: image tensor (uint8 per the dataset docs above — confirm).
        label: scalar integer class id.

    Returns:
        A tuple ``(image, label)`` where ``image`` is float32 of shape
        (IMG_SIZE, IMG_SIZE, channels) with values in [0, 1] and
        ``label`` is a one-hot vector of length NUM_CLASSES.
    """
    resized = tf.image.resize(tf.cast(image, tf.float32), (IMG_SIZE, IMG_SIZE))
    scaled = resized / 255.0
    one_hot = tf.one_hot(label, NUM_CLASSES)
    return scaled, one_hot
# Apply the preprocessing function to every example in each split.
train_dataset = train_dataset.map(preprocess)
valid_dataset = valid_dataset.map(preprocess)

# Shuffle the training data and batch both splits.
train_dataset = train_dataset.shuffle(1000).batch(80)
valid_dataset = valid_dataset.batch(20)

# Store ALL validation images and labels for later predictions.
# (The original loop reassigned x_val/y_val on every iteration, so only
# the final batch survived — most of the validation data was dropped.)
x_val = tf.concat([images for images, _ in valid_dataset], axis=0)
y_val = tf.concat([labels for _, labels in valid_dataset], axis=0)
print(x_val.shape, y_val.shape)