"""Tutorial

YOLOv8 Training Pipeline

References:
"""

import keras_aug
import keras_cv
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow import keras

# Number of examples grouped per batch by `ragged_batch` below.
BATCH_SIZE = 16
# Passed as `path` to the bounding-box gallery plot.
OUTPUT_PATH = "output.png"
# Target image size handed to the Resize layers (the Mosaic layer is given
# twice these values).
IMAGE_HEIGHT = 640
IMAGE_WIDTH = 640
# Passed as `padding_value` / `fill_value` to the Resize, Mosaic and
# RandomAffine layers; presumably the gray level used for pixels outside the
# source image -- confirm against the keras_aug documentation.
FILL_VALUE = 114


def visualize_dataset(
    inputs, value_range, rows, cols, bounding_box_format, path
):
    """Plot the first element of a dataset as a bounding-box gallery.

    Takes a single element from `inputs` and forwards its "images" and
    "bounding_boxes" entries to
    `keras_cv.visualization.plot_bounding_box_gallery`.

    Args:
        inputs: dataset-like object supporting `.take(1)`; its elements must
            be mappings with "images" and "bounding_boxes" keys.
        value_range: forwarded to the gallery plotter as `value_range`.
        rows: forwarded to the gallery plotter as `rows`.
        cols: forwarded to the gallery plotter as `cols`.
        bounding_box_format: forwarded to the gallery plotter as
            `bounding_box_format`.
        path: forwarded to the gallery plotter as `path`.
    """
    # Only the very first element is ever drawn.
    first_only = inputs.take(1)
    sample = next(iter(first_only))
    sample_images = sample["images"]
    sample_boxes = sample["bounding_boxes"]

    keras_cv.visualization.plot_bounding_box_gallery(
        sample_images,
        y_true=sample_boxes,
        bounding_box_format=bounding_box_format,
        value_range=value_range,
        rows=rows,
        cols=cols,
        path=path,
        scale=5,
        font_scale=0.7,
        dpi=150,
    )


def unpackage_raw_tfds_inputs(inputs, bounding_box_format):
    """Repackage one raw TFDS example into an images / bounding_boxes dict.

    Args:
        inputs: mapping with an "image" entry and an "objects" entry that
            itself holds "bbox" and "label".
        bounding_box_format: target format the "rel_yxyx" source boxes are
            converted to.

    Returns:
        A dict with "images" (the image cast to float32) and
        "bounding_boxes", a dict holding float32 "classes" and "boxes".
    """
    image = inputs["image"]
    objects = inputs["objects"]

    # Source boxes are declared as "rel_yxyx"; the image is passed along to
    # the converter as `images`.
    converted = keras_aug.datapoints.bounding_box.convert_format(
        objects["bbox"],
        source="rel_yxyx",
        target=bounding_box_format,
        images=image,
    )

    class_ids = tf.cast(objects["label"], dtype=tf.float32)
    box_coords = tf.cast(converted, dtype=tf.float32)
    float_image = tf.cast(image, tf.float32)

    return {
        "images": float_image,
        "bounding_boxes": {"classes": class_ids, "boxes": box_coords},
    }


def load_pascal_voc(split, dataset, bounding_box_format):
    """Load a TFDS dataset split and repackage every example.

    Args:
        split: split name passed to `tfds.load`.
        dataset: dataset name passed to `tfds.load`.
        bounding_box_format: box format handed to
            `unpackage_raw_tfds_inputs` for each example.

    Returns:
        The loaded dataset with `unpackage_raw_tfds_inputs` mapped over it.
    """

    def _repackage(example):
        return unpackage_raw_tfds_inputs(
            example, bounding_box_format=bounding_box_format
        )

    # File shuffling is disabled and no dataset info is requested.
    raw_ds = tfds.load(
        dataset, split=split, with_info=False, shuffle_files=False
    )
    return raw_ds.map(_repackage, num_parallel_calls=tf.data.AUTOTUNE)


# Augmentation pipeline applied to each batch. The layers run in list order
# and every box-aware layer is configured for "xywh" boxes.
augmenter = keras.Sequential(
    [
        # Resize to IMAGE_HEIGHT x IMAGE_WIDTH with pad_to_aspect_ratio
        # enabled, padding with FILL_VALUE.
        keras_aug.layers.Resize(
            IMAGE_HEIGHT,
            IMAGE_WIDTH,
            bounding_box_format="xywh",
            pad_to_aspect_ratio=True,
            padding_value=FILL_VALUE,
        ),
        # Mosaic is given twice the target height / width (presumably its
        # output canvas size -- confirm against the keras_aug docs).
        keras_aug.layers.Mosaic(
            IMAGE_HEIGHT * 2,
            IMAGE_WIDTH * 2,
            bounding_box_format="xywh",
            fill_value=FILL_VALUE,
        ),
        # Random translation and zoom, with thresholds on box area ratio and
        # aspect ratio.
        keras_aug.layers.RandomAffine(
            bounding_box_format="xywh",
            translation_height_factor=0.1,
            translation_width_factor=0.1,
            zoom_height_factor=0.5,
            same_zoom_factor=True,
            fill_value=FILL_VALUE,
            bounding_box_min_area_ratio=0.1,
            bounding_box_max_aspect_ratio=100.0,
        ),
        # Second resize, again to IMAGE_HEIGHT x IMAGE_WIDTH.
        keras_aug.layers.Resize(
            IMAGE_HEIGHT, IMAGE_WIDTH, bounding_box_format="xywh"
        ),
        # TODO: Blur, MedianBlur
        # Grayscale and CLAHE are each wrapped in RandomApply with rate=0.01.
        keras_aug.layers.RandomApply(keras_aug.layers.Grayscale(), rate=0.01),
        keras_aug.layers.RandomApply(
            keras_aug.layers.RandomCLAHE(value_range=(0, 255)), rate=0.01
        ),
        # Hue / saturation / value jitter on a (0, 255) value range.
        keras_aug.layers.RandomHSV(
            hue_factor=0.015,
            saturation_factor=0.7,
            value_factor=0.4,
            value_range=(0, 255),
        ),
        keras_aug.layers.RandomFlip(bounding_box_format="xywh"),
    ]
)


# Training input pipeline: load the VOC 2007 train split with "xywh" boxes,
# group it into ragged batches (dropping the incomplete last batch), then run
# the augmenter over every batch.
train_ds = (
    load_pascal_voc(
        split="train", dataset="voc/2007", bounding_box_format="xywh"
    )
    .ragged_batch(BATCH_SIZE, drop_remainder=True)
    .map(augmenter, num_parallel_calls=tf.data.AUTOTUNE)
)

# Plot a 2x2 gallery from the first augmented batch, passing OUTPUT_PATH as
# the plot's `path`.
visualize_dataset(
    train_ds,
    value_range=(0, 255),
    rows=2,
    cols=2,
    bounding_box_format="xywh",
    path=OUTPUT_PATH,
)