Object detection and localization using YOLO¶
Faisal Qureshi
Professor
Faculty of Science
Ontario Tech University
Oshawa ON Canada
http://vclab.science.ontariotechu.ca
Copyright information¶
© Faisal Qureshi
License¶
This work is licensed under a Creative Commons Attribution-NonCommercial 4.0 International License.
Introduction¶
The goal of this lab is to learn how to use the YOLO (Redmon et al. 2015) deep learning framework for object detection and localization. Specifically, we will use YOLO to detect road signs in the dataset available at https://www.kaggle.com/datasets/andrewmvd/road-sign-detection.
YOLO (You Only Look Once) is a real-time object detection algorithm that processes images in a single pass, predicting both bounding boxes and class probabilities (Figure 1).
Figure 1: YOLO predicts object classes and their bounding boxes. (Figure from Redmon et al. 2015. )
Convolutional layers extract features¶
YOLO utilizes a series of convolutional layers to extract features from input images (Figure 2). It divides the input image into a grid and makes predictions based on the features extracted from each grid cell.
Figure 2: 24 convolutional layers followed by 2 fully connected layers. Alternating 1 × 1 convolutional layers reduce the features space from preceding layers. The convolutional layers are pretrained on the ImageNet classification task at half the resolution (224 × 224 input image) and then double the resolution for detection. (Figure from Redmon et al. 2015.)
Learning outcomes¶
- Setting up YOLO
- Using YOLO for object detection and localization
- Training YOLO on your dataset(s)
What to submit¶
Go to the end of this notebook to see what you need to submit.
Setting up YOLO via commandline¶
We will use the YOLO implementation by Ultralytics. You can find detailed information about how to set up and use YOLO on your machine at https://docs.ultralytics.com/yolov5/.
Downloading YOLO¶
$ git clone https://github.com/ultralytics/yolov5.git
This downloads YOLO into the yolov5
folder.
Setting up the Python environment¶
$ python3 -m venv venv-yolov5
$ source venv-yolov5/bin/activate
(venv-yolov5) $ pip install -r yolov5/requirements.txt
Running YOLO on commandline¶
Performing inference on an image¶
Use the following command (run from within the yolov5 folder) to perform object detection on an image.
(venv-yolov5) $ python detect.py --source data/images/bus.jpg --view-img
Figure 3 YOLO object detection.
Performing inference on webcam video¶
Use the following command to use YOLO to perform object detection on the video captured by your webcam.
(venv-yolov5) $ python detect.py --source 0
Aside: my laptop has one webcam, whose id is 0. If only a single webcam is attached to your machine then most likely its id will be 0.
More options¶
Check out detect.py
to see the command-line options available to you. For example, it is possible to perform object detection on all images stored in a folder, on a video file, and so on.
Check
(venv-yolov5) $ python detect.py --help
for a complete list of available commandline options.
YOLO Variants¶
YOLO comes in many variants, i.e., different network sizes. By default, detect.py downloads the yolov5s.pt
model. You can, however, specify other models.
Figure 3 YOLO v5 variants.
Example: Tiny YOLO¶
For example, we can use the smallest variant, yolov5n (nano), when performing inference as follows:
(venv-yolov5) $ python detect.py --weights yolov5n.pt --source data/images/bus.jpg
Note that the program will download the pretrained weights, yolov5n.pt
, as needed.
Using YOLO within the Jupyter environment¶
In the previous examples, we have used YOLO from the commandline. It is also possible to use YOLO from within the Jupyter environment.
Install YOLO within the Jupyter environment¶
!pip install --quiet ultralytics
Use YOLO in Jupyter Lab¶
Next, within the Jupyter lab environment you can use YOLO to perform inference. YOLO downloads the weights as needed.
from ultralytics import YOLO
model = YOLO('06-object-detection-using-yolo/yolov8n.pt')
model.info()
results = model('06-object-detection-using-yolo/bus.jpg')
from ultralytics.utils.plotting import Annotator
import matplotlib.pyplot as plt
import cv2
annotator = Annotator(cv2.cvtColor(results[0].orig_img, cv2.COLOR_BGR2RGB))
boxes = results[0].boxes
for box in boxes:
b = box.xyxy[0] # get box coordinates in (left, top, right, bottom) format
c = box.cls
annotator.box_label(b, model.names[int(c)])
plt.imshow(annotator.result())
YOLOv8n summary: 225 layers, 3157200 parameters, 0 gradients, 8.9 GFLOPs image 1/1 /Users/faisal/Dropbox/Teaching/My Courses/2023-2024/Winter 2024/csci-4220u-winter-2024/webpage-source/labs/06-object-detection-using-yolo/bus.jpg: 640x480 4 persons, 1 bus, 1 stop sign, 44.0ms Speed: 1.7ms preprocess, 44.0ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 480)
<matplotlib.image.AxesImage at 0x2b339ecd0>
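If you prefer not to draw the boxes yourself, the results object can also render the annotations for you. The minimal sketch below assumes the results list from the cell above and uses Results.plot(), which returns a BGR image array with boxes and labels drawn.
import matplotlib.pyplot as plt
import cv2

# results[0].plot() returns a BGR numpy array with boxes and class labels drawn
annotated_bgr = results[0].plot()
plt.imshow(cv2.cvtColor(annotated_bgr, cv2.COLOR_BGR2RGB))
plt.axis('off')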
Training YOLO¶
Now that we are able to use YOLO for out-of-the-box object detection, we will turn our attention to training YOLO on our own dataset. We will use the COCO128 dataset to experiment with model training.
COCO128 Dataset¶
Download the COCO128 dataset from https://www.kaggle.com/datasets/ultralytics/coco128. You may need to create a free Kaggle account in order to download this dataset. You can also download COCO128 from here. I am hosting COCO128 for the purposes of this course only.
Anatomy of the COCO128¶
Once downloaded, the COCO128 folder looks as shown below.
(venv-yolov5) $ coco128 tree -L 2
.
├── LICENSE
├── README.txt
├── images
│ └── train2017
└── labels
└── train2017
Important: note that the folder does not contain any test images. This is a toy dataset intended for understanding the YOLO training regime. The same set of images is used for both training and testing.
The dataset consists of two top-level folders, images and labels:
- the images/train2017 folder contains the images;
- the labels/train2017 folder contains text files, one per image, that include the annotations. For example, see Figure 4 below.
Annotation files¶
The annotation files contain one object annotation per line, using the following format:
class-id centerx centery width height
- class-id denotes the object class. The mapping between class ids and class names is found in an accompanying yaml file, e.g., coco128.yaml, that describes the dataset.
- centerx, centery, width and height describe the bounding box in normalized coordinates: pixel values are divided by the image width and height, so all four numbers lie between $0$ and $1$.
Figure 4 illustrates YOLO annotations:
Figure 4 Image 000000000036.jpg
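To make the format concrete, here is a minimal sketch (my own helper, not part of the lab code) that converts a bounding box given in pixel coordinates (xmin, ymin, xmax, ymax) into a YOLO annotation line; the box and image size in the example are hypothetical.
def to_yolo_line(class_id, xmin, ymin, xmax, ymax, image_w, image_h):
    # Box center, width and height in pixels
    cx = (xmin + xmax) / 2
    cy = (ymin + ymax) / 2
    w = xmax - xmin
    h = ymax - ymin
    # Normalize by the image dimensions so all values lie in [0, 1]
    return "{} {:.6f} {:.6f} {:.6f} {:.6f}".format(
        class_id, cx / image_w, cy / image_h, w / image_w, h / image_h)

# Hypothetical example: a 110x170 pixel box in a 267x400 image
print(to_yolo_line(0, 98, 62, 208, 232, 267, 400))  # -> 0 0.573034 0.367500 0.411985 0.425000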
Example images with annotation files¶
Figure 5 Image 000000000081.jpg
The corresponding annotation file is 000000000081.txt
4 0.516492 0.469388 0.912516 0.748282
Similarly, consider Figure 6 below
Figure 6 Image 000000000036.jpg
The corresponding annotation file is 000000000036.txt
25 0.475759 0.414523 0.951518 0.672422
0 0.671279 0.617945 0.645759 0.726859
Training on COCO128 dataset¶
YOLO requires a YAML file that describes the dataset. See coco128.yaml, which we will use to train YOLO on the COCO128 dataset. The coco128.yaml
file has the following structure.
path: "/Users/faisal/Dropbox/Teaching/My Courses/2023-2024/Winter 2024/csci-4220u-winter-2024/webpage-source/labs/data/datasets-do-not-upload/coco128"
train: images/train2017
val: images/train2017
test: (optional)
names:
0: person
1: bicycle
...
download: download-script-or-data-url (OPTIONAL)
- path: path of the root directory for the dataset.
- train: path of the directory containing training images (relative to path).
- val: path of the directory containing validation images (relative to path).
- test: path of the directory containing test images (relative to path). This is optional.
- names: list of class indices and their names.
- download: download link or a script. This is optional.
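As a quick sanity check, you can load the data yaml and inspect its contents. This is only a sketch; it assumes the pyyaml package (installed as part of the YOLO requirements) and the coco128.yaml path used elsewhere in this notebook.
import yaml

# Path is the one used elsewhere in this notebook; adjust it to your setup
with open('06-object-detection-using-yolo/coco128.yaml') as f:
    data_cfg = yaml.safe_load(f)

print('dataset root:', data_cfg['path'])
print('train images:', data_cfg['train'])
print('number of classes:', len(data_cfg['names']))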
Note that annotation files are loaded from the mirror path(s). E.g., for training images:
- images are loaded from images/train2017; and
- the corresponding annotation files are loaded from labels/train2017.
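The image-to-annotation mapping can be written down explicitly: swap the images directory for labels and the image extension for .txt. The helper below is my own sketch (not part of the YOLO API) and assumes forward-slash paths.
import os

def label_path_for(image_path):
    # e.g. .../images/train2017/000000000081.jpg -> .../labels/train2017/000000000081.txt
    stem, _ = os.path.splitext(image_path)
    return stem.replace('/images/', '/labels/') + '.txt'

print(label_path_for('/data/coco128/images/train2017/000000000081.jpg'))
# /data/coco128/labels/train2017/000000000081.txt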
I edited the default coco128.yaml
file to suit my setup. I only had to update the path to the root folder for COCO128.
...
path: "/Users/faisal/Dropbox/Teaching/My Courses/2023-2024/Winter 2024/csci-4220u-winter-2024/webpage-source/labs/data/datasets-do-not-upload/coco128"
...
Pretrained model¶
It is recommended to start from a pre-trained YOLO model and "further train" (fine-tune) it on your dataset.
from ultralytics import YOLO
model = YOLO('06-object-detection-using-yolo/yolov8n.pt') # load a pretrained model
model.info()
YOLOv8n summary: 225 layers, 3157200 parameters, 0 gradients, 8.9 GFLOPs
(225, 3157200, 0, 8.8575488)
Inference using pretrained model¶
Let's use this model to perform inference. Since this is a pretrained model, it already does quite well at person detection. The model found both Zidane and Ancelotti.
results = model(source='06-object-detection-using-yolo/zidane.jpg')
from ultralytics.utils.plotting import Annotator
import matplotlib.pyplot as plt
import cv2
annotator = Annotator(cv2.cvtColor(results[0].orig_img, cv2.COLOR_BGR2RGB))
boxes = results[0].boxes
for box in boxes:
b = box.xyxy[0] # get box coordinates in (left, top, right, bottom) format
c = box.cls
annotator.box_label(b, model.names[int(c)])
plt.imshow(annotator.result())
image 1/1 /Users/faisal/Dropbox/Teaching/My Courses/2023-2024/Winter 2024/csci-4220u-winter-2024/webpage-source/labs/06-object-detection-using-yolo/zidane.jpg: 384x640 2 persons, 1 tie, 41.9ms Speed: 1.6ms preprocess, 41.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)
<matplotlib.image.AxesImage at 0x2fd383c10>
Training a model from scratch¶
It is also possible, although not recommended, to train a model from scratch.
from ultralytics import YOLO
model = YOLO('06-object-detection-using-yolo/yolov8n.yaml') # Using yolov8n structure, but randomly initialized weights
model.info()
YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
(225, 3157200, 3157184, 8.8575488)
Inference using a randomly initialized model¶
Let's use this model to perform inference. Recall that this model is not pretrained.
You'll notice that the model fails to find both Zidane and Ancelotti.
results = model(source='06-object-detection-using-yolo/zidane.jpg')
from ultralytics.utils.plotting import Annotator
import matplotlib.pyplot as plt
import cv2
annotator = Annotator(cv2.cvtColor(results[0].orig_img, cv2.COLOR_BGR2RGB))
boxes = results[0].boxes
for box in boxes:
b = box.xyxy[0] # get box coordinates in (left, top, right, bottom) format
c = box.cls
annotator.box_label(b, model.names[int(c)])
plt.imshow(annotator.result())
image 1/1 /Users/faisal/Dropbox/Teaching/My Courses/2023-2024/Winter 2024/csci-4220u-winter-2024/webpage-source/labs/06-object-detection-using-yolo/zidane.jpg: 384x640 (no detections), 42.1ms Speed: 1.5ms preprocess, 42.1ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)
<matplotlib.image.AxesImage at 0x2c8e84a50>
Training: other considerations¶
Check out https://docs.ultralytics.com/modes/train/#arguments for a list of arguments for the train method. In particular, check out the freeze argument:
Freezes the first N layers of the model or specified layers by index, reducing the number of trainable parameters. Useful for fine-tuning or transfer learning.
from ultralytics import YOLO
use_pretrained = True
if use_pretrained:
print('Using a pretrained model')
model = YOLO('06-object-detection-using-yolo/yolov8n.pt') # load a pretrained model
else:
print('Using a scratch model')
model = YOLO('06-object-detection-using-yolo/yolov8n.yaml') # load a scratch model
model.info()
device = 'mps' # Since I am running this on Apple Silicon
epochs = 1
data = '06-object-detection-using-yolo/coco128.yaml'
# Uncomment the following to train
# This takes a while
#results = model.train(data=data, epochs=epochs, device=device);
Using a pretrained model YOLOv8n summary: 225 layers, 3157200 parameters, 0 gradients, 8.9 GFLOPs
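If you want to experiment with the freeze argument described above, a minimal sketch might look as follows. The argument names follow the Ultralytics docs linked above; the paths and device are the ones used elsewhere in this notebook and should be adjusted to your setup.
from ultralytics import YOLO

model = YOLO('06-object-detection-using-yolo/yolov8n.pt')  # start from pretrained weights

# Freeze the first 10 layers and fine-tune only the remaining ones.
# Uncomment the following to train; this takes a while.
#results = model.train(data='06-object-detection-using-yolo/coco128.yaml',
#                      epochs=1, freeze=10, device='mps')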
Self-Test¶
At this point, please confirm that you are able to complete the following tasks:
- Install the ultralytics YOLO distribution.
- Use a pretrained YOLO model to detect objects in images/videos.
- Visualize the object detection results.
- Download the COCO128 dataset and become familiar with the yaml file that describes its structure.
- Train a YOLO model from scratch on the COCO128 dataset.
- Use the trained YOLO model to detect objects in images from the COCO128 dataset.
Train YOLO to detect road signs¶
Recall the very first lab of this course. There you were asked to use template matching techniques to identify the stop signs in https://www.kaggle.com/datasets/andrewmvd/road-sign-detection. The dataset is also available here. I have made it available for educational purposes.
You are now ready to use YOLO to detect road signs in this dataset. Let's do that. You'll need to complete the following tasks.
Convert road-sign-detection dataset to use YOLO annotations.¶
Recall that the road-sign-detection dataset annotations are stored in XML format.
Example road0.xml
<annotation>
<folder>images</folder>
<filename>road0.png</filename>
<size>
<width>267</width>
<height>400</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>trafficlight</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<occluded>0</occluded>
<difficult>0</difficult>
<bndbox>
<xmin>98</xmin>
<ymin>62</ymin>
<xmax>208</xmax>
<ymax>232</ymax>
</bndbox>
</object>
</annotation>
We need to convert this into the annotation format used by YOLO. See the section on the COCO128 dataset above.
Data pipelines¶
Deep learning belongs to the category of data-driven approaches. Consequently, a big part of developing deep learning solutions is to construct efficient and highly scalable data pipelines. Once the data pipeline is in place, it is much easier to develop deep learning models and capture their performance.
In this lab, you are asked to take the road-sign-detection dataset and transform it into a form that YOLO can understand. The lessons learned here will be useful as you develop larger, more sophisticated deep learning models.
Constructing YOLO data pipeline from road-sign-detection dataset¶
The following code, taken from https://blog.paperspace.com/train-yolov5-custom-data/, does just that. It takes an XML file and outputs the annotations found within in YOLO format. The code can also create txt files, one per image, to store the annotations. This is similar to how the COCO128 dataset is structured.
import xml.etree.ElementTree as ET
# Function to get the data from XML Annotation
def extract_info_from_xml(xml_file):
root = ET.parse(xml_file).getroot()
# Initialise the info dict
info_dict = {}
info_dict['bboxes'] = []
# Parse the XML Tree
for elem in root:
# Get the file name
if elem.tag == "filename":
info_dict['filename'] = elem.text
# Get the image size
elif elem.tag == "size":
image_size = []
for subelem in elem:
image_size.append(int(subelem.text))
info_dict['image_size'] = tuple(image_size)
# Get details of the bounding box
elif elem.tag == "object":
bbox = {}
for subelem in elem:
if subelem.tag == "name":
bbox["class"] = subelem.text
elif subelem.tag == "bndbox":
for subsubelem in subelem:
bbox[subsubelem.tag] = int(subsubelem.text)
info_dict['bboxes'].append(bbox)
return info_dict
# Dictionary that maps class names to IDs
class_name_to_id_mapping = {"trafficlight": 0,
"stop": 1,
"speedlimit": 2,
"crosswalk": 3}
import os

# Convert the info dict to the required yolo format and write it to disk
def convert_to_yolov5(info_dict, rootpath='.', write_to_file=False):
    print_buffer = []

    # For each bounding box
    for b in info_dict["bboxes"]:
        try:
            class_id = class_name_to_id_mapping[b["class"]]
        except KeyError:
            print("Invalid Class. Must be one from ", class_name_to_id_mapping.keys())
            continue  # skip boxes with unknown classes

        # Transform the bbox co-ordinates as per the format required by YOLO v5
        b_center_x = (b["xmin"] + b["xmax"]) / 2
        b_center_y = (b["ymin"] + b["ymax"]) / 2
        b_width = (b["xmax"] - b["xmin"])
        b_height = (b["ymax"] - b["ymin"])

        # Normalise the co-ordinates by the dimensions of the image
        image_w, image_h, image_c = info_dict["image_size"]
        b_center_x /= image_w
        b_center_y /= image_h
        b_width /= image_w
        b_height /= image_h

        # Write the bbox details to the buffer
        print_buffer.append("{} {:.3f} {:.3f} {:.3f} {:.3f}".format(class_id, b_center_x, b_center_y, b_width, b_height))

    if write_to_file:
        # Name of the file which we have to save
        save_file_name = os.path.join(rootpath, "annotations", info_dict["filename"].replace("png", "txt"))
        # Save the annotation to disk
        with open(save_file_name, "w") as f:
            print("\n".join(print_buffer), file=f)
    else:
        return print_buffer
xml_file = '06-object-detection-using-yolo/road0.xml'
xml_info = extract_info_from_xml(xml_file)
print('xml_info:\n', xml_info)
yolo_annotations = convert_to_yolov5(xml_info)
print('yolo_annotations:\n', yolo_annotations)
xml_info: {'bboxes': [{'class': 'trafficlight', 'xmin': 98, 'ymin': 62, 'xmax': 208, 'ymax': 232}], 'filename': 'road0.png', 'image_size': (267, 400, 3)} yolo_annotations: ['0 0.573 0.367 0.412 0.425']
You can use the above methods to iterate through all XML files and construct the text annotation files as follows.
Note: the following code will create one text file per XML file, placed in the same annotations folder as the XML files.
import os
!pip install tqdm
from tqdm import tqdm
# Rootpath indicates the root folder where you have stored the road-sign-detection dataset
rootpath = "/Users/faisal/Dropbox/Teaching/My Courses/2023-2024/Winter 2024/csci-4220u-winter-2024/webpage-source/labs/data/datasets-do-not-upload/road-sign-detection"
annotations = [os.path.join(rootpath,'annotations', x) for x in os.listdir(os.path.join(rootpath,'annotations')) if x[-3:] == "xml"]
annotations.sort()
for ann in tqdm(annotations):
info_dict = extract_info_from_xml(ann)
convert_to_yolov5(info_dict, rootpath=rootpath, write_to_file=True)
annotations = [os.path.join(rootpath,'annotations', x) for x in os.listdir(os.path.join(rootpath,'annotations')) if x[-3:] == "txt"]
Requirement already satisfied: tqdm in ./yolo-lab/venv-yolov5/lib/python3.11/site-packages (4.66.2)
100%|███████████████████████████████████████████| 877/877 [00:00<00:00, 7283.93it/s]
Check annotations¶
It is wise to check the annotations. Check out the annotation code above to see how you can draw bounding boxes and class labels on the images.
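Alternatively, you can read a YOLO annotation file directly and draw the de-normalized boxes yourself. The following is a minimal sketch; the image/label pair and paths are examples based on the converted dataset (rootpath and the class mapping come from the cells above) and should be adjusted to your setup.
import os
import cv2
import matplotlib.pyplot as plt

def draw_yolo_annotations(image_path, label_path, class_names):
    img = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
    h, w = img.shape[:2]
    with open(label_path) as f:
        for line in f:
            cls, cx, cy, bw, bh = line.split()
            # De-normalize the box back to pixel coordinates
            cx, cy, bw, bh = float(cx) * w, float(cy) * h, float(bw) * w, float(bh) * h
            x1, y1 = int(cx - bw / 2), int(cy - bh / 2)
            x2, y2 = int(cx + bw / 2), int(cy + bh / 2)
            cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2)
            cv2.putText(img, class_names[int(cls)], (x1, max(y1 - 5, 10)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)
    plt.imshow(img)
    plt.axis('off')

# Example image/annotation pair from the converted road-sign-detection dataset
class_names = ["trafficlight", "stop", "speedlimit", "crosswalk"]
draw_yolo_annotations(os.path.join(rootpath, 'images', 'road0.png'),
                      os.path.join(rootpath, 'annotations', 'road0.txt'),
                      class_names)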
Split data into train, validation, and test sets¶
The following code constructs six lists:
- train_images;
- train_annotations;
- val_images;
- val_annotations;
- test_images; and
- test_annotations.
Each list contains filenames.
import random
!pip install --quiet scikit-learn
from sklearn.model_selection import train_test_split
images = [os.path.join(rootpath, 'images', x) for x in os.listdir(os.path.join(rootpath, 'images'))]
annotations = [os.path.join(rootpath,'annotations', x) for x in os.listdir(os.path.join(rootpath,'annotations')) if x[-3:] == "txt"]
images.sort()
annotations.sort()
train_images, val_images, train_annotations, val_annotations = train_test_split(images, annotations, test_size = 0.2, random_state = 1)
val_images, test_images, val_annotations, test_annotations = train_test_split(val_images, val_annotations, test_size = 0.5, random_state = 1)
Moving/copying images to their respective folders¶
Move train, validation and test files (images and annotations) to their own directories. For the road sign detection dataset you can use the following layout (inspired by COCO128).
➜ road-sign-detection tree .
.
├── images
│ ├── test
│ ├── train
│ └── val
└── labels
├── test
├── train
└── val
This process can be easily automated using Python's shutil (shell utilities) package.
import shutil

def move_files_to_folder(list_of_files, destination_folder):
    # Move each file to the destination folder; stop if any move fails
    for f in list_of_files:
        try:
            shutil.move(f, destination_folder)
        except Exception as e:
            print(f, e)
            assert False
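For example, you might create the target layout and move the six lists produced above into it as follows. This is a sketch; it assumes rootpath and the train/val/test lists from the previous cells are still in scope.
import os

# Create the COCO128-style folder layout
for split in ['train', 'val', 'test']:
    os.makedirs(os.path.join(rootpath, 'images', split), exist_ok=True)
    os.makedirs(os.path.join(rootpath, 'labels', split), exist_ok=True)

# Move images and their annotation files into the corresponding split folders
move_files_to_folder(train_images, os.path.join(rootpath, 'images', 'train'))
move_files_to_folder(val_images, os.path.join(rootpath, 'images', 'val'))
move_files_to_folder(test_images, os.path.join(rootpath, 'images', 'test'))
move_files_to_folder(train_annotations, os.path.join(rootpath, 'labels', 'train'))
move_files_to_folder(val_annotations, os.path.join(rootpath, 'labels', 'val'))
move_files_to_folder(test_annotations, os.path.join(rootpath, 'labels', 'test'))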
Create a data (yaml) file that YOLO will use to train on the road-sign-detection dataset.¶
The yaml file that describes the road-sign-detection dataset will look something like
path: /path/to/Road_Sign_Dataset
train: images/train/
val: images/val/
test: images/test/
nc: 4
names: ["trafficlight", "stop", "speedlimit", "crosswalk"]
YOLO models for road sign detection¶
Now you have everything that you need to train YOLO to detect road signs.
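A minimal training sketch might look as follows. Here, road-sign-detection.yaml is an assumed name for the data file you created in the previous step, and device and epochs should be adjusted to your setup.
from ultralytics import YOLO

model = YOLO('yolov8n.pt')  # nano model; pretrained weights are downloaded as needed

# Uncomment the following to train; this takes a while
#results = model.train(data='road-sign-detection.yaml', epochs=5, device='mps')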
TO DO¶
1. Construct a pretrained YOLO and capture its performance on the test set without any training.
2. Construct a pretrained YOLO. Train it for 5 epochs on the training set. Then capture its performance on the test set.
3. Train a YOLO from scratch for 5 and 15 epochs on the training set. Then capture its performance on the test set.
4. Construct a table that compares the performance obtained under the various settings.
5. Pick a small subset of test images and show qualitative results under the various settings.
The train, validation, and test sets here refer to the road-sign-detection dataset.
Important: Let's use the smallest (nano) YOLO variant, e.g., yolov8n.pt, for these experiments. A sketch of how you might capture test-set performance follows below.
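To capture performance on the test set, you can use the model's val method. The sketch below assumes the split argument and metric attribute names as documented by Ultralytics, and road-sign-detection.yaml is the assumed name of your data file; use the weights produced by each of your training runs as appropriate.
from ultralytics import YOLO

model = YOLO('yolov8n.pt')  # or the weights saved by one of your training runs

# Evaluate on the test split defined in the data yaml
metrics = model.val(data='road-sign-detection.yaml', split='test', device='mps')
print(metrics.box.map50, metrics.box.map)  # mAP@0.5 and mAP@0.5:0.95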
Jupyter notebook
Source notebook is available here.
What to submit¶
- Table constructed in step 4.
- Qualitative results in step 5.
- Model weights in steps 1, 2, and 3.
GPU resources¶
(Experimental) You can use the GPU resources available at https://hubdev.science.ontariotechu.ca/ to complete your lab.