{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## DATA AUGMENTATION USING ALBEMENTATIONS" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 14/14 [00:02<00:00, 4.74it/s]\n" ] } ], "source": [ "import os\n", "import cv2\n", "import albumentations as A\n", "from tqdm import tqdm\n", "\n", "# ==== PATH CONFIG ====\n", "SOURCE_IMAGE_DIR = 'datasets/images/train'\n", "SOURCE_LABEL_DIR = 'datasets/labels/train'\n", "\n", "AUG_IMAGE_DIR = 'augmented_dataset/images'\n", "AUG_LABEL_DIR = 'augmented_dataset/labels'\n", "\n", "os.makedirs(AUG_IMAGE_DIR, exist_ok=True)\n", "os.makedirs(AUG_LABEL_DIR, exist_ok=True)\n", "\n", "# ==== AUGMENTATION PIPELINE ====\n", "transform = A.Compose([\n", " A.HorizontalFlip(p=0.5),\n", " A.RandomBrightnessContrast(p=0.3),\n", " A.Rotate(limit=15, p=0.5),\n", " A.MotionBlur(p=0.2),\n", " A.RandomGamma(p=0.3),\n", "], bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']))\n", "\n", "# ==== HELPER FUNCTION TO LOAD YOLO LABEL ====\n", "def load_yolo_labels(label_path):\n", " bboxes = []\n", " class_labels = []\n", " if not os.path.exists(label_path):\n", " return bboxes, class_labels\n", " with open(label_path, 'r') as f:\n", " for line in f.readlines():\n", " parts = line.strip().split()\n", " class_id = int(parts[0])\n", " bbox = list(map(float, parts[1:]))\n", " bboxes.append(bbox)\n", " class_labels.append(class_id)\n", " return bboxes, class_labels\n", "\n", "# ==== AUGMENT IMAGES ====\n", "NUM_AUGS = 10 # Number of augmentations per image\n", "\n", "for img_file in tqdm(os.listdir(SOURCE_IMAGE_DIR)):\n", " if not img_file.endswith(('.jpg', '.jpeg', '.png')):\n", " continue\n", "\n", " img_path = os.path.join(SOURCE_IMAGE_DIR, img_file)\n", " label_path = os.path.join(SOURCE_LABEL_DIR, os.path.splitext(img_file)[0] + '.txt')\n", "\n", " # Load image and bounding boxes\n", " image = cv2.imread(img_path)\n", " height, width = image.shape[:2]\n", " bboxes, class_labels = load_yolo_labels(label_path)\n", "\n", " if not bboxes:\n", " continue # Skip if no annotations\n", "\n", " # Augment multiple times\n", " for i in range(NUM_AUGS):\n", " augmented = transform(image=image, bboxes=bboxes, class_labels=class_labels)\n", " aug_img = augmented['image']\n", " aug_bboxes = augmented['bboxes']\n", " aug_classes = augmented['class_labels']\n", "\n", " # === Save new image ===\n", " new_img_name = f\"{os.path.splitext(img_file)[0]}_aug_{i}.jpg\"\n", " cv2.imwrite(os.path.join(AUG_IMAGE_DIR, new_img_name), aug_img)\n", "\n", " # === Save new label ===\n", " new_label_path = os.path.join(AUG_LABEL_DIR, f\"{os.path.splitext(img_file)[0]}_aug_{i}.txt\")\n", " with open(new_label_path, 'w') as f:\n", " for cls, box in zip(aug_classes, aug_bboxes):\n", " f.write(f\"{cls} {' '.join(map(str, box))}\\n\")\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### TRAINING THE MODEL" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "New https://pypi.org/project/ultralytics/8.3.114 available Update with 'pip install -U ultralytics'\n", "Ultralytics 8.3.113 Python-3.11.4 torch-2.6.0+cpu CPU (11th Gen Intel Core(TM) i7-1165G7 2.80GHz)\n", "\u001b[34m\u001b[1mengine\\trainer: \u001b[0mtask=detect, mode=train, model=yolov8n.pt, data=c:\\Users\\babaw\\Documents\\Work\\Mana Knight Digital\\ds_task_recycling_project\\datasets\\data.yaml, epochs=10, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train8, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, show_boxes=True, line_width=None, format=torchscript, keras=False, optimize=False, int8=False, dynamic=False, simplify=True, opset=None, workspace=None, nms=False, lr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=7.5, cls=0.5, dfl=1.5, pose=12.0, kobj=1.0, nbs=64, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, flipud=0.0, fliplr=0.5, bgr=0.0, mosaic=1.0, mixup=0.0, copy_paste=0.0, copy_paste_mode=flip, auto_augment=randaugment, erasing=0.4, cfg=None, tracker=botsort.yaml, save_dir=c:\\Users\\babaw\\Documents\\Work\\Mana Knight Digital\\ds_task_recycling_project\\datasets\\runs\\detect\\train8\n", "Overriding model.yaml nc=80 with nc=1\n", "\n", " from n params module arguments \n", " 0 -1 1 464 ultralytics.nn.modules.conv.Conv [3, 16, 3, 2] \n", " 1 -1 1 4672 ultralytics.nn.modules.conv.Conv [16, 32, 3, 2] \n", " 2 -1 1 7360 ultralytics.nn.modules.block.C2f [32, 32, 1, True] \n", " 3 -1 1 18560 ultralytics.nn.modules.conv.Conv [32, 64, 3, 2] \n", " 4 -1 2 49664 ultralytics.nn.modules.block.C2f [64, 64, 2, True] \n", " 5 -1 1 73984 ultralytics.nn.modules.conv.Conv [64, 128, 3, 2] \n", " 6 -1 2 197632 ultralytics.nn.modules.block.C2f [128, 128, 2, True] \n", " 7 -1 1 295424 ultralytics.nn.modules.conv.Conv [128, 256, 3, 2] \n", " 8 -1 1 460288 ultralytics.nn.modules.block.C2f [256, 256, 1, True] \n", " 9 -1 1 164608 ultralytics.nn.modules.block.SPPF [256, 256, 5] \n", " 10 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] \n", " 11 [-1, 6] 1 0 ultralytics.nn.modules.conv.Concat [1] \n", " 12 -1 1 148224 ultralytics.nn.modules.block.C2f [384, 128, 1] \n", " 13 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] \n", " 14 [-1, 4] 1 0 ultralytics.nn.modules.conv.Concat [1] \n", " 15 -1 1 37248 ultralytics.nn.modules.block.C2f [192, 64, 1] \n", " 16 -1 1 36992 ultralytics.nn.modules.conv.Conv [64, 64, 3, 2] \n", " 17 [-1, 12] 1 0 ultralytics.nn.modules.conv.Concat [1] \n", " 18 -1 1 123648 ultralytics.nn.modules.block.C2f [192, 128, 1] \n", " 19 -1 1 147712 ultralytics.nn.modules.conv.Conv [128, 128, 3, 2] \n", " 20 [-1, 9] 1 0 ultralytics.nn.modules.conv.Concat [1] \n", " 21 -1 1 493056 ultralytics.nn.modules.block.C2f [384, 256, 1] \n", " 22 [15, 18, 21] 1 751507 ultralytics.nn.modules.head.Detect [1, [64, 128, 256]] \n", "Model summary: 129 layers, 3,011,043 parameters, 3,011,027 gradients, 8.2 GFLOPs\n", "\n", "Transferred 319/355 items from pretrained weights\n", "Freezing layer 'model.22.dfl.conv.weight'\n", "\u001b[34m\u001b[1mtrain: \u001b[0mFast image access (ping: 0.10.0 ms, read: 1027.1189.7 MB/s, size: 1031.3 KB)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\u001b[34m\u001b[1mtrain: \u001b[0mScanning C:\\Users\\babaw\\Documents\\Work\\Mana Knight Digital\\ds_task_recycling_project\\datasets\\labels\\train.cache... 154 images, 0 backgrounds, 0 corrupt: 100%|██████████| 154/154 [00:00" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Load the model\n", "import cv2\n", "import os\n", "import matplotlib.pyplot as plt\n", "from ultralytics import YOLO\n", "\n", "current_directory = os.getcwd()\n", "model_path = current_directory + '\\\\datasets\\\\runs\\\\detect\\\\train8\\\\weights\\\\last.pt'\n", "model = YOLO(model_path) # Load the trained model\n", "\n", "image_path = current_directory + '\\\\training\\\\memory\\\\out15.png'\n", "results = model.predict(image_path)\n", "\n", "for r in results:\n", " im_array = r.plot()\n", " plt.imshow(cv2.cvtColor(im_array, cv2.COLOR_BGR2RGB))\n", " plt.show()\n" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.4" } }, "nbformat": 4, "nbformat_minor": 2 }