ai-school/mv-and-ip/car_plate.py

import cv2 as cv
import numpy as np
import matplotlib.pyplot as plt
import argparse
import typing
import logging
from pathlib import Path
from dataclasses import dataclass


# Contours whose area (in square pixels) is below this are treated as noise.
_MIN_CONTOUR_AREA = 500


def _find_plate_quad(contours: typing.Sequence[np.ndarray]) -> typing.Optional[np.ndarray]:
    """Pick the contour most likely to be the plate and return its four corners.

    :param contours: The external contours found in the edge image.
    :return: An integer array holding 4 corner points, or None if no contour qualifies.
    """
    # Measure every contour once; both selection passes below reuse these values.
    candidates = []
    for contour in contours:
        area = cv.contourArea(contour)
        if area < _MIN_CONTOUR_AREA:
            continue
        _, _, w, h = cv.boundingRect(contour)
        aspect_ratio = w / h if h > 0 else 0.0
        candidates.append((area, aspect_ratio, contour))

    # Pass 1: the largest contour that approximates to a quadrilateral and
    # whose bounding box has an aspect ratio between 2 and 5.
    best_area = 0.0
    best_quad = None
    for area, aspect_ratio, contour in candidates:
        if not (2 <= aspect_ratio <= 5) or area <= best_area:
            continue
        peri = cv.arcLength(contour, True)
        approx = cv.approxPolyDP(contour, 0.02 * peri, True)
        if len(approx) == 4:
            best_area = area
            best_quad = approx
    if best_quad is not None:
        return best_quad

    # Pass 2 (fallback): no clean quadrilateral, so take the largest contour
    # with a looser aspect ratio and use its minimum-area rotated rectangle.
    loose = [item for item in candidates if 1.5 <= item[1] <= 6]
    if not loose:
        return None
    _, _, contour = max(loose, key=lambda item: item[0])
    box = cv.boxPoints(cv.minAreaRect(contour))
    # np.int0 was removed in NumPy 2.0; astype(np.intp) is the equivalent cast.
    return box.astype(np.intp)


def _order_corners(quad: np.ndarray) -> np.ndarray:
    """Order 4 points as top-left, top-right, bottom-right, bottom-left.

    :param quad: An array that can be reshaped to (4, 2) holding (x, y) points.
    :return: A float32 array of shape (4, 2) with the points in that order.
    """
    pts = quad.reshape(4, 2)
    sums = pts.sum(axis=1)  # x + y: smallest at top-left, largest at bottom-right
    diffs = np.diff(pts, axis=1)  # y - x: smallest at top-right, largest at bottom-left
    return np.array(
        [
            pts[np.argmin(sums)],
            pts[np.argmin(diffs)],
            pts[np.argmax(sums)],
            pts[np.argmax(diffs)],
        ],
        dtype="float32",
    )


def extract_car_plate(img: cv.typing.MatLike) -> typing.Optional[cv.typing.MatLike]:
    """Extract the car plate part from given image.

    The plate is located from the edge contours of the image, warped to a
    front view and binarized with CLAHE followed by inverted Otsu thresholding.

    :param img: The image containing car plate in BGR format.
    :return: The image of binary car plate in U8 format if succeed, otherwise None.
    """
    if img is None or img.size == 0:
        logging.error("Empty input image")
        return None

    # Step 1: Grayscale, blur to reduce noise, then Canny edge detection
    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    blurred = cv.GaussianBlur(gray, (5, 5), 0)
    edges = cv.Canny(blurred, 50, 150)

    # Step 2: Morphological operations to connect broken edges
    kernel = cv.getStructuringElement(cv.MORPH_RECT, (5, 5))
    dilated = cv.dilate(edges, kernel, iterations=2)
    closed = cv.morphologyEx(dilated, cv.MORPH_CLOSE, kernel)

    # Step 3: Find contours
    contours, _ = cv.findContours(closed, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
    if not contours:
        logging.error("No contours found")
        return None

    # Step 4: Find the most likely license plate contour
    quad = _find_plate_quad(contours)
    if quad is None:
        logging.error("No valid contour found")
        return None

    # Step 5: Perspective transformation to get front view
    corners = _order_corners(quad)
    top_left, top_right, bottom_right, bottom_left = corners

    # The output size is the longer of each pair of opposite edges.
    max_width = max(
        int(np.linalg.norm(top_left - top_right)),
        int(np.linalg.norm(bottom_right - bottom_left)),
    )
    max_height = max(
        int(np.linalg.norm(top_left - bottom_left)),
        int(np.linalg.norm(top_right - bottom_right)),
    )
    # A zero dimension makes warpPerspective raise, and a 1-pixel dimension
    # collapses the destination quad, so reject both.
    if max_width < 2 or max_height < 2:
        logging.error("Degenerate plate region")
        return None

    dst_pts = np.array(
        [
            [0, 0],
            [max_width - 1, 0],
            [max_width - 1, max_height - 1],
            [0, max_height - 1],
        ],
        dtype="float32",
    )
    M = cv.getPerspectiveTransform(corners, dst_pts)
    warped = cv.warpPerspective(img, M, (max_width, max_height))

    # Step 6: Enhance local contrast, then binarize with Otsu's global threshold
    warped_gray = cv.cvtColor(warped, cv.COLOR_BGR2GRAY)
    clahe = cv.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = clahe.apply(warped_gray)
    _, binary = cv.threshold(enhanced, 0, 255, cv.THRESH_BINARY_INV + cv.THRESH_OTSU)

    # Step 7: Clean up the binary image with a small morphological close
    kernel_clean = cv.getStructuringElement(cv.MORPH_RECT, (2, 2))
    cleaned = cv.morphologyEx(binary, cv.MORPH_CLOSE, kernel_clean)

    # Ensure the output is in the correct format (U8) without a needless copy
    return cleaned.astype(np.uint8, copy=False)


@dataclass
class Cli:
    """The user request parsed from command line arguments."""

    input_file: Path
    """The path to input file"""
    output_file: Path
    """The path to output file"""

    @classmethod
    def from_cmdline(cls, argv: typing.Optional[typing.Sequence[str]] = None) -> "Cli":
        """Build a request from command line arguments.

        :param argv: The argument list to parse, without the program name.
            If None, the arguments of the current process are used.
        :return: The parsed request.
        """
        # Build parser
        parser = argparse.ArgumentParser(
            prog="Car Plate Extractor",
            description="Extract the car plate part from given image.",
        )
        parser.add_argument(
            "-i",
            "--in",
            required=True,
            type=Path,
            action="store",
            dest="input_file",
            metavar="in.jpg",
            help="""The path to input image containing car plate.""",
        )
        parser.add_argument(
            "-o",
            "--out",
            required=True,
            type=Path,
            action="store",
            dest="output_file",
            metavar="out.png",
            help="""The path to output image for extracted car plate.""",
        )

        # Parse arguments and return; argparse falls back to sys.argv for None
        args = parser.parse_args(argv)
        return cls(args.input_file, args.output_file)


def main() -> int:
    """Run the car plate extractor as a command line program.

    :return: 0 if the plate image was written, 1 if loading, extraction or
        saving failed.
    """
    # Setup logging format
    logging.basicConfig(format="[%(levelname)s] %(message)s", level=logging.INFO)

    # Get user request
    cli = Cli.from_cmdline()

    # Load file
    in_img = cv.imread(str(cli.input_file), cv.IMREAD_COLOR)
    if in_img is None:
        logging.error("Fail to load image %s", cli.input_file)
        return 1

    # Extract the plate; the extractor logs the reason when it gives up
    out_img = extract_car_plate(in_img)
    if out_img is None:
        return 1

    # Save extracted file. imwrite reports failure either by returning False
    # or by raising cv.error, so handle both.
    try:
        saved = cv.imwrite(str(cli.output_file), out_img)
    except cv.error as err:
        logging.error("Fail to save image %s: %s", cli.output_file, err)
        return 1
    if not saved:
        logging.error("Fail to save image %s", cli.output_file)
        return 1
    return 0


if __name__ == "__main__":
    # Propagate the result as the process exit status
    raise SystemExit(main())