# ai-school/mv-and-ip/car_plate.py

import cv2 as cv
import numpy as np
import matplotlib.pyplot as plt
import argparse
import typing
import logging
from pathlib import Path
from dataclasses import dataclass

# Reference:
# - Claude Code
# - https://www.cnblogs.com/linuxAndMcu/p/19144795
# - https://www.51halcon.com/forum.php?mod=viewthread&tid=6562

# Side length, in pixels, of the square canvas that _uniform_car_plate
# letterboxes every input image onto.
UNI_HW: int = 1000


def _uniform_car_plate(img: cv.typing.MatLike) -> cv.typing.MatLike:
    """
    Letterbox the image onto a square black canvas of UNI_HW x UNI_HW pixels.

    The image is rescaled (up or down) with its aspect ratio preserved so that
    it fits inside the canvas, then pasted at the centre. The remaining border
    is left black.

    :param img: The image in BGR format (3 channels) to be uniformed.
    :return: The uniformed image in BGR format, of shape (UNI_HW, UNI_HW, 3).
    """
    # Calculate the new width and height for given image
    src_h, src_w = img.shape[:2]
    scale = min(UNI_HW / src_w, UNI_HW / src_h)
    # Truncation can yield 0 for extremely elongated images, which cv.resize
    # rejects, so keep every side at least one pixel long.
    new_w = max(1, min(UNI_HW, int(src_w * scale)))
    new_h = max(1, min(UNI_HW, int(src_h * scale)))

    # Resize the image
    resized_img = cv.resize(img, (new_w, new_h))

    # Create a black canvas of size UNI_HW x UNI_HW
    padded_img = np.zeros((UNI_HW, UNI_HW, 3), dtype=np.uint8)

    # Calculate position to paste the resized image (centered)
    y_offset = (UNI_HW - new_h) // 2
    x_offset = (UNI_HW - new_w) // 2

    # Paste the resized image onto the canvas
    padded_img[y_offset : y_offset + new_h, x_offset : x_offset + new_w] = resized_img

    # Return the padded image
    return padded_img


@dataclass
class CarPlateHsvBoundary:
    """HSV colour range describing one kind of car plate background."""

    # Lower HSV bound handed to cv.inRange.
    lower_bound: cv.typing.MatLike
    # Upper HSV bound handed to cv.inRange.
    upper_bound: cv.typing.MatLike


# HSV ranges of the supported plate colours, in the order they are tried.
# NOTE(review): the yellow hue range (32-68) is almost entirely contained in
# the green range (35-99) that precedes it, and extract_car_plate stops at the
# first range that yields a candidate - confirm the yellow bounds are intended.
CAR_PLATE_HSV_BOUNDARIES: tuple[CarPlateHsvBoundary, ...] = (
    # HSV range of the Chinese blue plate
    CarPlateHsvBoundary(np.array([100, 80, 60]), np.array([130, 255, 255])),
    # HSV range of the Chinese green plate
    CarPlateHsvBoundary(np.array([35, 43, 46]), np.array([99, 255, 255])),
    # HSV range of the Chinese yellow plate
    CarPlateHsvBoundary(np.array([32, 43, 46]), np.array([68, 255, 255])),
)


def _batchly_mask_car_plate(
    hsv: cv.typing.MatLike,
) -> typing.Iterator[cv.typing.MatLike]:
    """
    Lazily produce one cleaned-up colour mask per entry of CAR_PLATE_HSV_BOUNDARIES.

    For each colour range the image is thresholded with cv.inRange, then a
    morphological close followed by an open is applied to the mask.

    :param hsv: The image in HSV format to be masked.
    :return: An iterator over binary masks, in the same order as
        CAR_PLATE_HSV_BOUNDARIES.
    """
    # The structuring elements do not depend on the colour range, so they are
    # built once instead of once per iteration.
    # Morphology: closing fills holes, opening removes noise.
    kernel_close = cv.getStructuringElement(cv.MORPH_RECT, (25, 10))
    kernel_open = cv.getStructuringElement(cv.MORPH_RECT, (5, 5))

    for boundary in CAR_PLATE_HSV_BOUNDARIES:
        # Select the pixels whose colour lies within the given HSV range
        mask = cv.inRange(hsv, boundary.lower_bound, boundary.upper_bound)

        # Close first, then open
        mask = cv.morphologyEx(mask, cv.MORPH_CLOSE, kernel_close)
        mask = cv.morphologyEx(mask, cv.MORPH_OPEN, kernel_open)

        yield mask


@dataclass
class CarPlateRegion:
    """Axis-aligned bounding box of a car plate candidate, in pixels."""

    # Horizontal coordinate of the left edge of the box.
    x: int
    # Vertical coordinate of the top edge of the box.
    y: int
    # Width of the box.
    w: int
    # Height of the box.
    h: int


# Connected components whose area is below this value are ignored.
MIN_AREA: float = 3000
# Inclusive lower bound of the accepted width / height ratio.
MIN_ASPECT_RATIO: float = 1.5
# Inclusive upper bound of the accepted width / height ratio.
MAX_ASPECT_RATIO: float = 6.0
# Width / height ratio that receives the highest score.
BEST_ASPECT_RATIO: float = 3.5


def _analyse_car_plate_connection(
    mask: cv.typing.MatLike,
) -> typing.Optional[CarPlateRegion]:
    """
    Pick the connected component of the mask that looks most like a car plate.

    Components smaller than MIN_AREA, or whose bounding-box width / height
    ratio lies outside [MIN_ASPECT_RATIO, MAX_ASPECT_RATIO], are discarded.
    The remaining ones are scored by their area, weighted down the further
    their ratio is from BEST_ASPECT_RATIO; the highest score wins.

    :param mask: The binary mask to be analysed.
    :return: The bounding box of the best component, or None if no component
        passes the filters.
    """
    # Connected component analysis (8-connectivity); the label image itself
    # is not needed, only the per-component statistics.
    num_labels, _, stats, _ = cv.connectedComponentsWithStats(mask, connectivity=8)

    best: typing.Optional[CarPlateRegion] = None
    best_score = 0.0

    # Label 0 is the background, so real components start at row 1.
    for x, y, w, h, area in stats[1:num_labels]:
        # Check the area
        if area < MIN_AREA:
            continue
        # Check the aspect ratio (the epsilon avoids a division by zero)
        ratio = w / (h + 1e-5)
        if not MIN_ASPECT_RATIO <= ratio <= MAX_ASPECT_RATIO:
            continue
        score = area * (1 - abs(ratio - BEST_ASPECT_RATIO) / BEST_ASPECT_RATIO)
        if score > best_score:
            best_score = score
            # Convert the NumPy scalars so the fields really hold Python ints.
            best = CarPlateRegion(int(x), int(y), int(w), int(h))

    return best


@dataclass
class PerspectiveData:
    """Corners of a detected quadrilateral and the size of its rectified image."""

    # Corner points of the quadrilateral.
    top_left: tuple[int, int]
    top_right: tuple[int, int]
    bottom_left: tuple[int, int]
    bottom_right: tuple[int, int]

    # Width and height, in pixels, of the image produced by the perspective
    # transformation.
    new_width: int
    new_height: int


def _extract_perspective_data(
    gray: cv.typing.MatLike,
) -> typing.Optional[PerspectiveData]:
    """
    Estimate the four corners of the car plate in a cropped grayscale image.

    The largest external contour of the Canny edge map is approximated by a
    polygon. If that polygon is a quadrilateral, its corners are ordered and
    the size of the rectified (front view) image is derived from the lengths
    of its edges.

    :param gray: The single-channel 8-bit image of the cropped car plate.
    :return: The corners and the target size if a usable quadrilateral was
        found, otherwise None.
    """
    # Histogram balance to increase contrast
    hist_gray = cv.equalizeHist(gray)

    # Apply Gaussian blur to reduce noise
    blurred = cv.GaussianBlur(hist_gray, (5, 5), 0)

    # Edge detection using Canny
    edges = cv.Canny(blurred, 50, 150)

    # Find contours
    contours, _ = cv.findContours(edges, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
    if not contours:
        return None
    # Find the largest one because the whole image is the car plate
    max_area_contour = max(contours, key=cv.contourArea)

    # Approximate the contour
    peri = cv.arcLength(max_area_contour, True)
    approx = cv.approxPolyDP(max_area_contour, 0.02 * peri, True)
    if len(approx) != 4:
        return None

    pts = approx.reshape(4, 2)

    # Sum and difference of coordinates to find corners
    coord_sum = pts.sum(axis=1)
    coord_diff = pts[:, 1] - pts[:, 0]  # y - x
    tl_idx = int(np.argmin(coord_sum))  # Top-left has smallest sum
    br_idx = int(np.argmax(coord_sum))  # Bottom-right has largest sum
    tr_idx = int(np.argmin(coord_diff))  # Top-right has smallest difference
    bl_idx = int(np.argmax(coord_diff))  # Bottom-left has largest difference

    # The heuristic may assign one point to two corners for strongly rotated
    # quadrilaterals; such a result cannot be used for a transformation.
    if len({tl_idx, tr_idx, br_idx, bl_idx}) != 4:
        return None

    # Ordered corners: top-left, top-right, bottom-right, bottom-left
    rect = pts[[tl_idx, tr_idx, br_idx, bl_idx]].astype("float32")

    # Calculate width and height of new image
    width_a = np.linalg.norm(rect[0] - rect[1])  # top edge
    width_b = np.linalg.norm(rect[2] - rect[3])  # bottom edge
    max_width = max(int(width_a), int(width_b))

    height_a = np.linalg.norm(rect[0] - rect[3])  # left edge
    height_b = np.linalg.norm(rect[1] - rect[2])  # right edge
    max_height = max(int(height_a), int(height_b))

    # A target smaller than 2x2 pixels gives a degenerate transformation.
    if max_width < 2 or max_height < 2:
        return None

    def as_point(idx: int) -> tuple[int, int]:
        # Convert a NumPy row into the plain (x, y) tuple the dataclass declares.
        return int(pts[idx][0]), int(pts[idx][1])

    # Return value
    return PerspectiveData(
        top_left=as_point(tl_idx),
        top_right=as_point(tr_idx),
        bottom_left=as_point(bl_idx),
        bottom_right=as_point(br_idx),
        new_width=max_width,
        new_height=max_height,
    )


def extract_car_plate(img: cv.typing.MatLike) -> typing.Optional[cv.typing.MatLike]:
    """
    Extract the car plate part from given image.

    The image is first uniformed, then the plate is located by its colour in
    HSV space, cropped with a small margin, binarized with Otsu's threshold
    and finally, when the plate outline can be detected, perspective-corrected.

    :param img: The image containing car plate in BGR format.
    :return: The image of binary car plate in U8 format if succeed, otherwise None.
    """
    img = _uniform_car_plate(img)

    # Convert to HSV space
    hsv = cv.cvtColor(img, cv.COLOR_BGR2HSV)

    # Locate the car plate by its colour in HSV space
    candidate: typing.Optional[CarPlateRegion] = None
    for mask in _batchly_mask_car_plate(hsv):
        # Connected component analysis picks the region whose aspect ratio
        # fits a car plate best
        candidate = _analyse_car_plate_connection(mask)
        # Stop at the first colour that yields a candidate
        if candidate is not None:
            break

    if candidate is None:
        logging.error("Can not find any car plate.")
        return None

    # Slightly enlarge the region, clamped to the image, to get the final area
    pad = 6
    h_img, w_img = img.shape[:2]
    x1 = max(candidate.x - pad, 0)
    y1 = max(candidate.y - pad, 0)
    x2 = min(candidate.x + candidate.w + pad, w_img)
    y2 = min(candidate.y + candidate.h + pad, h_img)
    logging.info(f"车牌区域: x={x1}, y={y1}, w={x2 - x1}, h={y2 - y1}")

    # Work on the grayscale crop from here on
    gray = cv.cvtColor(img[y1:y2, x1:x2], cv.COLOR_BGR2GRAY)

    # Gaussian blur to reduce noise
    blurred = cv.GaussianBlur(gray, (3, 3), 0)

    # Otsu's automatic threshold turns bright pixels white; inverting it
    # afterwards turns them black on a white background.
    _, binary_otsu = cv.threshold(blurred, 0, 255, cv.THRESH_BINARY + cv.THRESH_OTSU)
    binary = cv.bitwise_not(binary_otsu)

    # Remove small noise (opening)
    kernel_denoise = cv.getStructuringElement(cv.MORPH_RECT, (2, 2))
    binary = cv.morphologyEx(binary, cv.MORPH_OPEN, kernel_denoise)

    # Try to fetch the perspective correction data. A target smaller than
    # 2x2 pixels cannot describe a valid transformation, so it is treated the
    # same way as missing data.
    perspective_data = _extract_perspective_data(gray)
    if (
        perspective_data is None
        or perspective_data.new_width < 2
        or perspective_data.new_height < 2
    ):
        logging.warning('Can not fetch perspective data. The output image has no perspective correction.')
        return binary

    # Perform the perspective correction
    out_w = perspective_data.new_width
    out_h = perspective_data.new_height
    perspective_src = np.array([
        list(perspective_data.top_left),
        list(perspective_data.top_right),
        list(perspective_data.bottom_right),
        list(perspective_data.bottom_left),
    ], dtype="float32")
    perspective_dst = np.array([
        [0, 0],
        [out_w - 1, 0],
        [out_w - 1, out_h - 1],
        [0, out_h - 1],
    ], dtype="float32")
    M = cv.getPerspectiveTransform(perspective_src, perspective_dst)
    return cv.warpPerspective(binary, M, (out_w, out_h))


@dataclass
class Cli:
    """The request of user given through the command line."""

    input_file: Path
    """The path to input file"""
    output_file: Path
    """The path to output file"""

    @staticmethod
    def from_cmdline() -> "Cli":
        """
        Build the request from the arguments of current process.

        :return: The request holding the input and output paths.
        """
        parser = argparse.ArgumentParser(
            prog="Car Plate Extractor",
            description="Extract the car plate part from given image.",
        )

        # Both options share the same shape, so they are described as rows of
        # (short flag, long flag, destination, metavar, help text).
        option_table = (
            (
                "-i",
                "--in",
                "input_file",
                "in.jpg",
                """The path to input image containing car plate.""",
            ),
            (
                "-o",
                "--out",
                "output_file",
                "out.png",
                """The path to output image for extracted car plate.""",
            ),
        )
        for short_flag, long_flag, target, placeholder, description in option_table:
            parser.add_argument(
                short_flag,
                long_flag,
                required=True,
                type=str,
                action="store",
                dest=target,
                metavar=placeholder,
                help=description,
            )

        # Parse argument from cmdline and wrap the paths
        parsed = parser.parse_args()
        source_path = Path(parsed.input_file)
        target_path = Path(parsed.output_file)
        return Cli(source_path, target_path)


def main():
    """
    Entry point of the script.

    Reads the image named on the command line, extracts the car plate from it
    and writes the result to the requested output path. Every failure is
    reported through logging.
    """
    # Setup logging format
    logging.basicConfig(format="[%(levelname)s] %(message)s", level=logging.DEBUG)

    # Get user request
    cli = Cli.from_cmdline()

    # Load file
    in_img = cv.imread(str(cli.input_file), cv.IMREAD_COLOR)
    if in_img is None:
        logging.error(f"Fail to load image {cli.input_file}")
        return

    # Extract the car plate; the failure has been logged by the extractor
    out_img = extract_car_plate(in_img)
    if out_img is None:
        return

    # Save extracted file. cv.imwrite reports a failed write through its
    # return value and raises cv.error when no encoder fits the file
    # extension, so check both instead of failing silently.
    try:
        saved = cv.imwrite(str(cli.output_file), out_img)
    except cv.error:
        saved = False
    if not saved:
        logging.error(f"Fail to save image {cli.output_file}")


# Run the command line tool only when this file is executed as a script.
if __name__ == "__main__":
    main()