1
0
Files
ai-school/mv-and-ip/car_plate.py

309 lines
10 KiB
Python
Raw Normal View History

2026-04-07 13:37:27 +08:00
import cv2 as cv
import numpy as np
import matplotlib.pyplot as plt
import argparse
import typing
import logging
from pathlib import Path
from dataclasses import dataclass
2026-04-07 23:40:07 +08:00
def _uniform_car_plate(img: cv.typing.MatLike) -> cv.typing.MatLike:
    """
    Uniform the image size to 512x512 while maintaining aspect ratio, padding with black.

    The image is scaled so that its longer side spans the whole canvas, then it is
    centered on a black 512x512 canvas.

    :param img: The image in BGR format to be uniformed.
    :return: The uniformed image in BGR format.
    :raises ValueError: If the given image has zero width or zero height.
    """
    UNI_HW: int = 512

    h, w = img.shape[:2]
    if h == 0 or w == 0:
        # Fail with a clear message instead of a ZeroDivisionError below.
        raise ValueError("Cannot uniform an empty image")

    # Scale factor that makes the longer side exactly UNI_HW.
    scale = min(UNI_HW / w, UNI_HW / h)
    # Round instead of truncating: float round-off can leave the product just
    # below the intended integer (e.g. 511.99999...), which would shrink the
    # image by one pixel. Clamp to [1, UNI_HW] so that a very elongated image
    # never produces a zero-sized side, which cv.resize rejects.
    new_w = min(UNI_HW, max(1, round(w * scale)))
    new_h = min(UNI_HW, max(1, round(h * scale)))

    resized_img = cv.resize(img, (new_w, new_h))

    # Black canvas which the resized image is pasted onto (centered).
    padded_img = np.zeros((UNI_HW, UNI_HW, 3), dtype=np.uint8)
    y_offset = (UNI_HW - new_h) // 2
    x_offset = (UNI_HW - new_w) // 2
    padded_img[y_offset:y_offset + new_h, x_offset:x_offset + new_w] = resized_img

    return padded_img
2026-04-07 13:37:27 +08:00
def _filter_plate_candidates(
    contours: typing.Sequence[cv.typing.MatLike],
) -> list[cv.typing.MatLike]:
    """Keep only the contours whose size and shape are plausible for a car plate.

    A contour passes when its area, bounding-box aspect ratio, solidity
    (contour area / convex hull area) and fill ratio (contour area /
    bounding-box area) are all inside the limits defined below.

    :param contours: The contours to be checked.
    :return: The contours passing every check, in their original order.
    """
    MIN_AREA: float = 2000
    MAX_AREA: float = 100000
    MIN_ASPECT_RATIO: float = 2.5
    MAX_ASPECT_RATIO: float = 6.0
    MIN_SOLIDITY: float = 0.6
    MIN_FILL_RATIO: float = 0.3

    candidates: list[cv.typing.MatLike] = []
    for i, contour in enumerate(contours):
        # Area limit
        area = cv.contourArea(contour)
        if area < MIN_AREA or area > MAX_AREA:
            logging.debug(f'Contour[{i}] failed at area limit. The area of this contour is {area}.')
            continue

        # Aspect ratio limit of the bounding rectangle
        _, _, w, h = cv.boundingRect(contour)
        aspect_ratio = w / h
        if aspect_ratio < MIN_ASPECT_RATIO or aspect_ratio > MAX_ASPECT_RATIO:
            logging.debug(f'Contour[{i}] failed at aspect ratio limit. The aspect ratio of this contour is {aspect_ratio}.')
            continue

        # Solidity limit: how much of the convex hull is occupied by the contour
        hull_area = cv.contourArea(cv.convexHull(contour))
        solidity = area / hull_area
        if solidity <= MIN_SOLIDITY:
            logging.debug(f'Contour[{i}] failed at solidity limit. The solidity of this contour is {solidity}.')
            continue

        # Fill ratio limit: how much of the bounding rectangle is occupied
        fill_ratio = area / (w * h)
        if fill_ratio <= MIN_FILL_RATIO:
            logging.debug(f'Contour[{i}] failed at rectangle fill ratio limit. The fill ratio of this contour is {fill_ratio}')
            continue

        logging.debug(f'Contour[{i}] is perfect.')
        candidates.append(contour)
    return candidates


def _select_plate_quad(
    contours: typing.Sequence[cv.typing.MatLike],
) -> typing.Optional[np.ndarray]:
    """Pick the four corner points of the most plate-like contour.

    The largest contour that approximates to a quadrilateral with a bounding-box
    aspect ratio in [2, 5] is preferred. If there is none, the rotated bounding
    box of the largest contour with an aspect ratio in [1.5, 6] is used instead.

    :param contours: The contours to choose from.
    :return: An integer array holding four points, or None if nothing fits.
    """
    MIN_AREA: float = 500

    best_quad: typing.Optional[np.ndarray] = None
    max_area: float = 0

    # First pass: look for real quadrilaterals (4 corners after approximation).
    for contour in contours:
        area = cv.contourArea(contour)
        if area < MIN_AREA:
            continue
        peri = cv.arcLength(contour, True)
        approx = cv.approxPolyDP(contour, 0.02 * peri, True)
        if len(approx) != 4:
            continue
        _, _, w, h = cv.boundingRect(contour)
        aspect_ratio = float(w) / h if h > 0 else 0
        if 2 <= aspect_ratio <= 5 and area > max_area:
            max_area = area
            best_quad = approx
    if best_quad is not None:
        return best_quad

    # Second pass: fall back to the rotated bounding box of the largest contour.
    for contour in contours:
        area = cv.contourArea(contour)
        if area < MIN_AREA:
            continue
        _, _, w, h = cv.boundingRect(contour)
        aspect_ratio = float(w) / h if h > 0 else 0
        if 1.5 <= aspect_ratio <= 6 and area > max_area:
            max_area = area
            box = cv.boxPoints(cv.minAreaRect(contour))
            # np.int0 was removed in NumPy 2.0; np.intp is the type it aliased.
            best_quad = box.astype(np.intp)
    return best_quad


def _order_quad_corners(quad: np.ndarray) -> np.ndarray:
    """Order four points as top-left, top-right, bottom-right, bottom-left.

    :param quad: An array holding exactly four (x, y) points in any order.
    :return: A float32 array with shape (4, 2) holding the ordered points.
    """
    pts = quad.reshape(4, 2)
    rect = np.zeros((4, 2), dtype="float32")
    # x + y: smallest for the top-left corner, largest for the bottom-right one.
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]
    # y - x: smallest for the top-right corner, largest for the bottom-left one.
    diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]
    return rect


def _binarize_plate(plate: cv.typing.MatLike) -> cv.typing.MatLike:
    """Convert the rectified BGR plate image into a cleaned binary U8 image.

    :param plate: The rectified car plate image in BGR format.
    :return: The binary image in U8 format.
    """
    plate_gray = cv.cvtColor(plate, cv.COLOR_BGR2GRAY)
    # Enhance local contrast before thresholding.
    clahe = cv.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = clahe.apply(plate_gray)
    # Inverted Otsu thresholding to get the binary image.
    _, binary = cv.threshold(enhanced, 0, 255, cv.THRESH_BINARY_INV + cv.THRESH_OTSU)
    # Close small holes left by thresholding.
    kernel_clean = cv.getStructuringElement(cv.MORPH_RECT, (2, 2))
    cleaned = cv.morphologyEx(binary, cv.MORPH_CLOSE, kernel_clean)
    # Ensure the output is in the correct format (U8).
    return cleaned.astype(np.uint8)


def extract_car_plate(img: cv.typing.MatLike) -> typing.Optional[cv.typing.MatLike]:
    """Extract the car plate part from given image.

    The image is normalized in size, its edges are detected and closed into
    contours, the most plate-like contour is rectified with a perspective
    transform, and the result is binarized.

    :param img: The image containing car plate in BGR format.
    :return: The image of binary car plate in U8 format if succeed, otherwise None.
    """
    # Reference: https://www.cnblogs.com/linuxAndMcu/p/19144795
    # Resize the image so that the size limits used when filtering contours
    # apply to every input.
    img = _uniform_car_plate(img)

    # Grayscale -> histogram equalization (more contrast) -> blur (less noise)
    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    hist_gray = cv.equalizeHist(gray)
    blurred = cv.GaussianBlur(hist_gray, (5, 5), 0)

    # Edge detection using Canny
    edges = cv.Canny(blurred, 50, 150)

    # Morphological closing to connect broken edges
    kernel_close = cv.getStructuringElement(cv.MORPH_RECT, (5, 5))
    closed = cv.morphologyEx(edges, cv.MORPH_CLOSE, kernel_close)

    # Find contours
    contours, _ = cv.findContours(closed, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
    if not contours:
        logging.error("No contours found")
        return None

    # List all contours
    logging.debug(f'Total {len(contours)} contours.')
    for i, contour in enumerate(contours):
        logging.debug(f'Contour[{i}] has {contour.shape[0]} points.')

    # Filter contours. Nothing is drawn onto img here because img is the
    # source of the perspective transform below.
    candidates = _filter_plate_candidates(contours)
    if not candidates:
        logging.error("No candidate contour")
        return None

    # Find the most likely license plate contour among the candidates
    best_contour = _select_plate_quad(candidates)
    if best_contour is None:
        logging.error("No valid contour found")
        return None

    # Perspective transformation to get front view
    rect = _order_quad_corners(best_contour)
    width_a = np.linalg.norm(rect[0] - rect[1])
    width_b = np.linalg.norm(rect[2] - rect[3])
    max_width = max(int(width_a), int(width_b))
    height_a = np.linalg.norm(rect[0] - rect[3])
    height_b = np.linalg.norm(rect[1] - rect[2])
    max_height = max(int(height_a), int(height_b))
    if max_width < 1 or max_height < 1:
        # A degenerate quadrilateral can not be warped into an image.
        logging.error("No valid contour found")
        return None

    dst_pts = np.array([
        [0, 0],
        [max_width - 1, 0],
        [max_width - 1, max_height - 1],
        [0, max_height - 1],
    ], dtype="float32")
    M = cv.getPerspectiveTransform(rect, dst_pts)
    warped = cv.warpPerspective(img, M, (max_width, max_height))

    # Binarize the rectified plate
    return _binarize_plate(warped)
@dataclass
class Cli:
    """The options given by user on the command line."""

    # The path to input file
    input_file: Path
    # The path to output file
    output_file: Path

    @staticmethod
    def from_cmdline() -> "Cli":
        """Parse the command line of this process.

        :return: The options parsed from the command line.
        """
        arg_parser = argparse.ArgumentParser(
            prog="Car Plate Extractor",
            description="Extract the car plate part from given image.",
        )

        # Both options are mandatory string options which only differ in the
        # values listed here: (short flag, long flag, dest, metavar, help).
        option_specs = (
            (
                "-i",
                "--in",
                "input_file",
                "in.jpg",
                """The path to input image containing car plate.""",
            ),
            (
                "-o",
                "--out",
                "output_file",
                "out.png",
                """The path to output image for extracted car plate.""",
            ),
        )
        for short_flag, long_flag, dest, metavar, help_text in option_specs:
            arg_parser.add_argument(
                short_flag,
                long_flag,
                required=True,
                type=str,
                action="store",
                dest=dest,
                metavar=metavar,
                help=help_text,
            )

        # Parse argument from cmdline and wrap both values into paths
        parsed = arg_parser.parse_args()
        return Cli(
            input_file=Path(parsed.input_file),
            output_file=Path(parsed.output_file),
        )
def main():
    """Load the image named on the command line, extract its car plate and save it.

    Errors are reported through logging; nothing is written when the input
    image can not be loaded or when the extraction returns None.
    """
    # Setup logging format
    logging.basicConfig(format="[%(levelname)s] %(message)s", level=logging.DEBUG)

    # Get user request
    cli = Cli.from_cmdline()

    # Load file. cv.imread returns None instead of raising when it fails.
    in_img = cv.imread(str(cli.input_file), cv.IMREAD_COLOR)
    if in_img is None:
        logging.error(f"Fail to load image {cli.input_file}")
        return

    # Save extracted file if possible
    out_img = extract_car_plate(in_img)
    if out_img is None:
        return
    # cv.imwrite reports a failed write through its boolean result, so check
    # it rather than failing silently.
    if not cv.imwrite(str(cli.output_file), out_img):
        logging.error(f"Fail to save image {cli.output_file}")
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()