# Source: ai-school/mv-and-ip/car_plate.py
# (Python, 198 lines, 6.4 KiB)

import cv2 as cv
import numpy as np
import matplotlib.pyplot as plt
import argparse
import typing
import logging
from pathlib import Path
from dataclasses import dataclass
# Contours with a smaller area than this are treated as noise.
_MIN_PLATE_AREA = 500
# Accepted bounding-box width/height range for a 4-corner polygon candidate.
_STRICT_ASPECT_RANGE = (2.0, 5.0)
# Wider range used by the fallback pass when no 4-corner polygon qualifies.
_RELAXED_ASPECT_RANGE = (1.5, 6.0)


def _bounding_aspect_ratio(contour: cv.typing.MatLike) -> float:
    """Return width / height of the contour's upright bounding box (0 if flat)."""
    _, _, w, h = cv.boundingRect(contour)
    return float(w) / h if h > 0 else 0.0


def _find_plate_quad(
    contours: typing.Sequence[cv.typing.MatLike],
) -> typing.Optional[np.ndarray]:
    """Pick four corner points of the most plate-like contour, or None."""
    # Measure every contour once and drop the ones too small to be a plate.
    measured = ((cv.contourArea(c), c) for c in contours)
    candidates = [(area, c) for area, c in measured if area >= _MIN_PLATE_AREA]

    # Pass 1: the largest contour that simplifies to a quadrilateral and whose
    # bounding box has a plate-like aspect ratio.
    low, high = _STRICT_ASPECT_RANGE
    best_area = 0
    best_quad = None
    for area, contour in candidates:
        perimeter = cv.arcLength(contour, True)
        approx = cv.approxPolyDP(contour, 0.02 * perimeter, True)
        if len(approx) != 4:
            continue
        if low <= _bounding_aspect_ratio(contour) <= high and area > best_area:
            best_area = area
            best_quad = approx
    if best_quad is not None:
        return best_quad

    # Pass 2 (fallback): the largest contour within a relaxed aspect-ratio
    # range, represented by the corners of its minimum-area rotated rectangle.
    low, high = _RELAXED_ASPECT_RANGE
    relaxed = [
        (area, c)
        for area, c in candidates
        if low <= _bounding_aspect_ratio(c) <= high
    ]
    if not relaxed:
        return None
    # max() keeps the first of equally large contours, like a strict ">" scan.
    _, winner = max(relaxed, key=lambda item: item[0])
    box = cv.boxPoints(cv.minAreaRect(winner))
    # np.int0 was removed in NumPy 2.0; np.intp is the type it aliased.
    return box.astype(np.intp)


def _order_corners(quad: np.ndarray) -> np.ndarray:
    """Order 4 points as top-left, top-right, bottom-right, bottom-left."""
    pts = quad.reshape(4, 2)
    ordered = np.zeros((4, 2), dtype="float32")
    # x + y is smallest at the top-left corner and largest at the bottom-right.
    coord_sum = pts.sum(axis=1)
    ordered[0] = pts[np.argmin(coord_sum)]
    ordered[2] = pts[np.argmax(coord_sum)]
    # y - x is smallest at the top-right corner and largest at the bottom-left.
    coord_diff = np.diff(pts, axis=1)
    ordered[1] = pts[np.argmin(coord_diff)]
    ordered[3] = pts[np.argmax(coord_diff)]
    return ordered


def _warp_to_front_view(
    img: cv.typing.MatLike, corners: np.ndarray
) -> typing.Optional[cv.typing.MatLike]:
    """Warp the quadrilateral ``corners`` of ``img`` to an upright rectangle."""
    top_left, top_right, bottom_right, bottom_left = corners
    # Use the longer of each pair of opposite edges as the output size.
    out_width = max(
        int(np.linalg.norm(top_left - top_right)),
        int(np.linalg.norm(bottom_right - bottom_left)),
    )
    out_height = max(
        int(np.linalg.norm(top_left - bottom_left)),
        int(np.linalg.norm(top_right - bottom_right)),
    )
    # The sum/difference ordering can map one point to two corners, which
    # collapses an edge; warping to a 0- or 1-pixel image is meaningless and
    # a zero-sized target makes warpPerspective fail.
    if out_width < 2 or out_height < 2:
        return None

    target = np.array(
        [
            [0, 0],
            [out_width - 1, 0],
            [out_width - 1, out_height - 1],
            [0, out_height - 1],
        ],
        dtype="float32",
    )
    transform = cv.getPerspectiveTransform(corners, target)
    return cv.warpPerspective(img, transform, (out_width, out_height))


def _binarize_plate(plate_bgr: cv.typing.MatLike) -> cv.typing.MatLike:
    """Turn the warped BGR plate image into a cleaned binary U8 image."""
    plate_gray = cv.cvtColor(plate_bgr, cv.COLOR_BGR2GRAY)
    # Enhance local contrast before thresholding.
    clahe = cv.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = clahe.apply(plate_gray)
    # Inverted Otsu threshold: pixels darker than the threshold become 255.
    _, binary = cv.threshold(
        enhanced, 0, 255, cv.THRESH_BINARY_INV + cv.THRESH_OTSU
    )
    # A small closing fills pin-holes left by the threshold.
    kernel_clean = cv.getStructuringElement(cv.MORPH_RECT, (2, 2))
    cleaned = cv.morphologyEx(binary, cv.MORPH_CLOSE, kernel_clean)
    return cleaned.astype(np.uint8)


def extract_car_plate(img: cv.typing.MatLike) -> typing.Optional[cv.typing.MatLike]:
    """Extract the car plate part from given image.

    The pipeline is: grayscale -> Gaussian blur -> Canny edges -> dilate and
    close to join edges -> external contours -> choose the most plate-like
    contour -> perspective-warp it to a front view -> CLAHE + inverted Otsu
    threshold -> small morphological closing.

    :param img: The image containing car plate in BGR format.
    :return: The image of binary car plate in U8 format if succeed, otherwise None.
        None is returned (after logging an error) when no contour is found,
        when no contour passes the area / aspect-ratio filters, or when the
        selected region collapses to less than 2 pixels in width or height.
    """
    # Edge map of the de-noised grayscale image.
    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    blurred = cv.GaussianBlur(gray, (5, 5), 0)
    edges = cv.Canny(blurred, 50, 150)

    # Thicken and close the edges so outlines form connected regions.
    kernel = cv.getStructuringElement(cv.MORPH_RECT, (5, 5))
    dilated = cv.dilate(edges, kernel, iterations=2)
    closed = cv.morphologyEx(dilated, cv.MORPH_CLOSE, kernel)

    contours, _ = cv.findContours(closed, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
    if not contours:
        logging.error("No contours found")
        return None

    quad = _find_plate_quad(contours)
    if quad is None:
        logging.error("No valid contour found")
        return None

    warped = _warp_to_front_view(img, _order_corners(quad))
    if warped is None:
        logging.error("Detected plate region is degenerate")
        return None

    return _binarize_plate(warped)
@dataclass
class Cli:
    """The user's request as given on the command line."""

    input_file: Path
    """The path to input file"""
    output_file: Path
    """The path to output file"""

    @staticmethod
    def from_cmdline() -> "Cli":
        """Parse the process command line into a :class:`Cli`.

        Both ``-i/--in`` and ``-o/--out`` are mandatory; argparse reports the
        usage error and exits when either one is missing.

        :return: The parsed request with both paths wrapped in ``Path``.
        """
        cmd_parser = argparse.ArgumentParser(
            prog="Car Plate Extractor",
            description="Extract the car plate part from given image.",
        )

        # (short flag, long flag, destination attribute, metavar, help text)
        option_table = (
            (
                "-i",
                "--in",
                "input_file",
                "in.jpg",
                "The path to input image containing car plate.",
            ),
            (
                "-o",
                "--out",
                "output_file",
                "out.png",
                "The path to output image for extracted car plate.",
            ),
        )
        for short_flag, long_flag, target, placeholder, description in option_table:
            cmd_parser.add_argument(
                short_flag,
                long_flag,
                required=True,
                type=str,
                action="store",
                dest=target,
                metavar=placeholder,
                help=description,
            )

        parsed = cmd_parser.parse_args()
        source_path = Path(parsed.input_file)
        target_path = Path(parsed.output_file)
        return Cli(source_path, target_path)
def main() -> None:
    """Run the command-line tool: load the image, extract the plate, save it.

    Failures are reported through ``logging.error`` and end the run early:
    an unreadable input image, an extraction that yields nothing (the
    extractor logs its own reason), or an output image that cannot be written.
    """
    # Setup logging format
    logging.basicConfig(format="[%(levelname)s] %(message)s", level=logging.INFO)

    # Get user request
    cli = Cli.from_cmdline()

    # Load file; cv.imread signals failure by returning None instead of raising.
    in_img = cv.imread(str(cli.input_file), cv.IMREAD_COLOR)
    if in_img is None:
        logging.error("Fail to load image %s", cli.input_file)
        return

    # Extract the plate; nothing to save when extraction fails.
    out_img = extract_car_plate(in_img)
    if out_img is None:
        return

    # cv.imwrite reports a failed write through its boolean return value,
    # so check it rather than silently producing no output file.
    if not cv.imwrite(str(cli.output_file), out_img):
        logging.error("Fail to save image %s", cli.output_file)
# Run the tool only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()