ANPR OpenCV PyTesseract Python Code not working

Crappy coder needs help with code. I began work on a Python script for an Automatic Number Plate Recognition (ANPR) system on a Raspberry Pi. The intended goal is to locate a license plate in an image, read its text, and save that text to a file. The text is then displayed on an HTML webpage served via the Django framework.

I've been told to change from PyTesseract to a different OCR model but I'm too far into the project to really change anything substantially.

I'm not sure what's going wrong specifically with the code so any input is appreciated.

import cv2
import pytesseract
import os
import datetime
import numpy as np

# Directory where cropped plate images are written.
IMAGE_DIR = "captured_plates"
# exist_ok=True makes creation idempotent and avoids the check-then-create
# race of testing os.path.exists() before calling os.makedirs().
os.makedirs(IMAGE_DIR, exist_ok=True)

# Plain-text logs; each record is written as "PLATE,timestamp" on its own line.
ARRIVED_FILE = "arrived.txt"
LEFT_FILE = "left.txt"

# Open video capture device index 0 (usually the first attached camera —
# confirm on the target Pi). `cap` is read frame-by-frame by the main loop
# and released when the script exits.
cap = cv2.VideoCapture(0)

def preprocess_image(image):
    """Produce a Canny edge map of a BGR frame.

    The frame is converted to grayscale, smoothed with a 5x5 Gaussian
    kernel, and passed through Canny edge detection (thresholds 100/200).
    The resulting edge image is returned.
    """
    smoothed = cv2.GaussianBlur(
        cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), (5, 5), 0
    )
    return cv2.Canny(smoothed, 100, 200)

def find_license_plate(image):
    """Locate the most plate-like region in a frame and return it cropped.

    Contours are extracted from the edge map produced by
    ``preprocess_image``. A contour's bounding box is kept as a candidate
    when its aspect ratio is between 2 and 6, its width between 100 and
    500 px, and its height between 40 and 150 px (all exclusive).

    Args:
        image: BGR frame to search.

    Returns:
        The crop of ``image`` covering the widest candidate box, or
        ``None`` when no bounding box satisfies the heuristics.
    """
    processed = preprocess_image(image)

    # findContours returns (contours, hierarchy) on OpenCV 2.x/4.x but
    # (image, contours, hierarchy) on 3.x; the contour list is always the
    # second-to-last element, so index it instead of unpacking two values.
    contours = cv2.findContours(
        processed, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
    )[-2]

    candidates = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        aspect_ratio = w / float(h)

        # Heuristic size/shape window for a plate at the expected distance.
        if 2 < aspect_ratio < 6 and 100 < w < 500 and 40 < h < 150:
            candidates.append((x, y, w, h))

    if not candidates:
        return None

    # Widest candidate wins; max() keeps the first one on ties, matching a
    # stable descending sort without sorting the whole list.
    x, y, w, h = max(candidates, key=lambda box: box[2])
    return image[y:y + h, x:x + w]

def read_license_plate(image):
    """Find the plate in a frame and OCR its text.

    Args:
        image: BGR frame to search.

    Returns:
        A ``(plate_text, plate_region)`` tuple. ``plate_text`` is the OCR
        output reduced to upper-case alphanumeric characters (possibly an
        empty string) and ``plate_region`` is the cropped plate image.
        When no plate region is found the result is ``(None, None)``, so
        callers can always unpack two values.
    """
    plate_region = find_license_plate(image)
    if plate_region is None:
        # Always return a pair: a bare None here makes
        # `text, img = read_license_plate(frame)` raise TypeError on every
        # frame that contains no plate.
        return None, None

    gray_plate = cv2.cvtColor(plate_region, cv2.COLOR_BGR2GRAY)
    # With THRESH_OTSU the threshold is computed from the image and the
    # supplied value is ignored, so pass 0 as is conventional.
    _, thresh_plate = cv2.threshold(
        gray_plate, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )

    # --psm 8: treat the image as a single word.
    text = pytesseract.image_to_string(thresh_plate, config='--psm 8')
    plate_text = ''.join(filter(str.isalnum, text)).upper()
    return plate_text, plate_region

def _recorded_plates(path):
    """Return the set of plate numbers already logged in the file at *path*.

    Each record is a "PLATE,timestamp" line; only the part before the first
    comma is kept. A missing file yields an empty set.
    """
    try:
        with open(path, "r") as f:
            return {
                line.split(",", 1)[0]
                for line in f.read().splitlines()
                if line
            }
    except FileNotFoundError:
        return set()


def save_plate(plate_text, image):
    """Save the plate image and log the plate as arrived or left.

    The crop is written to ``IMAGE_DIR`` as ``<plate>_<timestamp>.jpg``.
    The plate is then appended to ``ARRIVED_FILE`` if it has never been
    logged there; otherwise to ``LEFT_FILE`` if it has not been logged
    there yet; otherwise nothing is logged.

    Args:
        plate_text: Recognised plate string.
        image: Cropped plate image to store.

    NOTE(review): this is called once per detection, so a plate seen in two
    consecutive frames is logged as arrived and then immediately as left.
    A cooldown between events is probably needed — confirm intended
    behaviour.
    """
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    filename = f"{IMAGE_DIR}/{plate_text}_{timestamp}.jpg"
    cv2.imwrite(filename, image)

    # Compare against the plate field only. Comparing against whole
    # "PLATE,timestamp" lines never matches, which would log every detection
    # as a new arrival and never write to LEFT_FILE.
    arrived_plates = _recorded_plates(ARRIVED_FILE)
    left_plates = _recorded_plates(LEFT_FILE)

    if plate_text not in arrived_plates:
        target = ARRIVED_FILE
    elif plate_text not in left_plates:
        target = LEFT_FILE
    else:
        return

    with open(target, "a") as f:
        f.write(f"{plate_text},{timestamp}\n")

# Main capture loop: grab a frame, try to read a plate from it, log any
# plausible result, and show the live frame until 'q' is pressed.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame delivered; report it instead of exiting silently.
            print("Failed to read a frame from the camera; stopping.")
            break

        # read_license_plate may return None when no plate is found;
        # unpacking that directly raises TypeError, so normalise it first.
        result = read_license_plate(frame)
        plate_text, plate_image = result if result is not None else (None, None)

        if plate_text and len(plate_text) > 5:  # Ensure valid plate length
            save_plate(plate_text, plate_image)
            print(f"Detected Plate: {plate_text}")

        cv2.imshow("ANPR", frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close windows even if an exception occurred.
    cap.release()
    cv2.destroyAllWindows()

It's developed in a Raspberry Pi environment. It's connected to an HTML webpage via the Django framework.

Back to Top