ANPR OpenCV PyTesseract Python Code not working
Crappy coder needs help with code. I began work on a Python script for an Automated License Plate Recognition (ANPR) system on a Raspberry Pi. The intended goal is to extract a plate from an image, read the text, and save it to a file. The text is then displayed on an HTML webpage connected via the Django framework.
I've been told to change from PyTesseract to a different OCR model but I'm too far into the project to really change anything substantially.
I'm not sure what's going wrong specifically with the code so any input is appreciated.
import cv2
import pytesseract
import os
import datetime
import numpy as np
# Directory where cropped plate images are saved.
IMAGE_DIR = "captured_plates"
# exist_ok=True replaces the separate exists() check, which can race with
# another process creating the directory between the check and the call.
os.makedirs(IMAGE_DIR, exist_ok=True)

# Log files that save_plate appends "plate,timestamp" lines to.
ARRIVED_FILE = "arrived.txt"
LEFT_FILE = "left.txt"

# Open the camera at device index 0.
cap = cv2.VideoCapture(0)
def preprocess_image(image):
    """Return a Canny edge map of a BGR image.

    The image is converted to grayscale and smoothed with a 5x5 Gaussian
    blur before edges are detected with thresholds 100 and 200.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.GaussianBlur(grayscale, (5, 5), 0)
    return cv2.Canny(smoothed, 100, 200)
def find_license_plate(image):
    """Crop the region of ``image`` most likely to be the license plate.

    Contours are extracted from the edge map produced by
    ``preprocess_image`` and filtered by bounding-box width, height and
    aspect ratio.

    Returns:
        The crop of ``image`` for the widest bounding box that passes the
        filter, or ``None`` when no contour qualifies.
    """
    processed = preprocess_image(image)
    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x
    # return (contours, hierarchy); the contour list is second-to-last in
    # every version, so index it instead of unpacking a fixed arity.
    contours = cv2.findContours(
        processed, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
    )[-2]

    possible_plates = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        # Heuristic size and aspect-ratio window for a license plate.
        if 100 < w < 500 and 40 < h < 150 and 2 < w / float(h) < 6:
            possible_plates.append((x, y, w, h))

    if not possible_plates:
        return None

    # Pick the widest candidate (first one wins on ties).
    x, y, w, h = max(possible_plates, key=lambda box: box[2])
    return image[y:y + h, x:x + w]
def read_license_plate(image):
    """Locate the plate in ``image`` and run OCR on it.

    Returns:
        A ``(plate_text, plate_region)`` tuple. ``plate_text`` is the OCR
        output reduced to upper-cased alphanumeric characters and
        ``plate_region`` is the cropped plate image. When no plate region
        is found the result is ``(None, None)``, so callers can always
        unpack two values.
    """
    plate_region = find_license_plate(image)
    if plate_region is None:
        # Keep the return shape consistent with the success path; a bare
        # None here makes ``text, img = read_license_plate(...)`` raise
        # TypeError on every frame without a plate.
        return None, None

    gray_plate = cv2.cvtColor(plate_region, cv2.COLOR_BGR2GRAY)
    # With THRESH_OTSU the threshold is computed from the image and the
    # explicit threshold argument is ignored, hence the conventional 0.
    _, thresh_plate = cv2.threshold(
        gray_plate, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )
    # --psm 8 tells Tesseract to treat the image as a single word.
    text = pytesseract.image_to_string(thresh_plate, config='--psm 8')
    plate_text = ''.join(filter(str.isalnum, text)).upper()
    return plate_text, plate_region
def _logged_plates(path):
    """Return the set of plate numbers recorded in the log file at ``path``."""
    try:
        with open(path, "r") as f:
            # Each line is "plate,timestamp"; only the plate field is
            # relevant for membership checks.
            return {
                line.split(",", 1)[0]
                for line in f.read().splitlines()
                if line
            }
    except FileNotFoundError:
        return set()


def save_plate(plate_text, image):
    """Save the plate image and record the plate as arrived or left.

    The image is written to ``IMAGE_DIR`` as ``<plate>_<timestamp>.jpg``.
    A plate not yet in ``ARRIVED_FILE`` is appended there; a plate already
    arrived but not yet in ``LEFT_FILE`` is appended to ``LEFT_FILE``; a
    plate present in both is not logged again.

    Args:
        plate_text: Recognized plate number.
        image: Image of the plate to store.
    """
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    filename = f"{IMAGE_DIR}/{plate_text}_{timestamp}.jpg"
    cv2.imwrite(filename, image)

    # Compare on the plate field only. Matching plate_text against whole
    # "plate,timestamp" lines never succeeds, which would log every
    # sighting as a new arrival and never record a departure.
    arrived_plates = _logged_plates(ARRIVED_FILE)
    left_plates = _logged_plates(LEFT_FILE)

    if plate_text not in arrived_plates:
        target = ARRIVED_FILE
    elif plate_text not in left_plates:
        target = LEFT_FILE
    else:
        return

    with open(target, "a") as f:
        f.write(f"{plate_text},{timestamp}\n")
# Capture frames until the camera stops delivering them or 'q' is pressed.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Guard against a bare None result, which cannot be unpacked into
        # (text, image) and would otherwise raise TypeError.
        result = read_license_plate(frame)
        plate_text, plate_image = result if result is not None else (None, None)

        if plate_text and len(plate_text) > 5:  # Ensure valid plate length
            save_plate(plate_text, plate_image)
            print(f"Detected Plate: {plate_text}")

        cv2.imshow("ANPR", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close windows even if processing raised.
    cap.release()
    cv2.destroyAllWindows()
It's developed in a Raspberry Pi environment. It's connected to an HTML webpage via the Django framework.