Handwritten digit recognition using deep learning

Handwritten digit recognition is a classic problem in artificial intelligence and is often used as a benchmark for testing the capabilities of machine learning algorithms, including Artificial Neural Networks (ANNs). The goal is to correctly classify images of handwritten digits (typically 0 to 9) into their corresponding numeric labels.
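As a concrete illustration of the task, the MNIST dataset bundled with Keras provides 60,000 training images and 10,000 test images of handwritten digits, each a 28x28 grayscale array paired with an integer label from 0 to 9. A minimal sketch of loading and inspecting it (assuming TensorFlow is installed) looks like this:

from tensorflow import keras

# Load the standard MNIST handwritten digit dataset bundled with Keras
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

print(x_train.shape)   # (60000, 28, 28): 60,000 grayscale images of 28x28 pixels
print(y_train[:10])    # Integer labels in the range 0-9, one per image

The full script below builds on this dataset: it loads a saved model (or trains a small convolutional network if none exists), preprocesses a user-supplied image, and predicts the digit it contains.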
import sys
import os.path

import numpy as np
import tensorflow as tf
from tensorflow import keras
from PIL import Image, ImageFilter
import matplotlib.pyplot as plt

# Step 1: Load the pre-trained model, or train a new one if no saved model exists
model_path = 'digit_recognition_model.h5'

if os.path.isfile(model_path):
    model = keras.models.load_model(model_path)
    print('Pre-trained model loaded.')
else:
    print('No pre-trained model found. Training a new model.')
    (x_train, y_train), _ = keras.datasets.mnist.load_data()
    x_train = x_train / 255.0
    x_train = np.expand_dims(x_train, axis=-1)  # Reshape to (num_samples, height, width, channels)

    # Data augmentation: small random rotations, shifts, and zooms,
    # with 20% of the training data held out for validation
    datagen = keras.preprocessing.image.ImageDataGenerator(
        rotation_range=10,
        width_shift_range=0.1,
        height_shift_range=0.1,
        zoom_range=0.1,
        validation_split=0.2
    )
    train_generator = datagen.flow(x_train, y_train, subset='training')
    validation_generator = datagen.flow(x_train, y_train, subset='validation')

    # A small convolutional network for 28x28 grayscale digit images
    model = keras.Sequential([
        keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
        keras.layers.MaxPooling2D((2, 2)),
        keras.layers.Flatten(),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dense(10, activation='softmax')
    ])
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.fit(train_generator, epochs=10, validation_data=validation_generator)
    model.save(model_path)

# Step 2: Test the model on a user-supplied image
if len(sys.argv) < 2:
    print("Please provide the image path as a command-line argument.")
    sys.exit(1)

image_path = sys.argv[1]
image = Image.open(image_path).convert('L')  # Load the image and convert it to grayscale

# Threshold value to separate the background from the digit (adjust as needed)
threshold = 100

# Make the background white (pixels brighter than the threshold become white)
image = image.point(lambda x: 255 if x > threshold else x)

# Apply a median filter to reduce noise
image = image.filter(ImageFilter.MedianFilter(size=3))

# Image preprocessing: resize to 28x28 and scale pixel values to [0, 1]
image = image.resize((28, 28))
image_array = np.array(image)
image_array = image_array / 255.0
image_array = 1 - image_array  # Invert so the digit is light on a dark background, matching MNIST
image_array = np.expand_dims(image_array, axis=-1)  # Add the channel dimension
image_array = np.expand_dims(image_array, axis=0)   # Add the batch dimension

# Display the converted image
plt.imshow(image_array[0, :, :, 0], cmap='gray')
plt.title('Converted Image')
plt.axis('off')
plt.show()

# Step 3: Make predictions on the preprocessed image
predictions = model.predict(image_array)
predicted_digit = np.argmax(predictions[0])
print('Predicted Digit:', predicted_digit)
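Assuming the script above is saved as digit_recognition.py (the filename is arbitrary), it can be run against an image of a single handwritten digit from the command line:

python digit_recognition.py path/to/digit.png

The script reports a prediction for one image at a time. To get a sense of overall accuracy, the saved model can also be evaluated on the held-out MNIST test split. The following is a minimal sketch that assumes digit_recognition_model.h5 has already been created by the script above:

import numpy as np
from tensorflow import keras

# Load the model saved by the training step
model = keras.models.load_model('digit_recognition_model.h5')

# Load the MNIST test split and apply the same preprocessing as training:
# scale pixel values to [0, 1] and add a channel dimension
(_, _), (x_test, y_test) = keras.datasets.mnist.load_data()
x_test = x_test / 255.0
x_test = np.expand_dims(x_test, axis=-1)  # Shape: (10000, 28, 28, 1)

# evaluate() returns the loss and the metrics set at compile time (accuracy here)
loss, accuracy = model.evaluate(x_test, y_test, verbose=0)
print('Test accuracy:', accuracy)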