I am building an image classifier of sorts that takes an image of a speedometer and "reads" the value. I have a collection of about 4000 images, all labeled with GPS velocity values. I read in the images and create the X and Y training and validation sets, but the model doesn't learn at all. I even tried pre-built models from TensorFlow such as ResNet50 and Xception, both of which gave similar if not identical results: the loss and accuracy stayed constant. When I added regularization it made things much worse: the accuracy remained fixed close to zero and the loss skyrocketed to over 1,000,000. I realize there is no "silver bullet" when tuning a neural network, so all suggestions are welcome.
```python
##########################import dependencies
import imp
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import os
import sys
import sklearn as sk
import cv2
import pandas as pd
import scipy
from scipy.signal import fftconvolve
from tokenize import endpats
from glob import glob
from os.path import join, basename
from tensorflow.keras import layers, models, datasets
from tensorflow import keras
from keras import regularizers
from sklearn.model_selection import train_test_split
#####################Create different models to test
# User defined model that can be changed to experiment with different structures.
# input_shape is important. Make sure you either resize all images to match or, if your
# images are too large and would lose a lot of data, change the input shape here to
# match your images, or change the resize() call in the load_data functions.
# If you choose to keep your image size, you will need to remove the resize() calls
# from the load_data functions and set the width and height to match your images.
# The third dimension specifies the number of channels in your image. If you want to
# load color images into models that have the channels set to 1 (e.g. create_model()),
# you will have to change the 1 to a 3.
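# For example (an illustrative sketch only, not part of the original pipeline), a color
# variant of create_model() would only need the first layer's input_shape changed:
#   model.add(layers.Conv2D(50, (3, 3), activation='relu', input_shape=(90, 160, 3)))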
def create_model():
    model = keras.models.Sequential()
    model.add(layers.Conv2D(50, (3, 3), activation='relu', input_shape=(90, 160, 1)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(25, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(10, (3, 3), activation='relu'))
    model.add(layers.Flatten())
    model.add(layers.Dense(20, activation='relu'))
    model.add(layers.Dropout(0.1))
    model.add(layers.Dense(10, activation='relu'))
    model.add(layers.Dense(1, activation='softmax'))
    model.compile(optimizer='adam', loss="categorical_crossentropy", metrics=['accuracy'])
    return model
# resnet architecture
def resnet():
    base_model = keras.applications.resnet50.ResNet50(
        weights='imagenet', include_top=False, input_shape=[90, 160, 3])
    avg = keras.layers.GlobalAveragePooling2D()(base_model.output)
    output = keras.layers.Dense(1, activation='softmax')(avg)
    model = keras.Model(inputs=base_model.input, outputs=output)
    for layer in base_model.layers:
        layer.trainable = False
    optimizer = keras.optimizers.SGD()
    model.compile(loss="MSE", optimizer=optimizer, metrics=["accuracy"])
    return model
# not exact, but based on the AlexNet architecture
def alexNet():
    model = keras.models.Sequential()
    model.add(layers.Conv2D(96, (11, 11), activation='relu', padding='valid', input_shape=(90, 160, 1)))
    model.add(layers.MaxPooling2D((3, 3), strides=2, padding='valid'))
    model.add(layers.Conv2D(128, (5, 5), activation='relu', padding='same'))
    model.add(layers.MaxPooling2D((3, 3), strides=2, padding='valid'))
    model.add(layers.Conv2D(128, (3, 3), activation='relu', padding='same', strides=1))
    model.add(layers.Conv2D(128, (3, 3), activation='relu', padding='same', strides=1))
    model.add(layers.Conv2D(128, (3, 3), activation='relu', padding='same', strides=1))
    model.add(layers.Flatten())
    model.add(layers.Dense(2048, activation='relu'))
    model.add(layers.Dense(1024, activation='relu'))
    model.add(layers.Dense(1, activation='softmax'))
    model.compile(optimizer='adam', loss="MSE", metrics=['accuracy'])
    return model
# lenet5 architecture
def lenet5():
    model = keras.models.Sequential([
        keras.layers.Conv2D(6, 5, activation='tanh', padding="same", input_shape=[90, 160, 1]),
        keras.layers.MaxPooling2D(2, strides=2),
        keras.layers.Conv2D(16, 5, activation='tanh', padding="same"),
        keras.layers.MaxPooling2D(2, strides=2),
        keras.layers.Conv2D(120, 5, activation='tanh', padding="same"),
        keras.layers.Flatten(),
        keras.layers.Dense(84, activation='tanh'),
        keras.layers.Dense(1, activation='softmax'),
    ])
    optimizer = keras.optimizers.SGD()
    model.compile(loss="MSE", optimizer=optimizer, metrics=["accuracy"])
    return model
# xception architecture
def xception():
    base_model = tf.keras.applications.xception.Xception(
        include_top=False, weights='imagenet', input_shape=[90, 160, 3])
    avg = keras.layers.GlobalAveragePooling2D()(base_model.output)
    output = keras.layers.Dense(1, activation='softmax')(avg)
    model = keras.Model(inputs=base_model.input, outputs=output)
    optimizer = keras.optimizers.SGD()
    model.compile(loss="MSE", optimizer=optimizer, metrics=["accuracy"])
    return model
##########################Filtering Functions
# Input: single image array
# Output: single image array with detected edges
def edges_single_img(img):
    # define the vertical filter
    vertical_filter = [[-1, -2, -1], [0, 0, 0], [1, 2, 1]]
    # define the horizontal filter
    horizontal_filter = [[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]
    n, m, d = img.shape
    edges_img = img.copy()
    # loop over all pixels in the image
    for row in range(3, n - 2):
        for col in range(3, m - 2):
            # create little local 3x3 box
            local_pixels = img[row - 1:row + 2, col - 1:col + 2, 0]
            # apply the vertical filter
            vertical_transformed_pixels = vertical_filter * local_pixels
            # remap the vertical score
            vertical_score = vertical_transformed_pixels.sum() / 4
            # apply the horizontal filter
            horizontal_transformed_pixels = horizontal_filter * local_pixels
            # remap the horizontal score
            horizontal_score = horizontal_transformed_pixels.sum() / 4
            # combine the horizontal and vertical scores into a total edge score
            edge_score = (vertical_score ** 2 + horizontal_score ** 2) ** .5
            # insert this edge score into the edges image
            edges_img[row, col] = [edge_score] * 3
    # remap the values to the 0-1 range in case they went out of bounds
    edges_img = edges_img / edges_img.max()
    return edges_img
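# A minimal vectorized sketch of the same edge score using scipy.signal.fftconvolve
# (already imported above). This is an optional illustration, not part of the original
# pipeline; the sign flip introduced by true convolution does not change the result
# because the two scores are squared before being combined.
def edges_single_img_fft(img):
    gray = img[:, :, 0].astype(float)
    vertical_filter = np.array([[-1, -2, -1], [0, 0, 0], [1, 2, 1]])
    horizontal_filter = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]])
    vertical_score = fftconvolve(gray, vertical_filter, mode='same') / 4
    horizontal_score = fftconvolve(gray, horizontal_filter, mode='same') / 4
    edge_score = np.sqrt(vertical_score ** 2 + horizontal_score ** 2)
    return edge_score / edge_score.max()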
##################untested function
# Input: multi-dimensional array of images
# Output: multi-dimensional array with detected edges
def edges_array(img_array):
    # define the vertical filter
    vertical_filter = [[-1, -2, -1], [0, 0, 0], [1, 2, 1]]
    # define the horizontal filter
    horizontal_filter = [[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]
    w = len(img_array[0, :, 0])
    p = len(img_array[0, 0, :])
    l = len(img_array[:, 0, 0])
    edge_img_array = np.zeros((l, w, p))
    for i in range(len(img_array[:, 0, 0])):
        img = img_array[i, :, :]
        n, m, d = img.shape
        edges_img = img.copy()
        # loop over all pixels in the image
        for row in range(3, n - 2):
            for col in range(3, m - 2):
                # create little local 3x3 box
                local_pixels = img[row - 1:row + 2, col - 1:col + 2, 0]
                # apply the vertical filter
                vertical_transformed_pixels = vertical_filter * local_pixels
                # remap the vertical score
                vertical_score = vertical_transformed_pixels.sum() / 4
                # apply the horizontal filter
                horizontal_transformed_pixels = horizontal_filter * local_pixels
                # remap the horizontal score
                horizontal_score = horizontal_transformed_pixels.sum() / 4
                # combine the horizontal and vertical scores into a total edge score
                edge_score = (vertical_score ** 2 + horizontal_score ** 2) ** .5
                # insert this edge score into the edges image
                edges_img[row, col] = [edge_score] * 3
        # remap the values to the 0-1 range in case they went out of bounds
        edges_img = edges_img / edges_img.max()
        edge_img_array[i, :, :] = edges_img
    return edge_img_array
#############User input functions
def get_mode():
    print("Operating modes:\n 1. Run all models\n 2. Run single model\n")
    mode = input("Enter Operating mode: ")
    mode = int(mode)
    if (mode > 2 or mode < 1):
        print("Error 1: Invalid Operating mode! Please enter a valid operating mode")
    return mode
def model_choice(mode):
    if (mode == 1):
        print("Loading all models...")
        model_num = 0
        return model_num
    elif (mode == 2):
        print("Available Models:\n 1. Custom Model \n 2. lenet5 \n 3. AlexNet Variant \n 4. ResNet50 \n 5. Xception")
        model_num = input("Enter the model that you would like to test: ")
        model_num = int(model_num)
        if (int(model_num) > 5 or int(model_num) < 1):
            print("Error 2: Invalid model! Please enter a valid model")
        else:
            return model_num
    else:
        print("Error 3: Invalid mode passed")
############Load and parse data (preprocessing)
# Input: these functions take a file path as their input. This is the path to the
# directory with ALL images.
# Important note: image names must follow the naming convention "img#####_##.##.jpeg".
# The first 5 numbers are the image index, the next 2 are the tens and ones place of
# the velocity, and the final two are the tenths and hundredths of the velocity.
# If you wish to use a different naming convention, please edit the "create_labels"
# function to use your naming style.
# Output: preprocessed, labeled data
# creates a list of all file names in the directory and sorts them
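# For example, a hypothetical file named 'img00042_25.50.jpg' would be parsed below
# into the label 25.50 (the text between the '_' and the '.j' is read as a float).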
def create_labels(path_to_imgs):
    files = glob(join(path_to_imgs, '*', '*.jpg'), recursive=True)
    files.sort(key=basename)
    labels = []
    for x in files:
        start = x.find('_')
        end = x.find('.j')
        labels.append(float(x[start + 1:end]))
    labels = np.asarray(labels, dtype=float)
    return labels, files
# loads color images and converts them to grayscale
# In the load_data_*() functions, you can change the w and h variables to match your
# images if you do not want to resize, or change them to whatever size works best for
# your images.
def load_data_gray(path_to_imgs):
    w = 16 * 10
    h = 9 * 10
    labels, files = create_labels(path_to_imgs)
    num_imgs = len(files)
    temp_array = np.zeros((num_imgs, h, w))
    for idx, path in enumerate(files):
        img = cv2.imread(path)
        img = cv2.resize(img, (w, h))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        temp_array[idx, :, :] = img
    temp_array = temp_array.reshape(num_imgs, 90, 160, -1)
    return temp_array, labels
# assumes the library of images is already in grayscale
def load_data_asGray(path_to_imgs):
    w = 16 * 10
    h = 9 * 10
    labels, files = create_labels(path_to_imgs)
    num_imgs = len(files)
    temp_array = np.zeros((num_imgs, h, w))
    for idx, path in enumerate(files):
        img = cv2.imread(path)
        img = cv2.resize(img, (w, h))
        temp_array[idx, :, :] = img
    temp_array = temp_array.reshape(num_imgs, 90, 160, -1)
    return temp_array, labels
# loads color images
def load_data_color(path_to_imgs):
    w = 16 * 10
    h = 9 * 10
    labels, files = create_labels(path_to_imgs)
    num_imgs = len(files)
    temp_array = np.zeros((num_imgs, h, w, 3))
    for idx, path in enumerate(files):
        img = cv2.imread(path)
        img = cv2.resize(img, (w, h))
        temp_array[idx, :, :, :] = img
    temp_array = temp_array.reshape(num_imgs, 90, 160, 3)
    return temp_array, labels
def custom_train_test_split(img_arr, labels):
    X_train, X_test, y_train, y_test = train_test_split(
        img_arr, labels, test_size=0.2, random_state=42)
    return X_train, X_test, y_train, y_test
#####################Training functions
# Input: For models that only use grayscale images, we pass in trainingImages and
# trainingLabels so that the data only has to be loaded once. For models that require
# color images (resnet), we do not pass in trainingImages and trainingLabels; those
# are loaded inside the training function.
# Output: an array of training data
def train_Xception_model(path_to_imgs):
    model = xception()
    print(model.summary())
    trainingImages, trainingLabels = load_data_color(path_to_imgs)
    history = model.fit(trainingImages, trainingLabels, epochs=10)
    return history.history
def train_custom_model(trainingImages, trainingLabels):
    model = create_model()
    print(model.summary())
    X_train, X_test, y_train, y_test = custom_train_test_split(trainingImages, trainingLabels)
    history = model.fit(X_train, y_train, epochs=100, batch_size=100,
                        validation_data=(X_test, y_test))
    return history.history
def train_resnet_model(path_to_imgs):
    model = resnet()
    trainingImages, trainingLabels = load_data_color(path_to_imgs)
    print(model.summary())
    history = model.fit(trainingImages, trainingLabels, epochs=10)
    return history.history
def train_alexnet_model(trainingImages, trainingLabels):
    model = alexNet()
    print(model.summary())
    history = model.fit(trainingImages, trainingLabels, epochs=10)
    return history.history
def train_lenet5_model(trainingImages, trainingLabels):
    model = lenet5()
    print(model.summary())
    history = model.fit(trainingImages, trainingLabels, epochs=10)
    return history.history
# Input: mode to operate in, and model_num to determine which model to train if mode != 1
# Output: array(s) of training data
def training(mode, model_num, path_to_imgs):
    trainingImages, trainingLabels = load_data_gray(path_to_imgs)
    if (mode == 1):
        history_custom = train_custom_model(trainingImages, trainingLabels)
        history_alex = train_alexnet_model(trainingImages, trainingLabels)
        history_lenet5 = train_lenet5_model(trainingImages, trainingLabels)
        history_resnet = train_resnet_model(path_to_imgs)
        history_xception = train_Xception_model(path_to_imgs)
        # df1 = pd.DataFrame(history_custom.history)
        # df1.to_excel("custom_model_training.xlsx")
        # df2 = pd.DataFrame(history_alex.history)
        # df2.to_excel("alexnet_model_training.xlsx")
        # df3 = pd.DataFrame(history_lenet5.history)
        # df3.to_excel("lenet5_model_training.xlsx")
        # df4 = pd.DataFrame(history_resnet.history)
        # df4.to_excel("resnet_model_training.xlsx")
        # df5 = pd.DataFrame(history_xception.history)
        # df5.to_excel("xception_model_training.xlsx")
        return history_custom, history_lenet5, history_alex, history_resnet, history_xception
    elif (mode == 2):
        if (model_num == 1):
            history = train_custom_model(trainingImages, trainingLabels)
            # df = pd.DataFrame(history.history)
            # df.to_excel("custom_model_training.xlsx")
            return history
        elif (model_num == 2):
            history = train_lenet5_model(trainingImages, trainingLabels)
            # df = pd.DataFrame(history.history)
            # df.to_excel("lenet5_model_training.xlsx")
            return history
        elif (model_num == 3):
            history = train_alexnet_model(trainingImages, trainingLabels)
            # df = pd.DataFrame(history.history)
            # df.to_excel("alexnet_model_training.xlsx")
            return history
        elif (model_num == 4):
            history = train_resnet_model(path_to_imgs)
            # df = pd.DataFrame(history.history)
            # df.to_excel("resnet_model_training.xlsx")
            return history
        elif (model_num == 5):
            history = train_Xception_model(path_to_imgs)
            # df = pd.DataFrame(history.history)
            # df.to_excel("xception_model_training.xlsx")
            return history
        else:
            print("Invalid model number! Please choose again\n")
    else:
        print("Invalid mode! Please choose again\n")
#################Adjust Tensorflow settings to run on GPU
# Input: Boolean to tell Tensorflow to use GPU acceleration or to strictly use the CPU
# Output: void
def set_tf_settings(use_GPU):
    if (use_GPU == False):
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    else:
        physical_devices = tf.config.list_physical_devices('GPU')
        try:
            tf.config.experimental.set_memory_growth(physical_devices[0], True)
            print(tf.config.experimental.get_device_details(physical_devices[0]))
            print(tf.config.experimental.get_memory_usage)
        except:
            # Invalid device or cannot modify virtual devices once initialized.
            pass
        print(physical_devices[0])
#######################Export Model Functions
# Input: Tensorflow model instance and the name that you want to give it
# Output: a JSON file for the model architecture and an h5py file for the weight values.
# Both can be loaded back using the inverse functions:
#   json_file = open('model.json', 'r')
#   loaded_model_json = json_file.read()
#   json_file.close()
#   loaded_model = model_from_json(loaded_model_json)
#   loaded_model.load_weights("model.h5")
def export_model(model, model_name):
    model_json = model.to_json()
    with open(model_name + ".json", "w") as json_file:
        json_file.write(model_json)
    print("Model saved!")
    model.save_weights(model_name + "_weights.h5")
    print("weights saved!")
PATH = '/home/[name]/Git/speedometer-data/imgs/'
def main():
    mode = get_mode()
    model_num = model_choice(mode)
    if (mode == 2):
        history = training(mode, model_num, PATH)
        print(history)
    elif (mode == 1):
        history_custom, history_lenet5, history_alex, history_resnet, history_xception = training(
            mode, model_num, PATH)
        print(history_custom)
        print(history_lenet5)
        print(history_alex)
        print(history_resnet)
        print(history_xception)
    else:
        print("Error 4: Invalid Mode Selected!")
main()
```