Trying to train a model to generate text on my GPU, and keep getting [UNK] and "NaN" loss on one computer. However, on my MacBook it works just fine?

Here is the full code. This currently works just fine on my M1 MacBook running Monterey and Tensorflow-Metal. However, when I export the dataset and code to my laptop with an RTX 3060 Laptop GPU with Pop_OS! that is when I start getting the [UNK] characters generated and “NaN” loss. I’m unsure of what steps to take to make this better. Any advice would be appreciated.

import os, sys, time import numpy as np import tensorflow as tf from tensorflow.keras.models import Sequential from tensorflow.keras.callbacks import ModelCheckpoint from tensorflow.keras.optimizers import Adam from tensorflow.keras.preprocessing.sequence import pad_sequences from tensorflow.keras.preprocessing.text import Tokenizer from tensorflow.keras.layers import StringLookup from tensorflow.keras.layers import Embedding from tensorflow.keras.layers import Bidirectional from tensorflow.keras.layers import SimpleRNN from tensorflow.keras.layers import Dense BATCH_SIZE = 128 BUFFER_SIZE = 10_000 EMBEDDING_DIMENSION = 128 RNN_UNITS = 1024 CHECKPOINT_DIR = './training_checkpoints' CHECKPOINT_PREFIX = os.path.join(CHECKPOINT_DIR, "ckpt_{epoch}") EPOCHS = 16 def text_from_ids(ids): return tf.strings.reduce_join(chars_from_ids(ids), axis=1) def split_input_target(sequence): input_text = sequence[:-1] target_text = sequence[1:] return input_text, target_text def generate_text(model, seed_text, next_words, max_sequence_len): for _ in range(next_words): token_list = Tokenizer().texts_to_sequences([seed_text])[0] token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre') predicted = model.predict(token_list, verbose=0) output_word = "" for word,index in Tokenizer().word_index.items(): if index == predicted: output_word = word break seed_text += " "+output_word return seed_text.title() def generate_char(inputs): input_ids = tf.convert_to_tensor(ids_from_chars(inputs)) predicted_logits = model(inputs=np.array([input_ids])) predicted_logits = predicted_logits[:, -1, :] # print(predicted_logits) predicted_logits = predicted_logits/1.0 # print(predicted_logits) predicted_ids = tf.random.categorical(predicted_logits, num_samples=1) predicted_ids = tf.squeeze(predicted_ids, axis=-1) return chars_from_ids(predicted_ids) text = open("./data.txt", "rb").read().decode(encoding="UTF-8") vocab = sorted(set(text)) vocab_size = len(vocab) print(f"Text Length: {len(text)}") print(f"Text Vocab: {vocab}") print(f"Text Vocab Size: {vocab_size}") ids_from_chars = StringLookup(vocabulary=list(vocab), mask_token=None, name='lookup') chars_from_ids = StringLookup(vocabulary=ids_from_chars.get_vocabulary(), invert=True, mask_token=None) all_ids = ids_from_chars(tf.strings.unicode_split(text, "UTF-8")) ids_dataset = sequence_length = 100 examples_per_epoch = len(text)//(sequence_length+1) sequences = ids_dataset.batch(sequence_length+1, drop_remainder=True) dataset = dataset = ( dataset.shuffle(BUFFER_SIZE) .batch(BATCH_SIZE, drop_remainder=True) .prefetch( ) model = Sequential() model.add(Embedding(vocab_size, EMBEDDING_DIMENSION, batch_input_shape=[BATCH_SIZE, None])) model.add(SimpleRNN(RNN_UNITS, return_sequences=True)) model.add(Dense(vocab_size,)) checkpoint_callback = ModelCheckpoint( filepath=CHECKPOINT_PREFIX, save_weights_only=False, save_best_only=False, verbose=1 ) loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) model.compile(loss = loss, optimizer='adam', run_eagerly=True) model.summary(), batch_size=BATCH_SIZE, epochs=EPOCHS, callbacks=[checkpoint_callback])"./model/") model = tf.keras.models.load_model("./model/") next_char = tf.constant(["After "]) result = [] for n in range(256): next_char = generate_char(next_char) result.append(next_char) print(tf.strings.join(result)[0].numpy().decode("utf-8")) 

submitted by /u/weepthewillow_
[visit reddit] [comments]

Leave a Reply

Your email address will not be published. Required fields are marked *