"""Generate new Pokémon names using deep learning.

A speaker at a talk said he had tried this but was unsatisfied with the
results.
"""
import pokemon.names
# BUG FIX: `pokemanz` was used below but never bound -- `import
# pokemon.names` only binds the module name.  Bring the name list into
# scope explicitly (assumed to live in pokemon.names; confirm there).
from pokemon.names import pokemanz

# Build parallel streams of (current char, next char) index pairs over
# every name, so the model can learn P(next char | previous chars).
x, y = [], []
tok = Tokenizer(char_level=True)
tok.fit_on_texts(pokemanz)
for seq in tok.texts_to_sequences(pokemanz):
    x.extend(seq[:-1])  # inputs: every char except the last
    y.extend(seq[1:])   # targets: every char except the first
x = np.array(x, dtype=np.uint)
y = np.array(y, dtype=np.uint)

# char -> 1-based index, and the inverse for decoding predictions.
word_index = dict(tok.word_index)
char_index = {v: k for k, v in tok.word_index.items()}
vocsize = len(word_index)

# Sanity checks: every name ends with the '$' terminator, the second
# (x, y) pair spells 'Bu' (presumably from 'Bulbasaur'), and the two
# streams stay aligned.  NOTE(review): Keras' Tokenizer lowercases by
# default (lower=True), which would make the uppercase 'B' comparison
# fail -- confirm lower=False is intended.  Asserts vanish under -O.
assert np.count_nonzero(y == word_index['$']) == len(pokemanz)
assert (char_index[x[1]], char_index[y[1]]) == tuple('Bu')
assert len(x) == len(y)
# Training-schedule and topology constants.
batch_size, num_batches = 10000, 5000
batches_per_it = 50
sizes = 256, 128
seq_length = 4


def _context(end):
    """Indices preceding position `end`, left-padded with zeroes."""
    tail = list(x[max(0, end - seq_length):end])
    return [0] * seq_length + tail


# Each training row is the zero-padded window of `seq_length` character
# indices preceding the target character; 0 is reserved for padding
# (real token indices are 1-based).
x = np.array([_context(end)[-seq_length:] for end in range(1, len(x) + 1)])
# One-hot targets, shifted to 0-based to line up with Dense(vocsize).
y = np_utils.to_categorical(y - 1, vocsize)
# Character-level language model: embedding -> stacked LSTMs -> softmax
# over the vocabulary.  Index 0 is the padding value, hence mask_zero
# and an input dimension of vocsize + 1.
model = Sequential()
model.add(Embedding(vocsize + 1, sizes[0], mask_zero=True,
                    input_length=seq_length))
# BUG FIX: the hidden size was hard-coded to 128, silently ignoring the
# configured `sizes` tuple (sizes[0] is 256) -- use the loop variable.
for size in sizes[:-1]:
    model.add(LSTM(size, return_sequences=True))
model.add(LSTM(sizes[-1]))
model.add(Dense(vocsize))
model.add(Activation('softmax'))

print('x.shape:', x.shape)
print('y.shape:', y.shape)

model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop')

# Persist the architecture so sampling code can rebuild the model and
# simply load a weights checkpoint.
with open('topology.json', 'w') as f:
    f.write(model.to_json())
# Train in chunks of `batches_per_it` epochs, checkpointing the weights
# after each chunk so sampling can use any intermediate snapshot.
# BUG FIX: range(1, n) yields n - 1 values, so the last chunk was
# dropped (4950 of the intended 5000 epochs); include the endpoint.
for iteration in range(1, num_batches // batches_per_it + 1):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    # nb_epoch is the legacy Keras 1.x spelling of `epochs`; kept for
    # consistency with the API version used throughout this file.
    model.fit(x, y, batch_size=batch_size,
              nb_epoch=batches_per_it, verbose=True)
    model.save_weights('brain-{}.h5'.format(iteration))
# Generate samples with the freshly trained weights: ten random names,
# then one seeded from the root 'Guido'.
from pokemon.names import sample
sample(n=10)
sample(root='Guido')