-
Notifications
You must be signed in to change notification settings - Fork 0
/
prep.py
executable file
·31 lines (24 loc) · 949 Bytes
/
prep.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 17 16:11:54 EDT 2021
author: Ryan Hildebrandt
"""
# Doc setup
# Reference: https://github.com/RajkumarGalaxy/NLP/blob/master/beginners-guide-to-text-generation-with-rnns.ipynb
import pickle
import tensorflow as tf
# Load the pickled data from ./outputs/scraped_data.pickle. The unpacking
# below requires the pickle to hold exactly six objects in this order; any
# other count raises ValueError.
# NOTE(review): pickle.load can run arbitrary code while deserializing, so
# this path must only ever point at a file this project produced itself.
with open("./outputs/scraped_data.pickle", "rb") as f:
    kj_dict, kj_list, yj_dict, yoji_df, bg_list, bg_dict = pickle.load(f)
# Prepare yoji_df: build an integer vocabulary and encode every entry with it.
# Each item of kj_list maps to its position in that list.
tokenizer = {symbol: position for position, symbol in enumerate(kj_list)}
# Encode each entry of yoji_df.yoji element by element; an element that is
# missing from kj_list raises KeyError here.
tokenized = [
    [tokenizer[symbol] for symbol in entry]
    for entry in yoji_df.yoji
]
# Stream the encoded entries as int32 vectors of length 4
# (presumably one id per character of a four-character yoji -- TODO confirm).
sequences = tf.data.Dataset.from_generator(
    lambda: tokenized,
    output_types=tf.int32,
    output_shapes=tf.TensorShape([4]),
)
def prepare_dataset(seq):
    """Split one sequence into an (input, target) pair offset by one step.

    Args:
        seq: A sliceable sequence (in this script, one element of
            ``sequences``).

    Returns:
        A tuple ``(input_vector, target_vector)`` where ``input_vector`` is
        ``seq`` without its last element and ``target_vector`` is ``seq``
        without its first element, so ``target_vector[i]`` is the element
        that follows ``input_vector[i]`` in ``seq``.
    """
    input_vector = seq[:-1]
    target_vector = seq[1:]
    return input_vector, target_vector
# Turn every sequence into an (input, target) pair via prepare_dataset.
dataset = sequences.map(prepare_dataset)
AUTOTUNE = tf.data.experimental.AUTOTUNE
# Group the pairs into batches of 64 (an incomplete final batch is dropped),
# repeat the batched data indefinitely, and let tf.data pick the prefetch
# buffer size.
data_prepped = (
    dataset
    .batch(64, drop_remainder=True)
    .repeat()
    .prefetch(AUTOTUNE)
)