forked from xmxoxo/Bert-1
-
Notifications
You must be signed in to change notification settings - Fork 1
/
client.py
128 lines (99 loc) · 4.06 KB
/
client.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
from __future__ import print_function
import requests
from classifier import *
import time
import tensorflow as tf
import numpy as np
tf.enable_eager_execution()
config = tf.ConfigProto()
config.gpu_options.allow_growth=True
session = tf.Session(config=config)
endpoint = 'http://127.0.0.1:8500'
def input_fn_builder(features, seq_length, is_training, drop_remainder):
"""Creates an `input_fn` closure to be passed to TPUEstimator."""
all_input_ids = []
all_input_mask = []
all_segment_ids = []
all_label_ids = []
for feature in features:
all_input_ids.append(feature.input_ids)
all_input_mask.append(feature.input_mask)
all_segment_ids.append(feature.segment_ids)
all_label_ids.append(feature.label_id)
"""The actual input function."""
batch_size = FLAGS.predict_batch_size
num_examples = len(features)
# This is for demo purposes and does NOT scale to large data sets. We do
# not use Dataset.from_generator() because that uses tf.py_func which is
# not TPU compatible. The right way to load data is with TFRecordReader.
d = tf.data.Dataset.from_tensor_slices({
"input_ids":
tf.constant(
all_input_ids, shape=[num_examples, seq_length],
dtype=tf.int32),
"input_mask":
tf.constant(
all_input_mask,
shape=[num_examples, seq_length],
dtype=tf.int32),
"segment_ids":
tf.constant(
all_segment_ids,
shape=[num_examples, seq_length],
dtype=tf.int32),
"label_ids":
tf.constant(all_label_ids, shape=[num_examples], dtype=tf.int32),
})
if is_training:
d = d.repeat()
d = d.shuffle(buffer_size=100)
d = d.batch(batch_size=batch_size, drop_remainder=drop_remainder)
return d
class Client:
def __init__(self):
self.processor = MyProcessor()
def sort_and_retrive(self, predictions, qa_pairs):
res = []
for prediction, qa in zip(predictions, qa_pairs):
res.append((prediction[1], qa))
res.sort(reverse=True)
return res
def preprocess(self, sentences):
save_path = os.path.join(FLAGS.data_dir, "pred.tsv")
with open(save_path, 'w') as fout:
out_line = '0' + '\t' + ' ' + '\n'
fout.write(out_line)
for sentence in sentences:
out_line = '0'+'\t' + sentence + '\n'
fout.write(out_line)
def predict(self, text_list):
sentences = [["0",sentence]for sentence in text_list]
predict_examples = self.processor.create_examples(sentences, set_type='test',file_base=False)
label_list = self.processor.get_labels()
tokenizer = tokenization.FullTokenizer(
vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)
features = convert_examples_to_features(predict_examples, label_list,FLAGS.max_seq_length, tokenizer)
predict_dataset = input_fn_builder(
features=features,
seq_length=FLAGS.max_seq_length,
is_training=False,
drop_remainder=False)
iterator = predict_dataset.make_one_shot_iterator()
next_element = iterator.get_next()
inputs = ["label_ids", "input_ids", "input_mask", "segment_ids"]
for input in inputs:
next_element[input] = next_element[input].numpy().tolist()
json_data = {"model_name": "default", "data": next_element}
start = time.time()
result = requests.post(endpoint, json=json_data)
cost = time.time() - start
print('total time cost: %s s' % cost)
result = dict(result.json())
output = [np.argmax(i)-1 for i in result['output']]
return output
if __name__ == '__main__':
client = Client()
msg = ["电池一直用可以用半天,屏幕很好。","机是正品,用着很流畅,618活动时买的,便宜了不少!",""]
prediction = client.predict(msg)
# print('probability: %s'%prediction)
print(prediction)