-
Notifications
You must be signed in to change notification settings - Fork 5
/
personalized_book_generator.py
64 lines (45 loc) · 1.84 KB
/
personalized_book_generator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import json
import time
from tqdm import tqdm
from processor.goodreads.prompter import GoodreadsPrompter, GoodreadsColdUser
from utils.openai.chat_service import ChatService
# Minimum number of seconds one iteration of the generation loop should take;
# the loop sleeps for the remainder. 0 means no throttling.
MIN_INTERVAL = 0

# concise
# NOTE(review): the "concise" marker above is kept from the original; it
# presumably names the prompt variant in use -- confirm.

# Prompter built from the book CSV and the book-description CSV.
goodreads_prompter = GoodreadsPrompter('data/goodreads/book.csv', desc_path='data/goodreads/book-desc.csv')

# Result of stringify(); iterated further down as (uid, content) pairs.
user_list = GoodreadsColdUser('data/goodreads/user', goodreads_prompter).stringify()

# System prompt handed to every ChatService instance.
# The closing instruction asks for a JSON-format BOOK so that it agrees with
# the rest of the prompt, which requests a single book recommendation (it
# used to say "NEWS", presumably a leftover from a news-recommendation prompt).
# NOTE(review): the JSON template ends with a trailing comma, which is not
# strict JSON; left unchanged because downstream parsing is not visible here.
system = """You are asked to capture user's interest based on his/her browsing history, and recommend a book that he/she may be interested. The format of history is as below:
(1) {book title}, description: {book desc}
...
(n) {book title}, description: {book desc}
You can only recommend one book (only one) in the following json format:
{
"title": ...,
"description": ...,
}
The book should be diverse, that is not too similar with the original provided book list. You are not allowed to response any other words for any explanation or note. JUST GIVE ME JSON-FORMAT BOOK. Now, the task formally begins. Any other information should not disturb you."""
# Append-only JSON-lines log; each line is expected to be a JSON object
# carrying at least a 'uid' key.
save_path = 'data/goodreads/generator_v1.log'

# Opening in append mode creates the log when it is missing and leaves any
# existing content untouched, so the read below always finds a file.
with open(save_path, 'a'):
    pass

# Collect the uids already present in the log so a restarted run can skip them.
exist_set = set()
needs_newline = False
with open(save_path, 'r') as f:
    for line_no, line in enumerate(f, start=1):
        # Only the last line of a file can lack its terminator.
        needs_newline = not line.endswith('\n')
        if not line.strip():
            continue  # tolerate blank lines
        try:
            data = json.loads(line)
        except json.JSONDecodeError as err:
            # A record cut short by an interrupted run must not block resuming;
            # its uid is simply not marked as done and will be regenerated.
            print(f'skip malformed log line {line_no} in {save_path}: {err}')
            continue
        exist_set.add(data['uid'])

# Terminate a dangling partial line so the next appended record starts on a
# fresh line instead of being glued to the fragment.
if needs_newline:
    with open(save_path, 'a') as f:
        f.write('\n')
# Generate one recommendation per user and append it to the log as soon as it
# is available, so progress survives an interrupted run.
for user_id, history in tqdm(user_list):
    started = time.time()

    # Nothing to do for users already in the log or users without content.
    if user_id in exist_set or not history:
        continue

    print(user_id, history)

    try:
        # A new ChatService is constructed for every user, each one
        # initialised with the system prompt.
        reply = ChatService(system).ask(history).rstrip('\n')  # type: str
        record = json.dumps({'uid': user_id, 'book': reply})
        with open(save_path, 'a') as log_file:
            log_file.write(record + '\n')
    except Exception as err:
        # Best effort: report the failure for this user and move on.
        print(err, user_id)

    # Throttle: pad the iteration up to MIN_INTERVAL seconds.
    remaining = MIN_INTERVAL - (time.time() - started)
    if remaining >= 0:
        time.sleep(remaining)