-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
111 lines (80 loc) · 2.37 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
from flask import Flask, render_template, jsonify, request
import numpy as np
import pandas as pd
import json
import random
import re
from flask_cors import CORS
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Flask application object; CORS(app) applies flask_cors' default
# cross-origin handling to it.
app = Flask(__name__)
CORS(app)

# Load the dataset and drop incomplete rows.  The index is rebuilt afterwards
# so that row labels coincide with row positions: the TF-IDF matrix built from
# this data is addressed by position, while the views look rows up by label
# (title[...], data.loc[...]).  Without the reset, any dropped row makes those
# two numbering schemes diverge.
data = pd.read_csv('data.csv', encoding='ISO-8859-1')
data = data.dropna().reset_index(drop=True)
def preprocessData(datas):
    """Normalise a pandas Series of text before TF-IDF matching.

    Every value is lower-cased, then punctuation (any character that is
    neither a word character nor whitespace) and runs of digits are removed.

    Args:
        datas: pandas Series of strings.

    Returns:
        A new Series holding the cleaned strings; the input is not modified.
    """
    datas = datas.str.lower()
    # regex=True is required: since pandas 2.0 Series.str.replace treats the
    # pattern as a literal string by default, which would leave the text
    # untouched.  Raw strings avoid invalid-escape warnings for \w, \s, \d.
    datas = datas.str.replace(r'[^\w\s]', '', regex=True)
    datas = datas.str.replace(r'\d+', '', regex=True)
    return datas
def preprocessSearch(query):
    """Clean a single search string the same way the corpus is cleaned.

    The string is lower-cased, then punctuation (anything that is neither a
    word character nor whitespace) and runs of digits are stripped, in that
    order.

    Args:
        query: the raw search string.

    Returns:
        The cleaned string.
    """
    cleaned = query.lower()
    # Apply the removals one after another: punctuation first, digits second.
    for pattern in (r'[^\w\s]', r'\d+'):
        cleaned = re.sub(pattern, '', cleaned)
    return cleaned
def tf_idf(key, desc):
    """Vectorise the corpus and the query with one shared TF-IDF vocabulary.

    A new TfidfVectorizer (English stop words) is fitted on ``desc`` and the
    same fitted vectorizer is then used to transform ``key``.

    Args:
        key: the search string.
        desc: iterable of document strings the vectorizer is fitted on.

    Returns:
        A ``(query_vector, corpus_matrix)`` tuple, in that order.
    """
    vectorizer = TfidfVectorizer(stop_words='english')
    corpus_matrix = vectorizer.fit_transform(desc)
    # The query is wrapped in a list because transform() expects an iterable
    # of documents.
    query_vector = vectorizer.transform([key])
    return query_vector, corpus_matrix
def similarity(search, tfidf_weight, top_n=5):
    """Rank corpus rows by cosine similarity to the query.

    Args:
        search: TF-IDF vector of the query; only its first row is used.
        tfidf_weight: TF-IDF matrix of the corpus, one row per document.
        top_n: maximum number of matches to return.  Defaults to 5, the
            count that used to be hard-coded.

    Returns:
        A list of row positions (ints) into ``tfidf_weight``, best match
        first.  Rows whose similarity is zero are never returned, so the
        list may hold fewer than ``top_n`` entries or be empty.
    """
    scores = cosine_similarity(search, tfidf_weight)[0]
    # A stable sort on the negated scores yields descending order with ties
    # broken by the lowest position first, i.e. the same order that repeated
    # np.argmax calls produce, without mutating the score array.
    ranked = np.argsort(-scores, kind='stable')[:max(top_n, 0)]
    return [int(position) for position in ranked if scores[position] > 0]
# Column views shared by the request handlers.
# NOTE: the name `code` is rebound further down by the `/code` view function.
title, desc, code = (
    data[column] for column in ('title', 'description', 'code')
)
# Cleaned descriptions: the corpus that tf_idf() is fitted on for each search.
cleanDesc = preprocessData(desc)
@app.route('/')
def index():
    """Return up to five randomly chosen titles as JSON.

    The response body maps each chosen row label (as a string) to
    ``{"id": <label>, "title": <title>}`` and is sent with HTTP status 200.
    Fewer than five entries are returned only when the dataset holds fewer
    than five rows.
    """
    title = data['title']
    # Draw from the labels that actually exist.  random.randint(0, len(title))
    # has an inclusive upper bound, so it could yield len(title), and it could
    # also yield a label removed by dropna(); both raise KeyError on lookup.
    labels = title.index.tolist()
    chosen = random.sample(labels, min(5, len(labels)))
    rand = {
        str(label): {"id": int(label), "title": title[label]}
        for label in chosen
    }
    return jsonify(rand), 200
@app.route('/search', methods=['POST'])
def search():
    """Return the titles whose descriptions best match the search query.

    The query is read from the ``query`` URL parameter (``request.args``),
    cleaned with preprocessSearch(), and matched against ``cleanDesc`` via
    tf_idf() and similarity().

    Returns:
        A JSON object mapping each matching row label (as a string) to
        ``{"id": <label>, "title": <title>}``.  The status is 200 when there
        is at least one match and 204 otherwise.
    """
    query = preprocessSearch(request.args['query'])
    key, weight = tf_idf(query, cleanDesc)
    most = similarity(key, weight)

    jsonTemplate = {}
    for position in most:
        # similarity() reports row *positions* in the TF-IDF matrix.  Convert
        # each one to the row's index label so the returned id refers to the
        # same row that the /code endpoint resolves with data.loc[...].
        label = title.index[position]
        jsonTemplate[str(label)] = {
            "id": int(label),
            "title": title.iloc[position],
        }

    status = 200 if len(most) > 0 else 204
    return jsonify(jsonTemplate), status
@app.route('/code')
@app.route('/code/<codeID>')
def code(codeID=None):
    """Return one dataset row as JSON.

    Args:
        codeID: row label taken from the URL, or None when the bare
            ``/code`` route is requested.

    Returns:
        ``({}, 204)`` when no id is given, ``(row as a dict, 200)`` for a
        known id, and ``({"error": ...}, 404)`` when the id is not an integer
        or no row carries that label.
    """
    if codeID is None:
        return jsonify({}), 204
    try:
        dataDict = data.loc[int(codeID)].to_dict()
    except (ValueError, KeyError):
        # int() rejects non-numeric ids (ValueError) and .loc rejects unknown
        # labels (KeyError); report both as "not found" rather than letting
        # them surface as an unhandled server error.
        return jsonify({"error": "code not found"}), 404
    return jsonify(dataDict), 200
# Start the Flask server only when this file is executed as a script.
# NOTE(review): debug=True is meant for local development; confirm this entry
# point is not the one used in production.
if __name__ == "__main__":
    app.run(debug=True)