Skip to content

Commit

Permalink
refine citation (#161)
Browse files (browse the repository at this point in the history)
  • Loading branch information
KevinHuSh authored Mar 28, 2024
1 parent 37cc673 commit f347720
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 17 deletions.
7 changes: 4 additions & 3 deletions api/apps/conversation_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,8 @@ def chat(dialog, messages, **kwargs):
# try to use sql if field mapping is good to go
if field_map:
chat_logger.info("Use SQL to retrieval:{}".format(questions[-1]))
return use_sql(questions[-1], field_map, dialog.tenant_id, chat_mdl)
ans = use_sql(questions[-1], field_map, dialog.tenant_id, chat_mdl)
if ans: return ans

prompt_config = dialog.prompt_config
for p in prompt_config["parameters"]:
Expand Down Expand Up @@ -305,7 +306,7 @@ def get_table():

tbl, sql = get_table()
if tbl is None:
return None, None
return None
if tbl.get("error") and tried_times <= 2:
user_promt = """
表名:{};
Expand Down Expand Up @@ -333,7 +334,7 @@ def get_table():
chat_logger.info("GET table: {}".format(tbl))
print(tbl)
if tbl.get("error") or len(tbl["rows"]) == 0:
return None, None
return None

docid_idx = set([ii for ii, c in enumerate(
tbl["columns"]) if c["name"] == "doc_id"])
Expand Down
2 changes: 1 addition & 1 deletion rag/app/paper.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def _begin(txt):
print(tbls)

return {
"title": title if title else filename,
"title": title,
"authors": " ".join(authors),
"abstract": abstr,
"sections": [(b["text"] + self._line_tag(b, zoomin), b.get("layoutno", "")) for b in self.boxes[i:] if
Expand Down
29 changes: 16 additions & 13 deletions rag/nlp/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,19 +246,22 @@ def insert_citations(self, answer, chunks, chunk_v,
chunks_tks = [huqie.qie(self.qryr.rmWWW(ck)).split(" ")
for ck in chunks]
cites = {}
for i, a in enumerate(pieces_):
sim, tksim, vtsim = self.qryr.hybrid_similarity(ans_v[i],
chunk_v,
huqie.qie(
self.qryr.rmWWW(pieces_[i])).split(" "),
chunks_tks,
tkweight, vtweight)
mx = np.max(sim) * 0.99
es_logger.info("{} SIM: {}".format(pieces_[i], mx))
if mx < 0.63:
continue
cites[idx[i]] = list(
set([str(ii) for ii in range(len(chunk_v)) if sim[ii] > mx]))[:4]
thr = 0.63
while len(cites.keys()) == 0 and pieces_ and chunks_tks:
for i, a in enumerate(pieces_):
sim, tksim, vtsim = self.qryr.hybrid_similarity(ans_v[i],
chunk_v,
huqie.qie(
self.qryr.rmWWW(pieces_[i])).split(" "),
chunks_tks,
tkweight, vtweight)
mx = np.max(sim) * 0.99
es_logger.info("{} SIM: {}".format(pieces_[i], mx))
if mx < thr:
continue
cites[idx[i]] = list(
set([str(ii) for ii in range(len(chunk_v)) if sim[ii] > mx]))[:4]
thr *= 0.8

res = ""
seted = set([])
Expand Down

0 comments on commit f347720

Please sign in to comment.