Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added lookup table and endpoint to return the whole table and only on… #67

Merged
merged 1 commit into from
May 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,11 @@ def upgrade():
)

op.bulk_insert(confidence_lookup_table, [
{'source': 'curated match', 'confidence': 1.1},
{'source': 'mark as delete', 'confidence': -1},
{'source': 'ADS', 'confidence': 1.3},
{'source': 'incorrect', 'confidence': -1},
{'source': 'author', 'confidence': 1.2},
{'source': 'publisher', 'confidence': 1.1},
{'source': 'SPIRES', 'confidence': 1.05},
])
# ### end Alembic commands ###

Expand Down
21 changes: 12 additions & 9 deletions oraclesrv/doc_matching.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,11 @@ def get_requests_params(payload, param, default_value=None, default_type=str):

class DocMatching(object):

def __init__(self, payload):
def __init__(self, payload, save=True):
"""

:param payload:
:param save:
"""
# read required params
self.abstract = get_requests_params(payload, 'abstract')
Expand All @@ -39,6 +40,7 @@ def __init__(self, payload):
self.mustmatch = get_requests_params(payload, 'mustmatch')
self.match_doctype = get_requests_params(payload, 'match_doctype', default_type=list)
self.source_bibcode = get_requests_params(payload, 'bibcode')
self.save_to_db = save

def create_and_return_response(self, match, query, comment=None):
"""
Expand Down Expand Up @@ -183,14 +185,15 @@ def save_match(self, result):
:param result:
:return:
"""
if result and result[0].get('match', []):
the_match = result[0]['match']
# if there is only one record, and the confidence is high enough to be considered a match
if len(the_match) == 1 and the_match[0]['matched'] == 1:
add_a_record({'source_bibcode': the_match[0]['source_bibcode'],
'matched_bibcode': the_match[0]['matched_bibcode'],
'confidence': the_match[0]['confidence']},
source_bibcode_doctype=self.doctype)
if self.save_to_db:
if result and result[0].get('match', []):
the_match = result[0]['match']
# if there is only one record, and the confidence is high enough to be considered a match
if len(the_match) == 1 and the_match[0]['matched'] == 1:
add_a_record({'source_bibcode': the_match[0]['source_bibcode'],
'matched_bibcode': the_match[0]['matched_bibcode'],
'confidence': the_match[0]['confidence']},
source_bibcode_doctype=self.doctype)

def process(self):
"""
Expand Down
34 changes: 33 additions & 1 deletion oraclesrv/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from sqlalchemy.sql import exists
from sqlalchemy.dialects.postgresql import insert

from oraclesrv.models import DocMatch
from oraclesrv.models import DocMatch, ConfidenceLookup

re_doi = re.compile(r'\bdoi:\s*(10\.[\d\.]{2,9}/\S+\w)', re.IGNORECASE)
def get_solr_data(rows, query, fl):
Expand Down Expand Up @@ -343,3 +343,35 @@ def query_docmatch(params):
except SQLAlchemyError as e:
current_app.logger.error('SQLAlchemy: ' + str(e))
return [], 404

def query_source_score():
    """
    Read every row of the ConfidenceLookup table.

    :return: tuple (list of row.toJSON() values, 200) on success;
             ([], 404) when the query raises SQLAlchemyError
    """
    try:
        with current_app.session_scope() as session:
            # serialize each row while the session is still open
            lookup_table = [entry.toJSON() for entry in session.query(ConfidenceLookup).all()]
            return lookup_table, 200
    except SQLAlchemyError as e:
        current_app.logger.error('SQLAlchemy: ' + str(e))
        return [], 404

def lookup_confidence(source):
    """
    Look up the confidence value stored for a given source name.

    :param source: value compared against ConfidenceLookup.source
    :return: tuple (confidence, 200) when a row is found;
             (0, 400) when no row matches;
             (0, 404) when the query raises SQLAlchemyError
    """
    try:
        with current_app.session_scope() as session:
            confidence_query = session.query(ConfidenceLookup.confidence)
            found = confidence_query.filter(ConfidenceLookup.source == source).first()
            # guard clause: nothing stored for this source name
            if not found:
                return 0, 400
            # only the confidence column was selected, so it is element 0
            return found[0], 200
    except SQLAlchemyError as e:
        current_app.logger.error('SQLAlchemy: ' + str(e))
        return 0, 404
62 changes: 61 additions & 1 deletion oraclesrv/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from adsmsg import DocMatchRecordList
from google.protobuf.json_format import Parse, ParseError

from oraclesrv.utils import get_solr_data_recommend, add_records, del_records, query_docmatch
from oraclesrv.utils import get_solr_data_recommend, add_records, del_records, query_docmatch, query_source_score, lookup_confidence
from oraclesrv.keras_model import create_keras_model
from oraclesrv.doc_matching import DocMatching, get_requests_params

Expand Down Expand Up @@ -164,6 +164,7 @@ def read_history_post():

return read_history(payload, the_function, the_reader)

# TODO: set the save param in DocMatching to False
@advertise(scopes=[], rate_limit=[1000, 3600 * 24])
@bp.route('/docmatch', methods=['POST'])
def docmatch():
Expand All @@ -173,6 +174,32 @@ def docmatch():
"""
current_app.logger.debug('received request to find a match for a document')

try:
payload = request.get_json(force=True) # post data in json
except:
payload = dict(request.form) # post data in form encoding

if not payload:
return return_response(results={'error': 'no information received'}, status_code=400)

start_time = time.time()
results, status_code = DocMatching(payload, save=True).process()

current_app.logger.debug('docmatching results = %s'%json.dumps(results))
current_app.logger.debug('docmatching status_code = %d'%status_code)

current_app.logger.debug("Matched doc in {duration} ms".format(duration=(time.time() - start_time) * 1000))
return return_response(results, status_code)

@advertise(scopes=['ads:oracle-service'], rate_limit=[1000, 3600 * 24])
@bp.route('/docmatch_add', methods=['POST'])
def docmatch_add():
"""

:return:
"""
current_app.logger.debug('received request to find a match for a document')

try:
payload = request.get_json(force=True) # post data in json
except:
Expand All @@ -190,6 +217,7 @@ def docmatch():
current_app.logger.debug("Matched doc in {duration} ms".format(duration=(time.time() - start_time) * 1000))
return return_response(results, status_code)


@advertise(scopes=['ads:oracle-service'], rate_limit=[1000, 3600 * 24])
@bp.route('/add', methods=['PUT'])
def add():
Expand Down Expand Up @@ -306,3 +334,35 @@ def query():
payload['date_cutoff'] = str(payload['date_cutoff'])
return return_response({'params':payload, 'results':results}, status_code)

@advertise(scopes=[], rate_limit=[1000, 3600 * 24])
@bp.route('/source_score', methods=['GET'])
def source_score():
    """
    GET endpoint that returns the whole source/score lookup table.

    :return: whatever return_response produces for {'results': <rows>} and the
             status code reported by query_source_score
    """
    logger = current_app.logger
    logger.debug('received request to get the list of source and score')

    results, status_code = query_source_score()

    logger.debug('source_score results = %s' % json.dumps(results))
    logger.debug('source_score status_code = %d' % status_code)

    response_body = {'results': results}
    return return_response(response_body, status_code)

@advertise(scopes=[], rate_limit=[1000, 3600 * 24])
@bp.route('/confidence/<source>', methods=['GET'])
def confidence(source):
    """
    GET endpoint that returns the confidence score for one source name.

    :param source: source name taken from the URL path
    :return: whatever return_response produces for {'confidence': <score>} and
             the status code reported by lookup_confidence
    """
    logger = current_app.logger
    logger.debug('received request to get confidence score for source name %s' % source)

    score, status_code = lookup_confidence(source)

    logger.debug('confidence value = %s' % score)
    logger.debug('confidence status_code = %d' % status_code)

    response_body = {'confidence': score}
    return return_response(response_body, status_code)