From c1532a2d5d6f18be1d3cd309d001b180525f1e4e Mon Sep 17 00:00:00 2001 From: "vysakh.menon" Date: Mon, 10 Jul 2023 11:50:04 -0700 Subject: [PATCH 1/3] 16714 nr search --- api/namex/analytics/solr.py | 164 ++++++++++++++---- api/namex/resources/requests.py | 116 +++++++++++-- .../python/end_points/test_namex_search.py | 48 +++++ api/tests/python/solr/test_query_string.py | 30 +++- 4 files changed, 304 insertions(+), 54 deletions(-) diff --git a/api/namex/analytics/solr.py b/api/namex/analytics/solr.py index 77f24b1af..27e4dacb7 100644 --- a/api/namex/analytics/solr.py +++ b/api/namex/analytics/solr.py @@ -35,6 +35,7 @@ class SolrQueries: HISTORY = 'histories' TRADEMARKS = 'trademarks' RESTRICTED_WORDS = 'restricted_words' + NAME_NR_SEARCH = 'name_nr_search' VALID_QUERIES = [CONFLICTS, HISTORY, TRADEMARKS] # @@ -119,7 +120,15 @@ class SolrQueries: '&fl=application_number,name,status,description,score' '&bq=status:%22Registration%20published%22^5.0' '&sort=score%20desc' - '{name_copy_clause}' + '{name_copy_clause}', + NAME_NR_SEARCH: + '/solr/names/select?' + 'indent=on' + '&q={query}' + '&sort=score%20desc,start_date%20desc' + '&wt=json' + '&start={start}&rows={rows}' + '&fl=nr_num,score' } @classmethod @@ -198,9 +207,9 @@ def get_conflict_results(cls, name, bucket, exact_phrase, start=0, rows=100): solr['response']['numFound'] += result['response']['numFound'] result_name = parse.unquote(connection[1]) if previous_stack_title.replace(' ', '') != result_name.replace(' ', ''): - stack_title_info = {'name_info': {'name': result_name}, 'stems': stemmed_words[:int(stem_count/2)]} - for word in list_name_split[:int(stem_count/2)]: - for stem in stemmed_words[:int(stem_count/2)]: + stack_title_info = {'name_info': {'name': result_name}, 'stems': stemmed_words[:int(stem_count / 2)]} + for word in list_name_split[:int(stem_count / 2)]: + for stem in stemmed_words[:int(stem_count / 2)]: if stem in word: break elif stem[:-1] in word: @@ -296,7 +305,7 @@ def get_conflict_results(cls, name, bucket, exact_phrase, start=0, rows=100): stem = [synonym.upper()[:-1]] stack_title_info = solr['response']['docs'].pop() if stem[0] not in name['stems']: - sorted_names.append({'name_info': name['name_info'],'stems': stem + name['stems'].copy()}) + sorted_names.append({'name_info': name['name_info'], 'stems': stem + name['stems'].copy()}) if stem[0] not in stack_title_info['stems'] and synonym.upper() in stack_title_info['stems']: stack_title_info['stems'] += stem else: @@ -360,14 +369,14 @@ def get_synonym_results(cls, solr_base_url, name, prox_search_strs, old_alg_sear connections = [] if name == '': name = '*' - prox_search_strs.append((['*'],'','',1)) + prox_search_strs.append((['*'], '', '', 1)) old_alg_search_strs.append('*') for prox_search_tuple, old_alg_search in zip(prox_search_strs, old_alg_search_strs): old_alg_search_str = old_alg_search[:-2].replace(' ', '%20') + '*' # [:-2] takes off the last '\ ' synonyms_clause = cls._get_synonyms_clause(prox_search_tuple[1], prox_search_tuple[2], name_tokens) if exact_phrase == '' else '' - exact_phrase_clause = '&fq=contains_exact_phrase:' + '\"' + parse.quote(exact_phrase).replace('%2A','') + '\"' if exact_phrase != '' else '' + exact_phrase_clause = '&fq=contains_exact_phrase:' + '\"' + parse.quote(exact_phrase).replace('%2A', '') + '\"' if exact_phrase != '' else '' if name.find('*') == -1: for name in prox_search_tuple[0]: @@ -381,7 +390,7 @@ def get_synonym_results(cls, solr_base_url, name, prox_search_strs, old_alg_sear ) current_app.logger.debug('Query: ' + query) connections.append((json.load(request.urlopen(query)), - '----' + prox_search_str.replace('\\', '').replace('*','').replace('@','') + '----' + prox_search_str.replace('\\', '').replace('*', '').replace('@', '') + synonyms_clause.replace('&fq=name_with_', ' ').replace('%20', ', ') + ' - PROXIMITY SEARCH')) @@ -395,7 +404,7 @@ def get_synonym_results(cls, solr_base_url, name, prox_search_strs, old_alg_sear ) current_app.logger.debug('Query: ' + query) connections.append((json.load(request.urlopen(query)), '----' + - old_alg_search_str.replace('\\', '').replace('%20', ' ').replace('**','*') + + old_alg_search_str.replace('\\', '').replace('%20', ' ').replace('**', '*') + synonyms_clause.replace('&fq=name_with_', ' ').replace('%20', ', ') + ' - EXACT WORD ORDER')) return connections @@ -408,7 +417,7 @@ def get_synonym_results(cls, solr_base_url, name, prox_search_strs, old_alg_sear def get_cobrs_phonetic_results(cls, solr_base_url, search_strs, name_tokens, start=0, rows=100): try: if search_strs == []: - connections = [({'response':{'numFound':0,'docs':[]},'responseHeader':{'params':{'q':'*'}}},'----*')] + connections = [({'response': {'numFound': 0, 'docs': []}, 'responseHeader':{'params': {'q': '*'}}}, '----*')] else: connections = [] for str_tuple in search_strs: @@ -424,7 +433,7 @@ def get_cobrs_phonetic_results(cls, solr_base_url, search_strs, name_tokens, sta ) current_app.logger.debug('Query: ' + query) result = json.load(request.urlopen(query)) - connections.append((result, '----' + start_str.replace('*','').replace('@','') + + connections.append((result, '----' + start_str.replace('*', '').replace('@', '') + synonyms_clause.replace('&fq=name_with_', ' ').replace('%20', ', '))) return connections @@ -436,7 +445,7 @@ def get_cobrs_phonetic_results(cls, solr_base_url, search_strs, name_tokens, sta def get_phonetic_results(cls, solr_base_url, name, search_strs, name_tokens, start=0, rows=100): try: if search_strs == []: - connections = [({'response':{'numFound':0,'docs':[]},'responseHeader':{'params':{'q':'*'}}},'----*')] + connections = [({'response': {'numFound': 0, 'docs': []}, 'responseHeader':{'params': {'q': '*'}}}, '----*')] else: connections = [] for str_tuple in search_strs: @@ -453,7 +462,7 @@ def get_phonetic_results(cls, solr_base_url, name, search_strs, name_tokens, sta result = json.load(request.urlopen(query)) docs = result['response']['docs'] result['response']['docs'] = cls.post_treatment(docs, start_str) - connections.append((result, '----' + start_str.replace('*','').replace('@','') + + connections.append((result, '----' + start_str.replace('*', '').replace('@', '') + synonyms_clause.replace('&fq=name_with_', ' ').replace('%20', ', '))) return connections @@ -461,6 +470,90 @@ def get_phonetic_results(cls, solr_base_url, name, search_strs, name_tokens, sta current_app.logger.error(err, query) return None, 'SOLR query error', 500 + @classmethod + def get_name_nr_search_results(cls, solr_query, start=0, rows=10): + """Search for the query param in `names` core.""" + solr_base_url = current_app.config.get('SOLR_BASE_URL', None) + if not solr_base_url: + current_app.logger.error('SOLR: SOLR_BASE_URL is not set') + return None, 'Internal server error', 500 + + try: + query = solr_base_url + SolrQueries.queries[SolrQueries.NAME_NR_SEARCH].format( + start=start, + rows=rows, + query=solr_query + ) + current_app.logger.debug('Query: ' + query) + connection = request.urlopen(query) + except Exception as err: + current_app.logger.error(err, query) + return None, 'Internal server error', 500 + + try: + solr = json.load(connection) + results = { + 'response': { + 'numFound': solr['response']['numFound'], + 'start': solr['response']['start'], + 'rows': solr['responseHeader']['params']['rows'], + 'maxScore': solr['response']['maxScore'], + 'name': solr['responseHeader']['params']['q'] + }, + 'names': solr['response']['docs'] + } + return results, '', None + except Exception as err: + current_app.logger.error(err, query) + return None, 'Internal server error', 500 + + @classmethod + def get_parsed_query_name_nr_search(cls, value: str): + """Build query to search nr number or name. + + - `None` -> *:* + - NR 1234567 -> nr_num:*1234567* + - HNR239 HOLDINGS -> (name_copy:*HNR239* AND name_copy:*HOLDINGS*) + - NR 955 HNR239 HOLDINGS -> nr_num:*955* AND (name_copy:*HNR239* AND name_copy:*HOLDINGS) + - HNR239 HOLDINGS NR 955 -> nr_num:*955* AND (name_copy:*HNR239* AND name_copy:*HOLDINGS) + - HNR239 NR 955 HOLDINGS -> nr_num:*955* OR + (name_copy:*HNR239* AND name_copy:*NR* AND name_copy:*955* AND name_copy:*HOLDINGS) + """ + solr_query = '*:*' + nr_number = None + if value: + value = value.strip() + + nr_num = '' + # match whole/start/end string NR 1234567, NR1234567 + nr_num_regex = r'(^(NR( |)[0-9]+)$)|(^(NR( |)[0-9]+)\s)|(\s(NR( |)[0-9]+)$)' + nr_num_fallback_regex = r'(^[0-9]+$)|(^[0-9]+\s)|(\s[0-9]+$)' # 1234567 + if result := re.search(nr_num_regex, value, re.IGNORECASE): + matching_nr = result.group() + nr_number = re.sub('NR', '', matching_nr, flags=re.IGNORECASE).strip() + value = value.replace(matching_nr, '', 1).strip() # removing nr num + nr_num = 'nr_num:*' + nr_number + '*' + if value: + nr_num += ' AND' # Get results which match nr_num and name + else: + return nr_num, nr_number, value + elif result := re.search(nr_num_fallback_regex, value): + nr_number = result.group().strip() + nr_num = 'nr_num:*' + nr_number + '* OR' + + name_copy = 'name_copy:*' + name_copy += '* AND name_copy:*'.join(value.split()) + name_copy += '*' # name_copy += '* AND' + + # name = f'({name_copy} name:(*"{value}"*))' + name = f'({name_copy})' + + solr_query = parse.quote(f'{nr_num} {name}'.strip()) + + # 'nr_num:*0285176* OR (name_copy:*0285176* AND name:(*"0285176"*))' + + return solr_query, nr_number, value + @classmethod def get_results(cls, query_type, name, start=0, rows=10): solr_base_url = current_app.config.get('SOLR_BASE_URL', None) @@ -517,9 +610,9 @@ def _compress_name(cls, name): # TODO: these should be loaded from somewhere. designations = [ 'corp.', 'corporation', 'inc.', 'incorporated', 'incorporee', 'l.l.c.', 'limited liability co.', - 'limited liability company', 'limited liability partnership', 'limited partnership','limitee', 'llc', 'llp', 'ltd.', 'ltee', + 'limited liability company', 'limited liability partnership', 'limited partnership', 'limitee', 'llc', 'llp', 'ltd.', 'ltee', 'sencrl', 'societe a responsabilite limitee', 'societe en nom collectif a responsabilite limitee', 'srl', - 'ulc', 'unlimited liability company', 'limited',] + 'ulc', 'unlimited liability company', 'limited', ] # Match the designation with whitespace before and either followed by whitespace or end of line. for designation in designations: @@ -549,7 +642,7 @@ def _tokenize(cls, line: str, categories: List[str] = []) -> List[str]: :param categories: List[str]: a list of strings used as categories to classify the tokens :return: List[str]: a list of string tokens that can be parsed left-> as order is preserved """ - tokens = [] # yep, lazy format + tokens = [] # yep, lazy format start_token: int = 0 idx: int category: List[str] = None @@ -633,7 +726,6 @@ def _parse_for_synonym_candidates(cls, tokens: List[str]) -> List[str]: return candidates - @classmethod def _get_concatenated_terms(cls, candidates): @@ -644,14 +736,14 @@ def _get_concatenated_terms(cls, candidates): for x in range(len(candidates)): if x < len(candidates) - 1: - multiples.append("".join(candidates[x:x+2])) + multiples.append("".join(candidates[x:x + 2])) if x < len(candidates) - 2: - multiples.append("".join(candidates[x:x+3])) + multiples.append("".join(candidates[x:x + 3])) return multiples - # Call the synonyms API for the given token. + @classmethod def _synonyms_exist(cls, token, col): solr_synonyms_api_url = current_app.config.get('SOLR_SYNONYMS_API_URL', None) @@ -704,7 +796,7 @@ def _get_synonym_list(cls, token): # Look up each token in name, and if it is in the synonyms then we need to search for it separately. @classmethod - def _get_synonyms_clause(cls, name, stemmed_name, name_tokens={'full_words':[], 'stemmed_words':[]}): + def _get_synonyms_clause(cls, name, stemmed_name, name_tokens={'full_words': [], 'stemmed_words': []}): # name = re.sub(' +', ' ', name) current_app.logger.debug('getting synonyms for: {}'.format(name)) clause = '' @@ -717,7 +809,7 @@ def _get_synonyms_clause(cls, name, stemmed_name, name_tokens={'full_words':[], string.ascii_lowercase]) candidates = cls._parse_for_synonym_candidates(tokens) for token in candidates: - for full, stem in zip(name_tokens['full_words'],name_tokens['stemmed_words']): + for full, stem in zip(name_tokens['full_words'], name_tokens['stemmed_words']): if token.upper() == full.upper(): token = stem break @@ -726,10 +818,10 @@ def _get_synonyms_clause(cls, name, stemmed_name, name_tokens={'full_words':[], if stemmed_name: tokens = cls._tokenize(stemmed_name.lower(), [string.digits, - string.whitespace, - RESERVED_CHARACTERS, - string.punctuation, - string.ascii_lowercase]) + string.whitespace, + RESERVED_CHARACTERS, + string.punctuation, + string.ascii_lowercase]) candidates = cls._parse_for_synonym_candidates(tokens) for token in candidates: if cls._synonyms_exist(token, 'stems_text') and token.upper() not in synonyms: @@ -803,7 +895,7 @@ def remove_stopwords_designations(cls, name): # TODO: these should be loaded from somewhere. designations = [ 'corp.', 'corp', 'corporation', 'inc.', 'inc', 'incorporated', 'incorporee', 'l.l.c.', 'llc', 'limited partnership', - 'limited liability co.', 'limited liability co','limited liability company', 'limited liability partnership', 'limitee', + 'limited liability co.', 'limited liability co', 'limited liability company', 'limited liability partnership', 'limitee', 'llp', 'ltd.', 'ltd', 'ltee', 'sencrl', 'societe a responsabilite limitee', 'societe en nom collectif a responsabilite limitee', 'limited', 'srl', 'ulc', 'unlimited liability company'] @@ -839,8 +931,8 @@ def combine_multi_word_synonyms(cls, name, solr_base_url): max_len = len(name.split()) * 2 query = solr_base_url + \ - '/solr/possible.conflicts/analysis/field?analysis.fieldvalue={name}&analysis.fieldname=name' \ - '&wt=json&indent=true'.format(name=parse.quote(name.strip()).replace('%2A', '')) + '/solr/possible.conflicts/analysis/field?analysis.fieldvalue={name}&analysis.fieldname=name' \ + '&wt=json&indent=true'.format(name=parse.quote(name.strip()).replace('%2A', '')) current_app.logger.debug('Query: ' + query) processed_words = json.load(request.urlopen(query)) @@ -863,7 +955,7 @@ def combine_multi_word_synonyms(cls, name, solr_base_url): name = parse.unquote(name) processed_list = name.split() - return processed_list,name.strip() + return processed_list, name.strip() @classmethod def build_solr_search_strs(cls, name, stemmed_name, name_tokens): @@ -898,7 +990,7 @@ def replace_nth(string, deleted_substr, added_substr, n): prox_compounded_words = [prox_combined_terms.strip()] if num_terms > 2: - prox_compounded_words.append(prox_combined_terms.replace(' ','')) + prox_compounded_words.append(prox_combined_terms.replace(' ', '')) # concat for compound versions of combined terms combined_terms_list = prox_combined_terms.split() @@ -918,7 +1010,7 @@ def replace_nth(string, deleted_substr, added_substr, n): @classmethod def get_synonyms_for_words(cls, list_name_split): # get synonym list for each word in the name - list_name_split = [wrd.replace('*','').upper() for wrd in list_name_split] + list_name_split = [wrd.replace('*', '').upper() for wrd in list_name_split] synonyms_for_word = {} for word in list_name_split: synonyms_for_word[word] = [x.upper().strip() for x in cls._get_synonym_list(word)] @@ -948,11 +1040,11 @@ def word_pre_processing(cls, list_of_words, type, solr_base_url): for item in list_of_words: words_to_process += ' ' + item - return_dict = {'stems':[]} + return_dict = {'stems': []} if words_to_process != '': query = solr_base_url + \ - '/solr/possible.conflicts/analysis/field?analysis.fieldvalue={words}&analysis.fieldname=name' \ - '&wt=json&indent=true'.format(words=parse.quote(words_to_process.strip())) + '/solr/possible.conflicts/analysis/field?analysis.fieldvalue={words}&analysis.fieldname=name' \ + '&wt=json&indent=true'.format(words=parse.quote(words_to_process.strip())) current_app.logger.debug('Query: ' + query) processed_words = json.load(request.urlopen(query)) @@ -1094,7 +1186,7 @@ def keep_phonetic_match(cls, word, query): word_sound = word_first_consonant + word_first_vowels if word_sound == query_sound: - return True + return True return False diff --git a/api/namex/resources/requests.py b/api/namex/resources/requests.py index 9ed9ef45d..790cc8856 100644 --- a/api/namex/resources/requests.py +++ b/api/namex/resources/requests.py @@ -13,7 +13,7 @@ from sqlalchemy.orm import load_only, lazyload, eagerload from sqlalchemy.orm.exc import NoResultFound -from sqlalchemy import and_, func, or_, text +from sqlalchemy import and_, func, or_, text, Date from sqlalchemy.inspection import inspect from namex import jwt, nro, services @@ -233,16 +233,16 @@ def get(*args, **kwargs): if compName: compName = compName.strip().replace(' ', '%') - ## nameSearch column is populated like: '|1|2|3 + # nameSearch column is populated like: '|1|2|3 # to ensure we don't get a match that spans over a single name compName1 = '%|1%' + compName + '%1|%' compName2 = '%|2%' + compName + '%2|%' compName3 = '%|3%' + compName + '%3|%' q = q.filter(or_( - RequestDAO.nameSearch.ilike(compName1), - RequestDAO.nameSearch.ilike(compName2), - RequestDAO.nameSearch.ilike(compName3) - )) + RequestDAO.nameSearch.ilike(compName1), + RequestDAO.nameSearch.ilike(compName2), + RequestDAO.nameSearch.ilike(compName3) + )) if firstName: firstName = firstName.strip().replace(' ', '%') @@ -321,7 +321,7 @@ def get(*args, **kwargs): submittedStartDateTimeUtc = submittedStartDateTimeUtcObj.strftime(DATE_TIME_FORMAT_SQL) q = q.filter(RequestDAO.submittedDate >= text('\'{submittedStartDateTimeUtc}\'' - .format(submittedStartDateTimeUtc=submittedStartDateTimeUtc))) + .format(submittedStartDateTimeUtc=submittedStartDateTimeUtc))) except ValueError as ve: return jsonify({"message": "Invalid submittedStartDate: {}. Must be of date format %Y-%m-%d" .format(submittedStartDate)}), 400 @@ -339,7 +339,7 @@ def get(*args, **kwargs): .format(submittedEndDate)}), 400 if (submittedStartDateTimeUtcObj and submittedEndDateTimeUtcObj)\ - and submittedEndDateTimeUtcObj < submittedStartDateTimeUtcObj: + and submittedEndDateTimeUtcObj < submittedStartDateTimeUtcObj: return jsonify({"message": "submittedEndDate must be after submittedStartDate"}), 400 q = q.order_by(text(sort_by)) @@ -382,9 +382,100 @@ def post(self, *args, **kwargs): return jsonify({'message': 'Not Implemented'}), 501 # For sbc-auth - My Business Registry page. -@cors_preflight("POST") -@api.route('/search', methods=['POST']) + + +@cors_preflight("GET, POST") +@api.route('/search', methods=['GET', 'POST', 'OPTIONS']) class RequestSearch(Resource): + """Search for NR's.""" + + @staticmethod + @cors.crossdomain(origin='*') + # @jwt.requires_auth + def get(): + """Query for name requests. + + example: query=NR3742302 or query=abcd + """ + data = [] + start = request.args.get('start', 0) + rows = request.args.get('rows', 10) + query = request.args.get('query', '') + if not query: + return jsonify(data), 200 + + try: + solr_query, nr_number, nr_name = SolrQueries.get_parsed_query_name_nr_search(query) + condition = '' + if nr_number: + condition = f"requests.nr_num ILIKE '%{nr_number}%'" + if nr_name: + if condition: + condition += ' OR ' + name_condition = "requests.name_search ILIKE '%" + name_condition += "%' AND requests.name_search ILIKE '%".join(nr_name.split()) + name_condition += "%'" + + condition += f'({name_condition})' + + results = RequestDAO.query.filter( + RequestDAO.stateCd.in_([State.DRAFT, State.INPROGRESS, State.REFUND_REQUESTED]), + text(f'({condition})') + ).options( + lazyload('*'), + eagerload(RequestDAO.names).load_only(Name.name), + load_only( + RequestDAO.id, + RequestDAO.nrNum + ) + ).limit(rows).all() + + data.extend([{ + # 'id': nr.id, + 'nrNum': nr.nrNum, + 'names': [n.name for n in nr.names] + } for nr in results]) + + while len(data) < rows: + nr_data, have_more_data = RequestSearch._get_next_set_from_solr(solr_query, start, rows) + nr_data = nr_data[:(rows - len(data))] + data.extend([{ + # 'id': nr.id, + 'nrNum': nr.nrNum, + 'names': [n.name for n in nr.names] + } for nr in nr_data]) + + if not have_more_data: + break # no more data in solr + start += rows + + return jsonify(data), 200 + except Exception: + return jsonify({'message': 'Internal server error'}), 500 + + @staticmethod + def _get_next_set_from_solr(solr_query, start, rows): + results, msg, code = SolrQueries.get_name_nr_search_results(solr_query, start, rows) + if code: + raise Exception(msg) + elif len(results['names']) > 0: + have_more_data = results['response']['numFound'] > (start + rows) + identifiers = [name['nr_num'] for name in results['names']] + return RequestDAO.query.filter( + RequestDAO.nrNum.in_(identifiers), + or_(RequestDAO.stateCd != State.EXPIRED, + text(f"(requests.state_cd = '{State.EXPIRED}' AND CAST(requests.expiration_date AS DATE) + " + "interval '60 day' >= CAST(now() AS DATE))")) + ).options( + lazyload('*'), + eagerload(RequestDAO.names).load_only(Name.name), + load_only( + RequestDAO.id, + RequestDAO.nrNum + ) + ).all(), have_more_data + + return [], False @staticmethod @cors.crossdomain(origin='*') @@ -393,7 +484,7 @@ def post(): search = request.get_json() identifiers = search.get('identifiers', []) - # Only names and applicants are needed for this query, we want this query to be lighting fast + # Only names and applicants are needed for this query, we want this query to be lighting fast # to prevent putting a load on namex-api. q = RequestDAO.query.filter(RequestDAO.nrNum.in_(identifiers)) \ .options( @@ -608,7 +699,6 @@ def patch(nr, *args, **kwargs): return jsonify(message='Request:{} - patched'.format(nr)), 200 - def _email_report(nr_id): report = ReportResource() report.email_report(nr_id) @@ -777,7 +867,7 @@ def put(nr, *args, **kwargs): is_changed__request_state = True if nrd.consentFlag != orig_nrd['consentFlag']: is_changed_consent = True - emailer_enable = ldclient.get().variation('emailer-enable', {'key': 'anonymous'}, False) + emailer_enable = ldclient.get().variation('emailer-enable', {'key': 'anonymous'}, False) if emailer_enable: thread = FlaskThread(target=Request._email_consent, args=(nrd.id, )) thread.daemon = True diff --git a/api/tests/python/end_points/test_namex_search.py b/api/tests/python/end_points/test_namex_search.py index 713a9b53f..561578efd 100644 --- a/api/tests/python/end_points/test_namex_search.py +++ b/api/tests/python/end_points/test_namex_search.py @@ -6,6 +6,7 @@ from datetime import datetime, timedelta from http import HTTPStatus from typing import List +from namex.analytics.solr import SolrQueries from namex.models import Applicant, Name, Request, State, User from tests.python.end_points.util import create_header @@ -638,3 +639,50 @@ def test_request_search_system_only(client, jwt, app): assert rv.status_code not in [HTTPStatus.OK, HTTPStatus.ACCEPTED, HTTPStatus.CREATED] # assert rv.json['code'] == 'missing_a_valid_role' # assert rv.json['description'] == 'Missing a role required to access this endpoint' + + +@pytest.mark.parametrize('search_name, expected_len', [ + ('test name one', 1), + ('name test one', 1), + ('one test name', 1), + ('1234567', 1), + ('nr1234567', 1), + ('nr 1234567', 1), + ('NR1234567', 1), + ('NR 1234567', 1), + ('NR123 test one', 1), + ('12345678', 0), + ('test 1234567 name one', 0) +]) +def test_search_get(client, jwt, app, monkeypatch, search_name, expected_len): + + nr_no_return = Request() + nr_no_return.nrNum = 'NR 7654321' + nr_no_return.stateCd = State.DRAFT + name1 = Name() + name1.choice = 1 + name1.name = 'SHOULD NOT RETURN' + nr_no_return.names = [name1] + nr_no_return.save_to_db() + + nr = Request() + nr.nrNum = 'NR 1234567' + nr.stateCd = State.DRAFT + name1 = Name() + name1.choice = 1 + name1.name = 'TEST NAME ONE' + nr.names = [name1] + nr.save_to_db() + + def mock_get_name_nr_search_results(solr_query, start=0, rows=10): + return ({'names': []}, '', None) + monkeypatch.setattr(SolrQueries, 'get_name_nr_search_results', mock_get_name_nr_search_results) + + # create JWT & setup header with a Bearer Token using the JWT + headers = create_header(jwt, ['public_user']) + rv = client.get(f'/api/v1/requests/search?query={search_name}', headers=headers) + assert rv.status_code == HTTPStatus.OK + assert len(rv.json) == expected_len + if expected_len > 0: + assert rv.json[0]['nrNum'] == nr.nrNum + assert rv.json[0]['names'] == [name1.name] diff --git a/api/tests/python/solr/test_query_string.py b/api/tests/python/solr/test_query_string.py index c5b27d81d..c836a8373 100644 --- a/api/tests/python/solr/test_query_string.py +++ b/api/tests/python/solr/test_query_string.py @@ -3,6 +3,7 @@ # import string from typing import List +from urllib import parse import pytest @@ -31,6 +32,7 @@ def test_compress_name(name, expected): assert expected == response + name_copy_test_data = [ ('waffle corp', ''), ('waffle ' + NO_SYNONYMS_INDICATOR + ' corp', ''), @@ -90,10 +92,10 @@ def test_tokenz(name_string, expected): name_parse_data = [ (['skinny', ' ', '"', 'puppy', '-', 'records', '"'], ['skinny', 'puppy', 'records', 'skinnypuppy', 'skinnypuppyrecords', 'puppyrecords']), (['skinny', ' ', '-', '"', 'records', '"'], ['skinny']), - (['skinny', ' ', '"', 'puppy', ' ', 'records', '"'], ['skinny', 'puppy', 'records','skinnypuppy', 'skinnypuppyrecords', 'puppyrecords']), - (['skinny', ' ', '"', 'puppy', '-', 'records', '"'], ['skinny', 'puppy', 'records','skinnypuppy', 'skinnypuppyrecords', 'puppyrecords']), - (['skinny', ' ', 'puppy', '-', 'records'], ['skinny', 'puppy', 'records','skinnypuppy', 'skinnypuppyrecords', 'puppyrecords']), - (['skinny', ' ', 'puppy', ' ', '-', 'records'], ['skinny', 'puppy','skinnypuppy']), + (['skinny', ' ', '"', 'puppy', ' ', 'records', '"'], ['skinny', 'puppy', 'records', 'skinnypuppy', 'skinnypuppyrecords', 'puppyrecords']), + (['skinny', ' ', '"', 'puppy', '-', 'records', '"'], ['skinny', 'puppy', 'records', 'skinnypuppy', 'skinnypuppyrecords', 'puppyrecords']), + (['skinny', ' ', 'puppy', '-', 'records'], ['skinny', 'puppy', 'records', 'skinnypuppy', 'skinnypuppyrecords', 'puppyrecords']), + (['skinny', ' ', 'puppy', ' ', '-', 'records'], ['skinny', 'puppy', 'skinnypuppy']), (['skinny', ' ', '@', 'puppy'], ['skinny']), (['skinny', ' ', '@', '"', 'puppy', ' ', 'records', '"'], ['skinny']), (['skinny', ' ', '@', '"', 'puppy', '-', 'records', '"'], ['skinny']), @@ -107,7 +109,25 @@ def test_parse_for_synonym_candidates(tokens, expected): synonym_candidates = SolrQueries._parse_for_synonym_candidates(tokens) - print (synonym_candidates) + print(synonym_candidates) assert expected == synonym_candidates + +@pytest.mark.parametrize('search_value, expected_solr_query, expected_nr_number, expected_nr_name', [ + (None, '*:*', None, None), + ('test name one', '(name_copy:*test* AND name_copy:*name* AND name_copy:*one*)', None, 'test name one'), + ('1234567', 'nr_num:*1234567* OR (name_copy:*1234567*)', '1234567', '1234567'), + ('nr1234567', 'nr_num:*1234567*', '1234567', ''), + ('nr 1234567', 'nr_num:*1234567*', '1234567', ''), + ('NR1234567', 'nr_num:*1234567*', '1234567', ''), + ('NR 1234567', 'nr_num:*1234567*', '1234567', ''), + ('NR123 test one', 'nr_num:*123* AND (name_copy:*test* AND name_copy:*one*)', '123', 'test one'), + ('test 123 one', '(name_copy:*test* AND name_copy:*123* AND name_copy:*one*)', None, 'test 123 one'), + ('123 test on', 'nr_num:*123* OR (name_copy:*123* AND name_copy:*test* AND name_copy:*on*)', '123', '123 test on') +]) +def test_get_parsed_query_name_nr_search(search_value, expected_solr_query, expected_nr_number, expected_nr_name): + solr_query, nr_number, nr_name = SolrQueries.get_parsed_query_name_nr_search(search_value) + assert expected_solr_query == parse.unquote(solr_query) + assert expected_nr_number == nr_number + assert expected_nr_name == nr_name From dd61a6a62b008fc88bd0ceb19e6304a4c7e244f9 Mon Sep 17 00:00:00 2001 From: "vysakh.menon" Date: Mon, 10 Jul 2023 13:11:30 -0700 Subject: [PATCH 2/3] no message --- api/namex/resources/requests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/namex/resources/requests.py b/api/namex/resources/requests.py index 790cc8856..e480db617 100644 --- a/api/namex/resources/requests.py +++ b/api/namex/resources/requests.py @@ -391,7 +391,7 @@ class RequestSearch(Resource): @staticmethod @cors.crossdomain(origin='*') - # @jwt.requires_auth + @jwt.requires_auth def get(): """Query for name requests. From 53cb72674d1736800a1f14a36c771fe1b87ab932 Mon Sep 17 00:00:00 2001 From: "vysakh.menon" Date: Tue, 11 Jul 2023 10:13:02 -0700 Subject: [PATCH 3/3] no message --- api/namex/resources/requests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/namex/resources/requests.py b/api/namex/resources/requests.py index e480db617..f41b7764b 100644 --- a/api/namex/resources/requests.py +++ b/api/namex/resources/requests.py @@ -428,7 +428,7 @@ def get(): RequestDAO.id, RequestDAO.nrNum ) - ).limit(rows).all() + ).order_by(RequestDAO.submittedDate.desc()).limit(rows).all() data.extend([{ # 'id': nr.id,