Skip to content

Commit

Permalink
server
Browse files Browse the repository at this point in the history
  • Loading branch information
philipperemy committed Nov 7, 2023
1 parent a3b900b commit 8f4c1bc
Show file tree
Hide file tree
Showing 2 changed files with 199 additions and 28 deletions.
110 changes: 83 additions & 27 deletions api/server.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import json
import logging
import sys
from typing import Union
from typing import Union, Optional, Any, List

from flask import Flask, request
from paste.translogger import TransLogger
from waitress import serve
from waitress import serve as _serve

from names_dataset import NameDataset, NameWrapper

Expand All @@ -20,71 +20,127 @@
# Dataset instance created once at import time; the route handlers below query it through `nd`.
nd = NameDataset()


def generate_output(d: Union[str, dict], status: bool) -> str:
def _generate_output(d: Union[str, dict]) -> str:
status = 'error' not in d
return json.dumps({'status': status, 'message': d}, ensure_ascii=False, default=str)


# NOTE(review): this appears to be the pre-commit 404 handler as shown by the diff view;
# a handler named `_invalid_route` for the same error code is defined further down.
@app.errorhandler(404)
def invalid_route(e):
return generate_output('invalid endpoint', status=False)
def _validate_input(
req, names: Union[str, List[str]],
required: bool = True,
default: Optional[Any] = None,
var_type: Any = str
):
if isinstance(names, str):
names = [names]
var = None
for name in names:
var = req.args.get(name)
if var is not None:
break
if var is None and required:
raise ValueError(f'Provide a parameter for [{names[0]}].')
elif var is None:
var = default
return var_type(var) if var is not None else None


# NOTE(review): this appears to be the pre-commit index route as shown by the diff view;
# a view named `_main` bound to the same '/' rule is defined further down.
@app.route('/')
def main():
return generate_output('Welcome user! Name dataset api. query /search to perform a search.', status=True)
@app.errorhandler(404)
def _invalid_route(e):
    """Return the JSON envelope sent for requests to an unknown URL.

    Args:
        e: The error object handed over by Flask's 404 hook (unused).

    Returns:
        A JSON string with ``status`` false and the message
        ``'invalid endpoint'``.
    """
    # The envelope is built here instead of going through _generate_output():
    # that helper infers the status from the presence of 'error', so the plain
    # string 'invalid endpoint' would be reported with status=true.
    return json.dumps({'status': False, 'message': 'invalid endpoint'}, ensure_ascii=False)


def str2bool(s: Union[bool, str]) -> bool:
def _str2bool(s: Union[bool, str]) -> bool:
if isinstance(s, bool):
return s
# noinspection PyBroadException
try:
return bool(eval(s))
except Exception:
if s.lower() in ['1', '0', 'true', 'y']:
if s.lower() in {'1', 'true', 'y', 't', 'yes', 'on'}:
return True
return False
elif s.lower() in {'0', 'false', 'n', 'no', 'off'}:
return False
raise ValueError(f'Cannot convert to boolean: [{s}].')


def _process_inputs(req):
    """Extract the query parameters shared by the name-lookup routes.

    Args:
        req: Request object whose query string is read.

    Returns:
        Tuple ``(name, n, use_first_names, gender, country_alpha2)``.
        ``name`` and ``use_first_names`` are mandatory, ``n`` falls back to 5,
        and the two remaining values are ``None`` when absent.

    Raises:
        ValueError: Propagated from ``_validate_input`` / ``_str2bool``.
    """
    return (
        _validate_input(req, 'name', required=True),
        _validate_input(req, 'n', required=False, default=5, var_type=int),
        _validate_input(req, 'use_first_names', required=True, var_type=_str2bool),
        _validate_input(req, 'gender', required=False),
        _validate_input(req, 'country_alpha2', required=False),
    )


@app.route('/')
def _main():
    """Return a welcome message listing the public endpoints of the API.

    Endpoint names are taken from the routes registered on ``app``. Views
    whose name starts with an underscore and Flask's built-in ``static``
    endpoint are left out.

    Returns:
        The JSON envelope produced by ``_generate_output``.
    """
    # Reading the URL map is more reliable than scanning globals() for objects
    # whose repr happens to contain "function": that would also pick up any
    # imported helper, and misses nothing that is actually routable.
    endpoints = {
        rule.endpoint
        for rule in app.url_map.iter_rules()
        if not rule.endpoint.startswith('_') and rule.endpoint != 'static'
    }
    return _generate_output(f'Welcome to the Name Search API! List of endpoints: [{", ".join(sorted(endpoints))}].')


@app.route('/country_codes', methods=['GET'])
def country_codes():
    """Serve the country codes known to the dataset.

    Query parameters:
        alpha_2: Optional boolean flag forwarded to
            ``nd.get_country_codes``; defaults to ``False``.

    Returns:
        JSON envelope with ``{'result': ...}``, or ``{'error': ...}`` when
        anything raises.
    """
    try:
        wants_alpha_2 = _str2bool(request.args.get('alpha_2', False))
        codes = nd.get_country_codes(alpha_2=wants_alpha_2)
        payload = {'result': codes}
    except Exception as e:
        # Route boundary: report the failure to the client instead of a 500.
        payload = {'error': str(e)}
    return _generate_output(payload)


@app.route('/top', methods=['GET'])
def top():
    """Serve the top names returned by ``nd.get_top_names``.

    Query parameters:
        n: Number of names, defaults to 100.
        use_first_names: Boolean flag, defaults to ``True``.
        country_alpha2: Optional country filter.
        gender: Optional gender filter.

    Returns:
        JSON envelope with ``{'result': ...}``, or ``{'error': ...}`` when
        anything raises.
    """
    try:
        params = request.args
        limit = int(params.get('n', 100))
        first_names = _str2bool(params.get('use_first_names', True))
        country = params.get('country_alpha2', None)
        sex = params.get('gender', None)
        names = nd.get_top_names(limit, first_names, country, sex)
        payload = {'result': names}
    except Exception as e:
        # Route boundary: report the failure to the client instead of a 500.
        payload = {'error': str(e)}
    return _generate_output(payload)


@app.route('/search', methods=['GET'])
def search():  # legacy.
    """Look up a single name (legacy endpoint).

    Query parameters:
        q: The name to search for. Mandatory.

    Returns:
        JSON envelope with ``{'result': ...}`` (the ``nd.search`` output plus
        a ``describe`` entry), a status-false text message when ``q`` is
        missing, or ``{'error': ...}`` when anything raises.
    """
    try:
        name = request.args.get('q')
        if name is None:
            # Built directly rather than via _generate_output(): that helper
            # infers the status from the presence of 'error', so this plain
            # string would otherwise be reported with status=true.
            return json.dumps(
                {'status': False, 'message': 'provide a parameter q, for example q=Mike'},
                ensure_ascii=False
            )
        result = nd.search(name)
        result['describe'] = NameWrapper(result).describe
        return _generate_output({'result': result})
    except Exception as e:
        # Route boundary: report the failure to the client instead of a 500.
        return _generate_output({'error': str(e)})


@app.route('/fuzzy_search', methods=['GET'])
def fuzzy_search():
    """Serve ``nd.fuzzy_search`` results for the request's query parameters.

    The parameters are parsed by ``_process_inputs``.

    Returns:
        JSON envelope with ``{'result': ...}``, or ``{'error': ...}`` when
        parsing or the lookup raises.
    """
    try:
        query, limit, first_names, sex, country = _process_inputs(request)
        matches = nd.fuzzy_search(
            name=query,
            n=limit,
            use_first_names=first_names,
            country_alpha2=country,
            gender=sex,
        )
        payload = {'result': matches}
    except Exception as e:
        # Route boundary: report the failure to the client instead of a 500.
        payload = {'error': str(e)}
    return _generate_output(payload)


@app.route('/autocomplete', methods=['GET'])
def autocomplete():
    """Serve ``nd.auto_complete`` results for the request's query parameters.

    The parameters are parsed by ``_process_inputs``.

    Returns:
        JSON envelope with ``{'result': ...}``, or ``{'error': ...}`` when
        parsing or the lookup raises.
    """
    try:
        query, limit, first_names, sex, country = _process_inputs(request)
        completions = nd.auto_complete(
            name=query,
            n=limit,
            use_first_names=first_names,
            country_alpha2=country,
            gender=sex,
        )
        payload = {'result': completions}
    except Exception as e:
        # Route boundary: report the failure to the client instead of a 500.
        payload = {'error': str(e)}
    return _generate_output(payload)


if __name__ == '__main__':
    # Wrap the Flask app in TransLogger, then serve it on port 8888 with 4 threads.
    wrapped_app = TransLogger(app, setup_console_handler=False)
    _serve(wrapped_app, port=8888, threads=4)
117 changes: 116 additions & 1 deletion names_dataset/nd_v3.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import zipfile
from collections import defaultdict
from pathlib import Path
from typing import Optional
from typing import Optional, Dict, List

import pycountry

Expand Down Expand Up @@ -49,6 +49,68 @@ def describe(self):
return f'{self.gender}, {self.country}'


def _autocomplete_search(
prefix: str,
names_dict: Dict[str, Dict],
n: int = 5,
gender: Optional[str] = None,
country_alpha2: Optional[str] = None,
max_rank: int = 5000
) -> List[Dict]:
matching_names = []
for name, info in names_dict.items():
if name.startswith(prefix) and not name.startswith(prefix + ' '):
if gender is None or (len(info['gender']) > 0 and gender == max(info['gender'], key=info['gender'].get)):
matching_names.append(name)
result = []
for name in matching_names:
attrs = names_dict[name]
ranks = attrs['rank']
if len(ranks) <= 1:
continue
if country_alpha2 is not None:
if country_alpha2 not in ranks:
continue
rank = ranks[country_alpha2]
else:
rank = int(sum(ranks.values()) / len(ranks))
result.append({'name': name, 'rank': rank})
result = sorted(result, key=lambda x: x['rank'])
result = [r for r in result if r['rank'] < max_rank][:n]
return result


def _fuzzy_search(
fuzzy_name: str,
names_dict: Dict[str, Dict],
n: int = 5,
gender: Optional[str] = None,
country_alpha2: Optional[str] = None,
) -> List[Dict]:
from fuzzywuzzy import fuzz
closest_names = []
for name, info in names_dict.items():
similarity = fuzz.ratio(fuzzy_name, name)
if gender is None or (len(info['gender']) > 0 and gender == max(info['gender'], key=info['gender'].get)):
closest_names.append((name, similarity))
closest_names.sort(key=lambda x: x[1], reverse=True)
result = []
for name in closest_names[0:n * 5]:
attrs = names_dict[name[0]]
ranks = attrs['rank']
if len(ranks) == 0:
continue
if country_alpha2 is not None:
if country_alpha2 not in ranks:
continue
measure = ranks[country_alpha2]
else:
measure = int(sum(ranks.values()) / len(ranks))
result.append({'name': name[0], 'measure': measure})
result = sorted(result, key=lambda x: x['measure'])[0:n]
return result


class NameDataset:

def __init__(self, load_first_names=True, load_last_names=True):
Expand All @@ -58,13 +120,66 @@ def __init__(self, load_first_names=True, load_last_names=True):
last_names_filename = Path(os.path.dirname(__file__)) / 'v3/last_names.zip'
self.first_names = self._read_json_from_zip(first_names_filename) if load_first_names else None
self.last_names = self._read_json_from_zip(last_names_filename) if load_last_names else None
self.country_codes = self.get_country_codes(alpha_2=True)

def auto_complete(
        self,
        name: str,
        n: int = 5,
        use_first_names: bool = True,
        country_alpha2: Optional[str] = None,
        gender: Optional[str] = None,
        *args, **kwargs
) -> List[Dict]:
    """Suggest names that start with ``name``.

    Args:
        name: Prefix to complete; it is stripped and title-cased first.
        n: Maximum number of suggestions.
        use_first_names: Search first names when true, last names otherwise.
        country_alpha2: Optional country filter; must be a known code.
        gender: Optional gender filter (``M``/``Male``/``F``/``Female``,
            case-insensitive).
        *args: Extra positional arguments for ``_autocomplete_search``; they
            fill the parameters after ``country_alpha2`` (``max_rank``).
        **kwargs: Extra keyword arguments for ``_autocomplete_search``.

    Returns:
        The list produced by ``_autocomplete_search``.

    Raises:
        ValueError: If the requested dataset was not loaded, or the gender or
            country value is invalid.
    """
    name, gender = self._process_inputs(name, use_first_names, gender, country_alpha2)
    names_dict = self.first_names if use_first_names else self.last_names
    # The fixed arguments are passed positionally, in the helper's parameter
    # order, so that anything in *args lands on the parameters that follow.
    # Passing them as keywords ahead of *args made every extra positional
    # argument collide with `prefix` and raise TypeError.
    return _autocomplete_search(name, names_dict, n, gender, country_alpha2, *args, **kwargs)

def fuzzy_search(
        self,
        name: str,
        n: int = 5,
        use_first_names: bool = True,
        country_alpha2: Optional[str] = None,
        gender: Optional[str] = None,
) -> List[Dict]:
    """Find names that are textually close to ``name``.

    Args:
        name: Name to match; it is stripped and title-cased first.
        n: Maximum number of results.
        use_first_names: Search first names when true, last names otherwise.
        country_alpha2: Optional country filter; must be a known code.
        gender: Optional gender filter (``M``/``Male``/``F``/``Female``,
            case-insensitive).

    Returns:
        The list produced by ``_fuzzy_search``.

    Raises:
        ValueError: If the requested dataset was not loaded, or the gender or
            country value is invalid.
    """
    cleaned_name, gender_code = self._process_inputs(name, use_first_names, gender, country_alpha2)
    if use_first_names:
        pool = self.first_names
    else:
        pool = self.last_names
    return _fuzzy_search(
        fuzzy_name=cleaned_name,
        names_dict=pool,
        n=n,
        gender=gender_code,
        country_alpha2=country_alpha2,
    )

@staticmethod
def _read_json_from_zip(zip_file):
with zipfile.ZipFile(zip_file) as z:
with z.open(z.filelist[0]) as f:
return json.load(f)

def _process_inputs(
self,
name: str,
use_first_names: bool,
gender: Optional[str] = None,
country_alpha2: Optional[str] = None
):
q_name = name.strip().title()
if use_first_names and self.first_names is None:
raise ValueError('Select [load_first_names=True] at init.')
if not use_first_names and self.last_names is None:
raise ValueError('Select [load_last_names=True] at init.')
if gender is not None:
if gender.title() in {'M', 'Male'}:
gender = 'M'
elif gender.title() in {'F', 'Female'}:
gender = 'F'
else:
raise ValueError('Invalid gender value.')
if country_alpha2 is not None and country_alpha2 not in self.country_codes:
raise ValueError(f'Invalid Country alpha-2 code. Valid are: {",".join(self.country_codes)}.')
return q_name, gender

def search(self, name: str):
key = name.strip().title()
fn = self._post_process(self.first_names.get(key)) if self.first_names is not None else None
Expand Down

0 comments on commit 8f4c1bc

Please sign in to comment.