Skip to content

Commit

Permalink
[#4]: crawler_id filters to just digits.
Browse files Browse the repository at this point in the history
  • Loading branch information
g.trantham committed Dec 30, 2022
1 parent f9c7de1 commit 4393461
Showing 1 changed file with 13 additions and 5 deletions.
18 changes: 13 additions & 5 deletions src/nldi_crawler/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,21 +109,22 @@ def download(ctx, source_id):
"""
Download the data associated with a named data source.
"""
cid = sanitize_cid(source_id)
logging.info(" Downloading source %s ", source_id)
try:
source_list = source.fetch_source_table(ctx.obj["DB_URL"], selector=source_id)
source_list = source.fetch_source_table(ctx.obj["DB_URL"], selector=cid )
except ConnectionError:
sys.exit(-2)

if len(source_list) == 0:
click.echo(f"No source found with ID {source_id}")
click.echo(f"No source found with ID {cid}")
return
fname = source.download_geojson(source_list[0])
if fname:
click.echo(f"Source {source_id} downloaded to {fname}")
click.echo(f"Source {cid} downloaded to {fname}")
else:
logging.warning("Download FAILED for source %s", source_id)
click.echo(f"Download FAILED for source {source_id}")
logging.warning("Download FAILED for source %s", cid)
click.echo(f"Download FAILED for source {cid}")
sys.exit(-1)


Expand All @@ -134,6 +135,10 @@ def display(ctx, source_id):
"""
Show details for named data source.
"""
cid = sanitize_cid(source_id)
if not cid:
return

try:
source_list = source.fetch_source_table(ctx.obj["DB_URL"], selector=source_id)
except ConnectionError:
Expand Down Expand Up @@ -261,3 +266,6 @@ def cfg_from_env() -> dict:
# password is a special case. There is no default; it must be explicitly set.
env_cfg["NLDI_DB_PASS"] = os.environ.get("NLDI_DB_PASS")
return env_cfg

def sanitize_cid(input: str) -> str:
    """
    Reduce a source ID string to its digits only.

    Every non-digit character is removed; the digits that remain are returned
    in their original order.  A string containing no digits yields an empty
    string, so callers can use a simple truthiness check to reject bad input.

    :param input: raw identifier text to clean up
    :return: the digits of ``input``, or ``""`` if it contains none
    """
    # NOTE: the parameter name shadows the ``input`` builtin; it is kept so
    # that existing keyword callers continue to work.
    #
    # Deleting runs of non-digits (instead of matching "junk, digits, junk")
    # also handles input with no digits at all, which the capture-group form
    # would have returned unchanged.  The raw string avoids invalid-escape
    # warnings for ``\D``.
    return re.sub(r"\D+", "", input)

0 comments on commit 4393461

Please sign in to comment.