Skip to content

Commit

Permalink
Include -y option to cli
Browse files Browse the repository at this point in the history
  • Loading branch information
luabida committed Dec 21, 2023
1 parent db02048 commit d931f07
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 34 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ To use Episcanner Downloader, follow these steps:
```shell
cd dist/
./episcanner -h
./episcanner -s RJ -d dengue -o /tmp -f duckdb
./episcanner --all -d zika -o /tmp -f csv parquet --verbose
./episcanner -y 2023 -s RJ -d dengue -o /tmp -f duckdb
./episcanner -y 2021 2022 2023 --all -d zika -f csv parquet --verbose
```
*Replace <years> with one or more years to scan (e.g., '2021 2022 2023'), <source> with the desired source (e.g., 'MG', or '--all' to download all states), and <diseases> with the specific diseases you want to download (e.g., 'dengue chikungunya'). Specify the <output_directory> where the data should be saved.*

Expand Down
65 changes: 49 additions & 16 deletions src/scanner/cli.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,30 @@
"""
python src/scanner/cli.py -a -d dengue zika -f csv -v
python src/scanner/cli.py -y 2023 -a -d dengue zika -f csv -v
python src/scanner/cli.py -s SP RJ DF -d dengue zika -f csv parquet duckdb
python src/scanner/cli.py -y 2010 -s SP RJ -d dengue zika -f csv parquet duckdb
python src/scanner/cli.py -s SP -d zika -f csv -o /tmp
python src/scanner/cli.py -y 2020 2021 2022 -s SP -d zika -f csv -o /tmp
"""

import argparse
import datetime

from src.scanner.scanner import EpiScanner
from src.scanner.utils import CACHEPATH, STATES


class YearsAction(argparse.Action):
    """Validate the values passed to the years option and store them.

    Every value must parse as an integer between 2010 and the current
    calendar year (inclusive). The accepted values are stored on the
    namespace unchanged, as a list of the strings that were supplied.

    Raises:
        argparse.ArgumentError: if a value is not an integer or falls
            outside the accepted range.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        cur_year = datetime.datetime.now().year
        for value in values:
            # A non-numeric value must be reported the same way as an
            # out-of-range one instead of escaping as a bare ValueError.
            try:
                year = int(value)
            except (TypeError, ValueError):
                year = None
            if year is None or not 2010 <= year <= cur_year:
                raise argparse.ArgumentError(
                    self,
                    f"Invalid year: {value}. " f"Options: 2010 to {cur_year}",
                )
        # Keep the values as given (strings) so downstream consumers see
        # the same type as before.
        setattr(namespace, self.dest, list(values))


class StatesAction(argparse.Action):
def __call__(self, parser, namespace, values, option_string=None):
for value in values:
Expand Down Expand Up @@ -49,6 +62,19 @@ def app():
description="Export EpiScanner data to duckdb, csv and parquet"
)

# Required list of years to scan. Validation is delegated to YearsAction,
# which checks each value against the accepted year range; StatesAction
# is the validator for the states option and must not be used here.
parser.add_argument(
    "-y",
    "--years",
    nargs="+",
    action=YearsAction,
    help="""
Years to be scanned.
Example:
-y 2011 2022
""",
    required=True,
)

parser.add_argument(
"-s",
"--states",
Expand Down Expand Up @@ -117,16 +143,23 @@ def app():
"-s/--states OR -a/--all"
)

for disease in args.diseases:
for format in args.file_format:
if args.all:
for state in STATES:
EpiScanner(disease, state, args.verbose).export(
format, args.output_dir
)
break

for state in args.states:
EpiScanner(disease, state, args.verbose).export(
format, args.output_dir
)
for year in args.years:
for disease in args.diseases:
for format in args.file_format:
if args.all:
for state in STATES:
EpiScanner(
disease=disease,
uf=state,
year=year,
verbose=args.verbose,
).export(to=format, output_dir=args.output_dir)
break

for state in args.states:
EpiScanner(
disease=disease,
uf=state,
year=year,
verbose=args.verbose,
).export(to=format, output_dir=args.output_dir)
24 changes: 8 additions & 16 deletions src/scanner/scanner.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os
import asyncio
import os
from collections import defaultdict
from datetime import datetime
from pathlib import Path
Expand Down Expand Up @@ -145,7 +145,7 @@ def export(
self._to_duckdb(output_dir)
file = output_dir / "episcanner.duckdb"

logger.info(f"Data exported successfully to {file}")
logger.info(f"{self.uf} data for {self.year} wrote to {file}")
except (FileNotFoundError, PermissionError) as e:
raise ValueError(f"Failed to write file: {e}")
except Exception as e:
Expand Down Expand Up @@ -234,14 +234,8 @@ def _parse_results(self) -> pd.DataFrame:
data["geocode"].append(gc)
data["muni_name"].append(get_municipality_name(gc))
data["year"].append(c["year"])
params = [
p["params"]
for p in self.results[gc]
][0]
sir_params = [
p["sir_pars"]
for p in self.results[gc]
][0]
params = [p["params"] for p in self.results[gc]][0]
sir_params = [p["sir_pars"] for p in self.results[gc]][0]
data["peak_week"].append(params["tp1"])
data["total_cases"].append(params["L1"])
data["alpha"].append(params["a1"])
Expand Down Expand Up @@ -318,18 +312,16 @@ def _to_duckdb(self, output_dir: str):
).fetchone()[0]

if rows > 0:
if self.verbose:
logger.warning(f"Overriding data for {self.year}")
con.execute(
f"DELETE FROM '{table_name}'"
f" WHERE year = {self.year}"
)
con.execute(
f"INSERT INTO '{table_name}' SELECT * FROM df"
)
con.execute(f"INSERT INTO '{table_name}' SELECT * FROM df")
except duckdb.duckdb.CatalogException:
# table doesn't exist
con.execute(
f"CREATE TABLE '{table_name}' AS SELECT * FROM df"
)
con.execute(f"CREATE TABLE '{table_name}' AS SELECT * FROM df")
finally:
con.unregister("df")
con.close()
Expand Down

0 comments on commit d931f07

Please sign in to comment.