# web/spiders/spider_go.py
#
# Registration lives in web/spiders/__init__.py: import the class with
# `from .spider_go import Covid19GOSpider` and list `Covid19GOSpider` in
# `SPIDERS` (between `Covid19ESSpider` and `Covid19PESpider`).
"""Spider that counts confirmed cases and deaths per city for state "GO"."""

import io
from collections import Counter, defaultdict

import rows

from .base import BaseCovid19Spider


class YMDDateField(rows.fields.DateField):
    """Date field whose input is a compact ``YYYYMMDD`` string."""

    INPUT_FORMAT = "%Y%m%d"


class Covid19GOSpider(BaseCovid19Spider):
    """Collect per-city confirmed/death counts from the two GO CSV datasets.

    Each CSV is downloaded and handled by :meth:`parse`, which stores one
    count per ``codigo_ibge`` in ``self.cases``. The counts from both files
    are combined and reported in :meth:`spider_closed`.
    """

    name = "GO"
    start_urls = [
        "http://datasets.saude.go.gov.br/coronavirus/casos_confirmados.csv",
        "http://datasets.saude.go.gov.br/coronavirus/obitos_confirmados.csv",
    ]

    def __init__(self, report_fobj, case_fobj, *args, **kwargs):
        """Set up the per-city accumulator.

        NOTE(review): ``report_fobj`` and ``case_fobj`` are accepted here but
        are not forwarded to ``BaseCovid19Spider.__init__``. The base class
        is not visible from this file -- confirm whether it needs them.
        """
        super().__init__(*args, **kwargs)
        # Maps codigo_ibge -> {"confirmed": int, "deaths": int}; either key
        # may be absent until the corresponding CSV has been parsed.
        self.cases = defaultdict(dict)

    def parse(self, response):
        """Count the rows of one CSV per city and record them in ``self.cases``.

        Every row is counted as one occurrence for its ``codigo_ibge``. The
        URL decides whether the counts are stored as ``"confirmed"`` or
        ``"deaths"``. The latest ``data_notificacao`` in the file is
        registered as the report date.
        """
        table = rows.import_from_csv(
            io.BytesIO(response.body),
            encoding=response.encoding,
            force_types={"data_notificacao": YMDDateField},
        )

        last_date = max(row.data_notificacao for row in table)
        self.add_report(date=last_date, url=response.url)

        # The dataset kind depends only on the URL, so decide it once.
        if "casos_confirmados.csv" in response.url:
            metric = "confirmed"
        elif "obitos_confirmados.csv" in response.url:
            metric = "deaths"
        else:
            # Unrecognised file: nothing to count.
            return

        # Counter tallies every row regardless of ordering. groupby() only
        # merges *consecutive* equal keys, so on an unsorted CSV a city's
        # count would be overwritten by its last run of rows.
        rows_per_city = Counter(row.codigo_ibge for row in table)
        for city_id, count in rows_per_city.items():
            self.cases[city_id][metric] = count

    def spider_closed(self):
        """Emit city, imported/undefined and state totals, then finish.

        Cities whose id is unknown to ``get_city_name_from_id`` (it raises
        ``KeyError``) are summed into the "Importados/Indefinidos" entry.
        """
        # These accumulators must exist before the first "+=" below.
        total_confirmed = 0
        total_deaths = 0
        imported_confirmed = 0
        imported_deaths = 0

        for city_id, city_data in self.cases.items():
            # A city may appear in only one of the two files.
            confirmed = city_data.get("confirmed", 0)
            deaths = city_data.get("deaths", 0)

            try:
                city = self.get_city_name_from_id(city_id)
            except KeyError:
                imported_confirmed += confirmed
                imported_deaths += deaths
            else:
                self.add_city_case(city=city, confirmed=confirmed, deaths=deaths)

            total_confirmed += confirmed
            total_deaths += deaths

        # Report "no data" (None) rather than explicit zeros when there were
        # no imported/undefined cases at all.
        if imported_confirmed == imported_deaths == 0:
            imported_confirmed = imported_deaths = None

        self.add_city_case(
            city="Importados/Indefinidos",
            confirmed=imported_confirmed,
            deaths=imported_deaths,
        )

        self.add_state_case(confirmed=total_confirmed, deaths=total_deaths)

        super().spider_closed()