Skip to content

Commit

Permalink
adiciona spider para GO
Browse files Browse the repository at this point in the history
  • Loading branch information
endersonmaia committed May 21, 2020
1 parent c8e3388 commit a64b6c3
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 0 deletions.
2 changes: 2 additions & 0 deletions web/spiders/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from .spider_ce import Covid19CESpider
from .spider_es import Covid19ESSpider
from .spider_go import Covid19GOSpider
from .spider_pe import Covid19PESpider
from .spider_pr import Covid19PRSpider
from .spider_rn import Covid19RNSpider
Expand All @@ -14,6 +15,7 @@
SPIDERS = [
Covid19CESpider,
Covid19ESSpider,
Covid19GOSpider,
Covid19PESpider,
Covid19PRSpider,
Covid19RNSpider,
Expand Down
75 changes: 75 additions & 0 deletions web/spiders/spider_go.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import io
from itertools import groupby
from collections import defaultdict

import rows

from .base import BaseCovid19Spider

class YMDDateField(rows.fields.DateField):
    """Date field that parses compact ``YYYYMMDD`` values (no separators),
    as used by the GO health department CSVs."""
    INPUT_FORMAT = "%Y%m%d"


class Covid19GOSpider(BaseCovid19Spider):
    """Scrape per-city confirmed cases and deaths for Goiás (GO).

    The state publishes one CSV per metric where each row is a single
    confirmed case (or death); counting rows grouped by IBGE city code
    yields the per-city totals.
    """

    name = "GO"
    start_urls = [
        "http://datasets.saude.go.gov.br/coronavirus/casos_confirmados.csv",
        "http://datasets.saude.go.gov.br/coronavirus/obitos_confirmados.csv",
    ]

    def __init__(self, report_fobj, case_fobj, *args, **kwargs):
        # NOTE(review): report_fobj/case_fobj are accepted but unused here —
        # presumably consumed by the calling harness; confirm against BaseCovid19Spider.
        super().__init__(*args, **kwargs)
        # Per-city accumulator: {ibge_city_code: {"confirmed": int, "deaths": int}}
        self.cases = defaultdict(dict)

    def parse(self, response):
        """Parse one of the two CSVs and record per-city row counts."""
        table = rows.import_from_csv(
            io.BytesIO(response.body),
            encoding=response.encoding,
            force_types={"data_notificacao": YMDDateField},
        )

        last_date = max(row.data_notificacao for row in table)
        self.add_report(date=last_date, url=response.url)

        # FIX: rows yields namedtuple-like rows (see attribute access above),
        # so the key must use attribute access, not item access.
        row_key = lambda row: row.codigo_ibge

        # FIX: groupby only groups *consecutive* rows — sort by the same key
        # first so a city's rows are counted together regardless of CSV order.
        for city_id, city_rows in groupby(sorted(table, key=row_key), key=row_key):
            if "casos_confirmados.csv" in response.url:
                self.cases[city_id]["confirmed"] = len(list(city_rows))
            elif "obitos_confirmados.csv" in response.url:
                self.cases[city_id]["deaths"] = len(list(city_rows))

    def spider_closed(self):
        """Emit per-city cases, the imported/undefined bucket, and state totals."""
        # FIX: these counters were never initialized (the assignments were left
        # commented out in __init__), which raised UnboundLocalError below.
        total_confirmed = total_deaths = 0
        imported_confirmed = imported_deaths = 0

        for city_id, city_data in self.cases.items():
            # FIX: use .get for both metrics — a city may appear in only one CSV.
            confirmed = city_data.get("confirmed", 0)
            deaths = city_data.get("deaths", 0)

            try:
                city = self.get_city_name_from_id(city_id)
            except KeyError:
                # Unknown IBGE code: fold into the imported/undefined bucket.
                imported_confirmed += confirmed
                imported_deaths += deaths
            else:
                self.add_city_case(city=city, confirmed=confirmed, deaths=deaths)

            total_confirmed += confirmed
            total_deaths += deaths

        # Report None (not 0) when there is nothing imported/undefined.
        if imported_confirmed == imported_deaths == 0:
            imported_confirmed = imported_deaths = None

        self.add_city_case(
            city="Importados/Indefinidos",
            confirmed=imported_confirmed,
            deaths=imported_deaths,
        )

        self.add_state_case(confirmed=total_confirmed, deaths=total_deaths)

        super().spider_closed()

0 comments on commit a64b6c3

Please sign in to comment.