include last merge into binary-build

AlertaDengue · Dec 18, 2023 · 7870273 · 7870273
1 parent 06711fd
commit 7870273
Show file tree

Hide file tree

Showing 4 changed files with 90 additions and 4 deletions.
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -24,6 +24,7 @@ pyscopg2 = "^66.0.2"
 urllib3 = "1.26.15"
 duckdb = "^0.9.2"
 pyinstaller = "^6.3.0"
+epiweeks = "^2.3.0"
 
 [tool.poetry.dev-dependencies]
 pytest = "*"

diff --git a/src/scanner/scanner.py b/src/scanner/scanner.py
@@ -11,7 +11,14 @@
 from sqlalchemy import create_engine
 from sqlalchemy.engine import Engine
 
-from .utils import CACHEPATH, STATES, get_SIR_pars, otim
+from .utils import (
+    CACHEPATH,
+    STATES,
+    comp_duration,
+    get_municipality_name,
+    get_SIR_pars,
+    otim,
+)
 
 
 def make_connection() -> Engine:
@@ -184,23 +191,40 @@ def _save_results(self, geocode, year, results, curve):
                 "sir_pars": get_SIR_pars(results.params.valuesdict()),
             }
         )
-        self.curves[geocode].append({"year": year, "df": curve})
+        self.curves[geocode].append(
+            {
+                "year": year,
+                "df": curve,
+                "residuals": abs(curve.richards - curve.casos_cum),
+                "sum_res": (
+                    sum(abs(curve.richards - curve.casos_cum))
+                    / max(curve.casos_cum)
+                ),
+                "ep_time": comp_duration(curve),
+            }
+        )
 
     def _parse_results(self) -> pd.DataFrame:
         data = {
             "geocode": [],
+            "muni_name": [],
             "year": [],
             "peak_week": [],
             "beta": [],
             "gamma": [],
             "R0": [],
             "total_cases": [],
             "alpha": [],
+            "sum_res": [],
+            "ep_ini": [],
+            "ep_end": [],
+            "ep_dur": [],
         }
 
         for gc, curve in self.curves.items():
             for c in curve:
                 data["geocode"].append(gc)
+                data["muni_name"].append(get_municipality_name(gc))
                 data["year"].append(c["year"])
                 params = [
                     p["params"]
@@ -218,6 +242,12 @@ def _parse_results(self) -> pd.DataFrame:
                 data["beta"].append(sir_params["beta"])
                 data["gamma"].append(sir_params["gamma"])
                 data["R0"].append(sir_params["R0"])
+                data["sum_res"].append(c["sum_res"])
+
+                ep_duration = c["ep_time"]
+                data["ep_ini"].append(ep_duration["ini"])
+                data["ep_end"].append(ep_duration["end"])
+                data["ep_dur"].append(ep_duration["dur"])
 
         return pd.DataFrame(data)
 

diff --git a/src/scanner/utils.py b/src/scanner/utils.py
@@ -1,8 +1,13 @@
+import json
 import os
 import pathlib
+from typing import Union
 
 import lmfit as lm
 import numpy as np
+import pandas as pd
+import requests
+from epiweeks import Week
 from lmfit import Parameters
 
 CACHEPATH = os.getenv(
@@ -44,6 +49,20 @@
 }
 
 
+def get_municipality_name(geocode: Union[str, int]) -> str:
+    """
+    Return the name of the municipality identified by ``geocode``.
+
+    The name is retrieved from the IBGE "localidades" REST API, so every
+    call performs one HTTP request.
+
+    Parameters
+    ----------
+    geocode : str or int
+        Municipality code; it is converted to ``str`` and interpolated
+        into the request URL.
+
+    Returns
+    -------
+    str
+        The top-level ``nome`` field of the JSON payload.
+
+    Raises
+    ------
+    ValueError
+        If the payload cannot be indexed as a municipality record
+        (presumably what the API answers for an unknown geocode --
+        confirm against the IBGE documentation), or is not valid JSON.
+    requests.exceptions.RequestException
+        If the request fails or exceeds the timeout.
+    """
+    api = "https://servicodados.ibge.gov.br/api/v1/localidades/municipios/%s"
+
+    # Without a timeout an unresponsive server would block the caller
+    # indefinitely.
+    res = requests.get(api % str(geocode), timeout=30)
+
+    try:
+        # The municipality's own name is the top-level "nome" (per the
+        # IBGE API docs); the nested "microrregiao" object names the
+        # enclosing micro-region, which is not what this function promises.
+        return json.loads(res.text)["nome"]
+    except (TypeError, KeyError) as err:
+        # TypeError: payload is not a mapping (e.g. a list);
+        # KeyError: mapping without a "nome" entry.
+        raise ValueError(f"Geocode {geocode} not found") from err
+
+
 # Richards Model
 @np.vectorize
 def richards(L, a, b, t, tj):
@@ -85,11 +104,36 @@ def get_SIR_pars(rp: dict):
     return pars
 
 
+def comp_duration(curve):
+    """
+    Estimate the beginning, end and duration of an epidemic from the
+    fitted Richards curve.
+
+    The week-to-week increments of ``curve.richards`` are computed over
+    the first 52 rows of ``curve``. The epidemic is taken to span the rows
+    whose increment is at least 5% of the largest increment.
+
+    Parameters
+    ----------
+    curve : pandas.DataFrame
+        Must provide the columns ``data_iniSE`` (dates accepted by
+        ``Week.fromdate``) and ``richards``.
+
+    Returns
+    -------
+    dict
+        ``"ini"`` and ``"end"``: first and last selected epidemiological
+        weeks as strings produced by ``Week.cdcformat()``;
+        ``"dur"``: number of weeks between them, as an ``int``.
+    """
+
+    # Every column is built from the same window, so a curve longer than
+    # 52 rows no longer causes a length mismatch on assignment.
+    window = curve.iloc[:52]
+
+    df_aux = pd.DataFrame()
+
+    df_aux["dates"] = window.data_iniSE
+    df_aux["SE"] = [Week.fromdate(i).cdcformat() for i in df_aux["dates"]]
+    # The leading 0 keeps the increments aligned with the rows.
+    df_aux["diff_richards"] = np.concatenate(
+        ([0], np.diff(window.richards)), axis=0
+    )
+
+    max_c = df_aux["diff_richards"].max()
+    df_aux = df_aux.loc[df_aux.diff_richards >= (0.05) * max_c].sort_index()
+
+    ini = str(df_aux["SE"].values[0])
+    end = str(df_aux["SE"].values[-1])
+
+    # Measure the duration on the calendar rather than subtracting the
+    # two-digit week numbers: the latter goes wrong (even negative) when
+    # the epidemic crosses a year boundary, e.g. 202250 -> 202310.
+    first_day = Week.fromdate(df_aux["dates"].iloc[0]).startdate()
+    last_day = Week.fromdate(df_aux["dates"].iloc[-1]).startdate()
+    dur = (last_day - first_day).days // 7
+
+    ep_dur = {"ini": ini, "end": end, "dur": dur}
+    return ep_dur
+
+
 def otim(df, t_ini, t_fin, verbose=False):
     df.reset_index(inplace=True)
     df["casos_cum"] = df.casos.cumsum()
     params = Parameters()
-    params.add("gamma", min=0.95, max=1.05)
+    params.add("gamma", min=0.3, max=0.33)
     params.add("L1", min=1.0, max=5e5)
     params.add("tp1", min=5, max=35)
     params.add("b1", min=1e-6, max=1)