From ed034bb5ddd76cc43011b89d045e950307eb30b5 Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Wed, 9 Oct 2024 16:02:26 -0300 Subject: [PATCH 01/56] adding gypscie preprocessing on flow --- .../precipitacao_alertario/flows.py | 123 +++- .../precipitacao_alertario/tasks_gypscie.py | 605 ++++++++++++++++++ .../precipitacao_alertario/util_gypscie.py | 162 +++++ 3 files changed, 887 insertions(+), 3 deletions(-) create mode 100644 pipelines/meteorologia/precipitacao_alertario/tasks_gypscie.py create mode 100644 pipelines/meteorologia/precipitacao_alertario/util_gypscie.py diff --git a/pipelines/meteorologia/precipitacao_alertario/flows.py b/pipelines/meteorologia/precipitacao_alertario/flows.py index bf92102f..6c727787 100644 --- a/pipelines/meteorologia/precipitacao_alertario/flows.py +++ b/pipelines/meteorologia/precipitacao_alertario/flows.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# pylint: disable=C0103 +# pylint: disable=C0103, line-too-long """ Flows for precipitacao_alertario. """ @@ -13,6 +13,7 @@ from prefeitura_rio.pipelines_utils.state_handlers import handler_inject_bd_credentials from prefeitura_rio.pipelines_utils.tasks import ( # pylint: disable=E0611, E0401 create_table_and_upload_to_gcs, + get_now_datetime, task_run_dbt_model_task, ) @@ -31,11 +32,27 @@ from pipelines.rj_escritorio.rain_dashboard.constants import ( constants as rain_dashboard_constants, ) -from pipelines.utils.constants import constants as utils_constants +# from pipelines.utils.constants import constants as utils_constants from pipelines.utils.custom import wait_for_flow_run_with_timeout -from pipelines.utils.dump_db.constants import constants as dump_db_constants +# from pipelines.utils.dump_db.constants import constants as dump_db_constants from pipelines.utils.dump_to_gcs.constants import constants as dump_to_gcs_constants +# preprocessing imports +from pipelines.precipitation_model.rionowcast.tasks import ( # pylint: disable=E0611, E0401 + access_api, + add_columns_on_dfr, + download_datasets_from_gypscie, + execute_dataset_processor, + get_dataset_info, + get_dataset_processor_info, + path_to_dfr, + register_dataset_on_gypscie, + task_wait_run, +) +from pipelines.tasks import ( # pylint: disable=E0611, E0401 + task_create_partitions, +) + wait_for_flow_run_with_5min_timeout = wait_for_flow_run_with_timeout(timeout=timedelta(minutes=5)) with Flow( @@ -73,6 +90,38 @@ default=dump_to_gcs_constants.MAX_BYTES_PROCESSED_PER_TABLE.value, ) + # Gypscie parameters + environment_id = Parameter("environment_id", default=1, required=False) + domain_id = Parameter("domain_id", default=1, required=False) + project_id = Parameter("project_id", default=1, required=False) + project_name = Parameter("project_name", default="rionowcast_precipitation", required=False) + + # Gypscie processor parameters + processor_name = Parameter("processor_name", default="etl_alertario22", required=True) + dataset_processor_id = Parameter("dataset_processor_id", default=43, required=False) # mudar + + # Parameters for saving data on GCP + materialize_after_dump = Parameter("materialize_after_dump", default=False, required=False) + dump_mode = Parameter("dump_mode", default=False, required=False) + dataset_id_previsao_chuva = Parameter( + "dataset_id", default="clima_previsao_chuva", required=False + ) + table_id_previsao_chuva = Parameter( + "table_id", default="preprocessamento_pluviometro_alertario", required=False + ) + + # Dataset parameters + station_type = Parameter("station_type", default="pluviometro", required=False) + 
source = Parameter("source", default="alertario", required=False) + + # Dataset path, if it was saved on ETL flow or it will be None + dataset_path = Parameter("dataset_path", default=None, required=False) # dataset_path + model_version = Parameter("model_version", default=1, required=False) + + ######################### + # Start alertario flow # + ######################### + dfr_pluviometric, dfr_meteorological = download_data() (dfr_pluviometric, empty_data_pluviometric,) = treat_pluviometer_and_meteorological_data( dfr=dfr_pluviometric, @@ -366,6 +415,74 @@ # raise_final_state=True, # ) + #################################### + # Start preprocessing flow # + #################################### + + api = access_api() + + dataset_info = get_dataset_info(station_type, source) + + # Get processor information on gypscie + with case(dataset_processor_id, None): + dataset_processor_response, dataset_processor_id = get_dataset_processor_info( + api, processor_name + ) + + dataset_response = register_dataset_on_gypscie(api, filepath=dataset_path, domain_id=domain_id) + # TODO: verifcar no codigo do augustp se são esses os parametros corretos + processor_parameters = { + "dataset1": str(dataset_path).rsplit("/", maxsplit=1)[-1], + "station_type": station_type, + } + + dataset_processor_task_id = execute_dataset_processor( + api, + processor_id=dataset_processor_id, + dataset_id=[dataset_response["id"]], + environment_id=environment_id, + project_id=project_id, + parameters=processor_parameters, + ) + wait_run = task_wait_run(api, dataset_processor_task_id, flow_type="processor") + dataset_path = download_datasets_from_gypscie( + api, dataset_names=[dataset_response["id"]], wait=wait_run + ) + dfr_ = path_to_dfr(dataset_path) + # output_datasets_id = get_output_dataset_ids_on_gypscie(api, dataset_processor_task_id) + dfr = add_columns_on_dfr(dfr_, model_version, update_time=True) + + # Save pre-treated data on local file with partitions + now_datetime = get_now_datetime() + prediction_data_path = task_create_partitions( + dfr, + partition_date_column=dataset_info["partition_date_column"], + savepath="model_prediction", + suffix=now_datetime, + ) + ################################ + # Save preprocessing on GCP # + ################################ + + # Upload data to BigQuery + create_table = create_table_and_upload_to_gcs( + data_path=prediction_data_path, + dataset_id=dataset_id_previsao_chuva, + table_id=table_id_previsao_chuva, + dump_mode=dump_mode, + biglake_table=False, + ) + + # Trigger DBT flow run + with case(materialize_after_dump, True): + run_dbt = task_run_dbt_model_task( + dataset_id=dataset_id_previsao_chuva, + table_id=table_id_previsao_chuva, + # mode=materialization_mode, + # materialize_to_datario=materialize_to_datario, + ) + run_dbt.set_upstream(create_table) + # para rodar na cloud cor_meteorologia_precipitacao_alertario.storage = GCS(constants.GCS_FLOWS_BUCKET.value) cor_meteorologia_precipitacao_alertario.run_config = KubernetesRun( diff --git a/pipelines/meteorologia/precipitacao_alertario/tasks_gypscie.py b/pipelines/meteorologia/precipitacao_alertario/tasks_gypscie.py new file mode 100644 index 00000000..840540f0 --- /dev/null +++ b/pipelines/meteorologia/precipitacao_alertario/tasks_gypscie.py @@ -0,0 +1,605 @@ +# -*- coding: utf-8 -*- +""" +Tasks +""" +import datetime +import os +from pathlib import Path +from time import sleep +from typing import Dict, List + +import numpy as np +import pandas as pd + +from basedosdados.upload.base import Base +from google.cloud 
import bigquery +from prefect import task +from prefect.engine.signals import ENDRUN +from prefect.engine.state import Failed +from prefeitura_rio.pipelines_utils.infisical import get_secret +from prefeitura_rio.pipelines_utils.logging import log +from requests.exceptions import HTTPError + +from pipelines.constants import constants # pylint: disable=E0611, E0401 +from pipelines.precipitation_model.rionowcast.utils import ( # pylint: disable=E0611, E0401 + GypscieApi, + wait_run, +) + + +# noqa E302, E303 +@task() +def access_api(): + """# noqa E303 + Acess api and return it to be used in other requests + """ + infisical_username = constants.INFISICAL_USERNAME.value + infisical_password = constants.INFISICAL_PASSWORD.value + + # username = get_secret(secret_name="USERNAME", path="/gypscie", environment="prod") + # password = get_secret(secret_name="PASSWORD", path="/gypscie", environment="prod") + + username = get_secret(infisical_username, path="/gypscie")[infisical_username] + password = get_secret(infisical_password, path="/gypscie")[infisical_password] + api = GypscieApi(username=username, password=password) + + return api + + +@task() +def get_billing_project_id( + bd_project_mode: str = "prod", + billing_project_id: str = None, +) -> str: + """ + Get billing project id + """ + if not billing_project_id: + log("Billing project ID was not provided, trying to get it from environment variable") + try: + bd_base = Base() + billing_project_id = bd_base.config["gcloud-projects"][bd_project_mode]["name"] + log(f"Billing project ID was inferred from environment variables: {billing_project_id}") + except KeyError: + pass + if not billing_project_id: + raise ValueError( + "billing_project_id must be either provided or inferred from environment variables" + ) + log(f"Billing project ID: {billing_project_id}") + return billing_project_id + + +def download_data_from_bigquery(query: str, billing_project_id: str) -> pd.DataFrame: + """ADD""" + # pylint: disable=E1124, protected-access + # client = google_client(billing_project_id, from_file=True, reauth=False) + # job_config = bigquery.QueryJobConfig() + # # job_config.dry_run = True + + # # Get data + # log("Querying data from BigQuery") + # job = client["bigquery"].query(query, job_config=job_config) + # https://github.com/prefeitura-rio/pipelines_rj_iplanrio/blob/ecd21c727b6f99346ef84575608e560e5825dd38/pipelines/painel_obras/dump_data/tasks.py#L39 + bq_client = bigquery.Client( + credentials=Base(bucket_name="rj-cor")._load_credentials(mode="prod"), + project=billing_project_id, + ) + job = bq_client.query(query) + while not job.done(): + sleep(1) + + # Get data + # log("Querying data from BigQuery") + # job = client["bigquery"].query(query) + # while not job.done(): + # sleep(1) + log("Getting result from query") + results = job.result() + log("Converting result to pandas dataframe") + dfr = results.to_dataframe() + log("End download data from bigquery") + return dfr + + +@task() +def register_dataset_on_gypscie(api, filepath: Path, domain_id: int = 1) -> Dict: + """ + Register dataset on gypscie and return its informations like id. + Obs: dataset name must be unique. 
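+    (To keep names unique, the code below appends a %Y%m%d%H%M%S timestamp to the dataset name before registering it.)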
+ Return: + { + 'domain': + { + 'description': 'This project has the objective to create nowcasting models.', + 'id': 1, + 'name': 'rionowcast_precipitation' + }, + 'file_type': 'csv', + 'id': 18, + 'name': 'rain_gauge_to_model', + 'register': '2024-07-02T19:20:32.507744', + 'uri': 'http://gypscie.dados.rio/api/download/datasets/rain_gauge_to_model.zip' + } + """ + log(f"\nStart registring dataset by sending {filepath} Data to Gypscie") + + data = { + "domain_id": domain_id, + "name": str(filepath).split("/")[-1].split(".")[0] + + "_" + + datetime.datetime.now().strftime("%Y%m%d%H%M%S"), # pylint: disable=use-maxsplit-arg + } + log(type(data), data) + files = { + "files": open(file=filepath, mode="rb"), # pylint: disable=consider-using-with + } + + response = api.post(path="datasets", data=data, files=files) + + log(f"register_dataset_on_gypscie response: {response} and response.json(): {response.json()}") + return response.json() + + +@task(nout=2) +def get_dataset_processor_info(api, processor_name: str): + """ + Geting dataset processor information + """ + log(f"Getting dataset processor info for {processor_name}") + dataset_processors_response = api.get( + path="dataset_processors", + ) + + # log(dataset_processors_response) + dataset_processor_id = None + for response in dataset_processors_response: + if response.get("name") == processor_name: + dataset_processor_id = response["id"] + # log(response) + # log(response["id"]) + return dataset_processors_response, dataset_processor_id + + # if not dataset_processor_id: + # log(f"{processor_name} not found. Try adding it.") + + +@task() +# pylint: disable=too-many-arguments +def execute_dataset_processor( + api, + processor_id: int, + dataset_id: list, # como pegar os vários datasets + environment_id: int, + project_id: int, + parameters: dict + # adicionar campos do dataset_processor +) -> List: + """ + Requisição de execução de um DatasetProcessor + """ + log("\nStarting executing dataset processing") + + task_response = api.post( + path="processor_run", + json={ + "dataset_id": dataset_id, + "environment_id": environment_id, + "parameters": parameters, + "processor_id": processor_id, + "project_id": project_id, + }, + ) + # task_response = {'task_id': '227e74bc-0057-4e63-a30f-8374604e442b'} + + # response = wait_run(api, task_response.json()) + + # if response["state"] != "SUCCESS": + # failed_message = "Error processing this dataset. Stop flow or restart this task" + # log(failed_message) + # task_state = Failed(failed_message) + # raise ENDRUN(state=task_state) + + # output_datasets = response["result"]["output_datasets"] # returns a list with datasets + # log(f"\nFinish executing dataset processing, we have {len(output_datasets)} datasets") + # return output_datasets + return task_response.json(["task_id"]) + + +@task() +def predict(api, model_id: int, dataset_id: int, project_id: int) -> dict: + """ + Requisição de execução de um processo de Predição + """ + print("Starting prediction") + response = api.post( + path="predict", + data={ + "model_id": model_id, + "dataset_id": dataset_id, + "project_id": project_id, + }, + ) + print(f"Prediction ended. 
Response: {response}, {response.json()}") + return response.json() + + +def calculate_start_and_end_date( + hours_from_past: int, +) -> tuple[datetime.datetime, datetime.datetime]: + """ + Calculates the start and end date based on the hours from past + """ + end_date = datetime.datetime.now() + start_date = end_date - datetime.timedelta(hours=hours_from_past) + return start_date, end_date + + +@task() +def query_data_from_gcp( # pylint: disable=too-many-arguments + dataset_id: str, + table_id: str, + billing_project_id: str, + start_date: str = None, + end_date: str = None, + save_format: str = "csv", +) -> Path: + """ + Download historical data from source. + format: csv or parquet + """ + log(f"Start downloading {dataset_id}.{table_id} data") + + directory_path = Path("data/input/") + if not os.path.exists(directory_path): + os.makedirs(directory_path) + + savepath = directory_path / f"{dataset_id}_{table_id}" # TODO: + + # pylint: disable=consider-using-f-string + # noqa E262 + query = """ + SELECT + * + FROM rj-cor.{}.{} + """.format( + dataset_id, + table_id, + ) + + # pylint: disable=consider-using-f-string + if start_date: + filter_query = """ + WHERE data_particao BETWEEN '{}' AND '{}' + """.format( + start_date, end_date + ) + query += filter_query + + log(f"Query used to download data:\n{query}") + + dfr = download_data_from_bigquery(query=query, billing_project_id=billing_project_id) + if save_format == "csv": + dfr.to_csv(f"{savepath}.csv", index=False) + elif save_format == "parquet": + dfr.to_parquet(f"{savepath}.parquet", index=False) + # bd.download(savepath=savepath, query=query, billing_project_id=billing_project_id) + + log(f"{table_id} data saved on {savepath}") + return savepath + + +@task() +def execute_prediction_on_gypscie( + api, + model_params: dict, + # hours_to_predict, +) -> str: + """ + Requisição de execução de um processo de Predição + Return task_id + """ + log("Starting prediction") + task_response = api.post( + path="workflow_run", + json=model_params, + ) + # data={ + # "model_id": model_id, + # "dataset_id": dataset_id, + # "project_id": project_id, + # }, + response = wait_run(api, task_response.json()) + + if response["state"] != "SUCCESS": + failed_message = "Error processing this dataset. Stop flow or restart this task" + log(failed_message) + task_state = Failed(failed_message) + raise ENDRUN(state=task_state) + + print(f"Prediction ended. Response: {response}, {response.json()}") + # TODO: retorna a predição? o id da do dataset? 
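+    # By this point wait_run() has returned a SUCCESS state; any other state
+    # raises ENDRUN above, so only successful workflow runs reach the return below.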
+ + return response.json().get("task_id") # response.json().get('task_id') + + +@task +def task_wait_run(api, task_response, flow_type: str = "dataflow") -> Dict: + """ + Force flow wait for the end of data processing + flow_type: dataflow or processor + """ + return wait_run(api, task_response, flow_type) + + +@task +def get_dataflow_params( # pylint: disable=too-many-arguments + workflow_id, + environment_id, + project_id, + load_data_funtion_id, + pre_processing_function_id, + model_function_id, + radar_data_id, + rain_gauge_data_id, + grid_data_id, + model_data_id, +) -> List: + """ + Return parameters for the model + + { + "workflow_id": 36, + "environment_id": 1, + "parameters": [ + { + "function_id":42, + "params": {"radar_data_path":178, "rain_gauge_data_path":179, "grid_data_path":177} + }, + { + "function_id":43 + }, + { + "function_id":45, + "params": {"model_path":191} # model was registered on Gypscie as a dataset + } + ], + "project_id": 1 + } + """ + return { + "workflow_id": workflow_id, + "environment_id": environment_id, + "parameters": [ + { + "function_id": load_data_funtion_id, + "params": { + "radar_data_path": radar_data_id, + "rain_gauge_data_path": rain_gauge_data_id, + "grid_data_path": grid_data_id, + }, + }, + { + "function_id": pre_processing_function_id, + }, + {"function_id": model_function_id, "params": {"model_path": model_data_id}}, + ], + "project_id": project_id, + } + + +@task() +def get_output_dataset_ids_on_gypscie( + api, + task_id, +) -> List: + """ + Get output files id with predictions + """ + try: + response = api.get(path="status_workflow_run/" + task_id) + response = response.json() + except HTTPError as err: + if err.response.status_code == 404: + print(f"Task {task_id} not found") + return [] + + return response.get("output_datasets") + + +@task() +def download_datasets_from_gypscie( + api, + dataset_names: List, + wait=None, +) -> List: + """ + Get output files with predictions + """ + for file_name in dataset_names: + response = api.get(path=f"download/datasets/{file_name}.zip") + if response.status_code == 200: + log(f"Dataset {file_name} downloaded") + else: + log(f"Dataset {file_name} not found on Gypscie") + # TODO: verificar se o arquivo é .zip mesmo + return [dataset_name + ".zip" for dataset_name in dataset_names] + + +@task +def desnormalize_data(array: np.ndarray): + """ + Desnormalize data + + Inputs: + array: numpy array + Returns: + a numpy array with the values desnormalized + """ + return array + + +@task +def geolocalize_data(prediction_datasets: np.ndarray, now_datetime: str) -> pd.DataFrame: + """ + Geolocalize data using grid and add timestamp + + Inputs: + prediction_datasets: numpy array + now_datetime: string in format YYYY_MM_DD__H_M_S + Returns: + a pandas dataframe to be saved on GCP + Expected columns: latitude, longitude, janela_predicao, + valor_predicao, data_predicao (timestamp em que foi realizada a previsão) + """ + return prediction_datasets + + +@task +def create_image(data) -> List: + """ + Create image using Geolocalized data or the numpy array from desnormalized_data function + Exemplo de código que usei pra gerar uma imagem vindo de um xarray: + + def create_and_save_image(data: xr.xarray, variable) -> Path: + plt.figure(figsize=(10, 10)) + + # Use the Geostationary projection in cartopy + axis = plt.axes(projection=ccrs.PlateCarree()) + + lat_max, lon_max = ( + -21.708288842894145, + -42.36573106186053, + ) # canto superior direito + lat_min, lon_min = ( + -23.793855217170343, + -45.04488171189226, + ) 
# canto inferior esquerdo + + extent = [lon_min, lat_min, lon_max, lat_max] + img_extent = [extent[0], extent[2], extent[1], extent[3]] + + # Define the color scale based on the channel + colormap = "jet" # White to black for IR channels + + # Plot the image + img = axis.imshow(data, origin="upper", extent=img_extent, cmap=colormap, alpha=0.8) + + # Add coastlines, borders and gridlines + axis.coastlines(resolution='10m', color='black', linewidth=0.8) + axis.add_feature(cartopy.feature.BORDERS, edgecolor='white', linewidth=0.5) + + + grdln = axis.gridlines( + crs=ccrs.PlateCarree(), + color="gray", + alpha=0.7, + linestyle="--", + linewidth=0.7, + xlocs=np.arange(-180, 180, 1), + ylocs=np.arange(-90, 90, 1), + draw_labels=True, + ) + grdln.top_labels = False + grdln.right_labels = False + + plt.colorbar( + img, + label=variable.upper(), + extend="both", + orientation="horizontal", + pad=0.05, + fraction=0.05, + ) + + output_image_path = Path(os.getcwd()) / "output" / "images" + + save_image_path = output_image_path / (f"{variable}.png") + + if not output_image_path.exists(): + output_image_path.mkdir(parents=True, exist_ok=True) + + plt.savefig(save_image_path, bbox_inches="tight", pad_inches=0, dpi=300) + plt.show() + return save_image_path + """ + save_image_path = "image.png" + + return save_image_path + + +@task +def get_dataset_info(station_type: str, source: str) -> Dict: + """ + Inputs: + station_type: str ["rain_gauge", "weather_station", "radar"] + source: str ["alertario", "inmet", "mendanha"] + """ + + if station_type == "rain_gauge": + dataset_info = { + "dataset_id": "clima_pluviometro", + "filename": "gauge_station_bq", + "partition_date_column": "datetime", + } + if source == "alertario": + dataset_info["table_id"] = "taxa_precipitacao_alertario" + dataset_info["destination_table_id"] = "preprocessamento_pluviometro_alertario" + elif station_type == "weather_station": + dataset_info = { + "dataset_id": "clima_pluviometro", + "filename": "weather_station_bq", + "partition_date_column": "datetime", + } + if source == "alertario": + dataset_info["table_id"] = "meteorologia_alertario" + dataset_info[ + "destination_table_id" + ] = "preprocessamento_estacao_meteorologica_alertario" + elif source == "inmet": + dataset_info["table_id"] = "meteorologia_inmet" + dataset_info["destination_table_id"] = "preprocessamento_estacao_meteorologica_inmet" + else: + dataset_info = { + "dataset_id": "clima_radar", + "partition_date_column": "datetime", + } + if source == "mendanha": + dataset_info["storage_path"] = "" + dataset_info["destination_table_id"] = "preprocessamento_radar_mendanha" + elif source == "guaratiba": + dataset_info["storage_path"] = "" + dataset_info["destination_table_id"] = "preprocessamento_radar_guaratiba" + elif source == "macae": + dataset_info["storage_path"] = "" + dataset_info["destination_table_id"] = "preprocessamento_radar_macae" + + return dataset_info + + +def path_to_dfr(path: str) -> pd.DataFrame: + + """ + Reads a csv or parquet file from the given path and returns a dataframe + """ + if path.endswith(".csv"): + dfr = pd.read_csv(path) + elif path.endswith(".parquet"): + dfr = pd.read_parquet(path) + else: + raise ValueError("File extension not supported") + return dfr + + +def add_columns_on_dfr( + dfr: pd.DataFrame, model_version: int, update_time: bool = False +) -> pd.DataFrame: + """ + Reads a csv or parquet file from the given path and adds a column + with the update time based on Brazil timezone + """ + if update_time: + dfr["update_time"] = 
pd.Timestamp.now(tz="America/Sao_Paulo") + if model_version is not None: + dfr["model_version"] = model_version + return dfr diff --git a/pipelines/meteorologia/precipitacao_alertario/util_gypscie.py b/pipelines/meteorologia/precipitacao_alertario/util_gypscie.py new file mode 100644 index 00000000..f5873dc6 --- /dev/null +++ b/pipelines/meteorologia/precipitacao_alertario/util_gypscie.py @@ -0,0 +1,162 @@ +# -*- coding: utf-8 -*- +""" +Utils file +""" + +# from concurrent.futures import ThreadPoolExecutor, as_completed, wait +from datetime import datetime, timedelta +from time import sleep +from typing import Callable, Dict, Tuple # , List + +import basedosdados as bd +import requests +from prefeitura_rio.pipelines_utils.logging import log + + +class GypscieApi: + """ + GypscieApi + """ + + def __init__( + self, + username: str = None, + password: str = None, + base_url: str = None, + token_callback: Callable[[str, datetime], None] = lambda *_: None, + ) -> None: + if username is None or password is None: + raise ValueError("Must be set refresh token or username with password") + + self._base_url = base_url or "https://gypscie.dados.rio/api/" + self._username = username + self._password = password + self._token_callback = token_callback + self._headers, self._token, self._expires_at = self._get_headers() + + def _get_headers(self) -> Tuple[Dict[str, str], str, datetime]: + + response = requests.post( + f"{self._base_url}login", + headers={"accept": "application/json", "Content-Type": "application/json"}, + json={ + # 'grant_type': 'password', + # 'scope': 'openid profile', + "username": self._username, + "password": self._password, + }, + ) + if response.status_code == 200: + token = response.json()["token"] + # now + expires_in_seconds - 10 minutes + expires_at = datetime.now() + timedelta(seconds=30 * 60) + else: + log(f"Status code: {response.status_code}\nResponse:{response.content}") + raise Exception() + + return {"Authorization": f"Bearer {token}"}, token, expires_at + + def _refresh_token_if_needed(self) -> None: + if self._expires_at <= datetime.now(): + self._headers, self._token, self._expires_at = self._get_headers() + self._token_callback(self.get_token(), self.expires_at()) + + def refresh_token(self): + """ + refresh + """ + self._refresh_token_if_needed() + + def get_token(self): + """ + get token + """ + self._refresh_token_if_needed() + + return self._headers["Authorization"].split(" ")[1] + + def expires_at(self): + """ + expire + """ + return self._expires_at + + def get(self, path: str, timeout: int = 120) -> Dict: + """ + get + """ + self._refresh_token_if_needed() + response = requests.get(f"{self._base_url}{path}", headers=self._headers, timeout=timeout) + response.raise_for_status() + return response.json() + + def put(self, path, json=None): + """ + put + """ + self._refresh_token_if_needed() + response = requests.put(f"{self._base_url}{path}", headers=self._headers, json=json) + return response + + def post(self, path, data: dict = None, json: dict = None, files: dict = None): + """ + post + """ + self._refresh_token_if_needed() + response = requests.post( + url=f"{self._base_url}{path}", + headers=self._headers, + data=data, + json=json, + files=files, + ) + # response = requests.post(f"{self._base_url}{path}", headers=self._headers, json=json) + return response + + +def bq_project(kind: str = "bigquery_prod"): + """Get the set BigQuery project_id + + Args: + kind (str, optional): Which client to get the project name from. 
+ Options are 'bigquery_staging', 'bigquery_prod' and 'storage_staging' + Defaults to 'bigquery_prod'. + + Returns: + str: the requested project_id + """ + return bd.upload.base.Base().client[kind].project + + +def wait_run(api, task_response, flow_type: str = "dataflow") -> Dict: + """ + Force flow wait for the end of data processing + flow_type: dataflow or processor + Return: + { + "result": {}, + "state": "string", + "status": "string" + } + """ + if "task_id" in task_response.keys(): + _id = task_response.get("task_id") + else: + log(f"Error processing: task_id not found on response:{task_response}") + # TODO: stop flow here + + # Request to get the execution status + path_flow_type = "status_workflow_run" if flow_type == "dataflow" else "status_processor_run" + response = api.get( + path=f"{path_flow_type}/" + _id, + ) + + log(f"Execution status: {response}.") + while response["state"] == "STARTED": + sleep(5) + response = wait_run(api, task_response) + + if response["state"] != "SUCCESS": + log("Error processing this dataset. Stop flow or restart this task") + + return response From c93fbf693b81a538778d1e445c8c71515b236450 Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Wed, 9 Oct 2024 16:13:08 -0300 Subject: [PATCH 02/56] adding parameters on scheduler --- .../meteorologia/precipitacao_alertario/flows.py | 14 +++++++------- .../precipitacao_alertario/schedules.py | 12 ++++++++++++ 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/pipelines/meteorologia/precipitacao_alertario/flows.py b/pipelines/meteorologia/precipitacao_alertario/flows.py index 6c727787..8f37d560 100644 --- a/pipelines/meteorologia/precipitacao_alertario/flows.py +++ b/pipelines/meteorologia/precipitacao_alertario/flows.py @@ -32,8 +32,10 @@ from pipelines.rj_escritorio.rain_dashboard.constants import ( constants as rain_dashboard_constants, ) + # from pipelines.utils.constants import constants as utils_constants from pipelines.utils.custom import wait_for_flow_run_with_timeout + # from pipelines.utils.dump_db.constants import constants as dump_db_constants from pipelines.utils.dump_to_gcs.constants import constants as dump_to_gcs_constants @@ -100,14 +102,12 @@ processor_name = Parameter("processor_name", default="etl_alertario22", required=True) dataset_processor_id = Parameter("dataset_processor_id", default=43, required=False) # mudar - # Parameters for saving data on GCP - materialize_after_dump = Parameter("materialize_after_dump", default=False, required=False) - dump_mode = Parameter("dump_mode", default=False, required=False) + # Parameters for saving data preprocessed on GCP dataset_id_previsao_chuva = Parameter( - "dataset_id", default="clima_previsao_chuva", required=False + "dataset_id_previsao_chuva", default="clima_previsao_chuva", required=False ) table_id_previsao_chuva = Parameter( - "table_id", default="preprocessamento_pluviometro_alertario", required=False + "table_id_previsao_chuva", default="preprocessamento_pluviometro_alertario", required=False ) # Dataset parameters @@ -469,12 +469,12 @@ data_path=prediction_data_path, dataset_id=dataset_id_previsao_chuva, table_id=table_id_previsao_chuva, - dump_mode=dump_mode, + dump_mode=DUMP_MODE, biglake_table=False, ) # Trigger DBT flow run - with case(materialize_after_dump, True): + with case(MATERIALIZE_AFTER_DUMP, True): run_dbt = task_run_dbt_model_task( dataset_id=dataset_id_previsao_chuva, table_id=table_id_previsao_chuva, diff --git a/pipelines/meteorologia/precipitacao_alertario/schedules.py 
b/pipelines/meteorologia/precipitacao_alertario/schedules.py index ad90fbd7..2f1a74da 100644 --- a/pipelines/meteorologia/precipitacao_alertario/schedules.py +++ b/pipelines/meteorologia/precipitacao_alertario/schedules.py @@ -27,6 +27,18 @@ "materialize_to_datario": False, "mode": "prod", "dump_to_gcs": False, + "environment_id": 1, + "domain_id": 1, + "project_id": 1, + "project_name": "rionowcast_precipitation", + "processor_name": "etl_alertario22", + "dataset_processor_id": 43, + "dataset_id_previsao_chuva": "clima_previsao_chuva", + "table_id_previsao_chuva": "preprocessamento_pluviometro_alertario", + "station_type": "pluviometro", + "source": "alertario", + "maximum_bytes_processed": None, + "model_version": 1, }, ), ] From d41481661c7d487e34dfd8590fe1ef188a648cce Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 9 Oct 2024 19:13:35 +0000 Subject: [PATCH 03/56] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../precipitacao_alertario/flows.py | 20 +++++++++---------- .../precipitacao_alertario/tasks_gypscie.py | 1 - 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/pipelines/meteorologia/precipitacao_alertario/flows.py b/pipelines/meteorologia/precipitacao_alertario/flows.py index 8f37d560..e12eb573 100644 --- a/pipelines/meteorologia/precipitacao_alertario/flows.py +++ b/pipelines/meteorologia/precipitacao_alertario/flows.py @@ -29,15 +29,6 @@ save_last_dbt_update, treat_pluviometer_and_meteorological_data, ) -from pipelines.rj_escritorio.rain_dashboard.constants import ( - constants as rain_dashboard_constants, -) - -# from pipelines.utils.constants import constants as utils_constants -from pipelines.utils.custom import wait_for_flow_run_with_timeout - -# from pipelines.utils.dump_db.constants import constants as dump_db_constants -from pipelines.utils.dump_to_gcs.constants import constants as dump_to_gcs_constants # preprocessing imports from pipelines.precipitation_model.rionowcast.tasks import ( # pylint: disable=E0611, E0401 @@ -51,9 +42,16 @@ register_dataset_on_gypscie, task_wait_run, ) -from pipelines.tasks import ( # pylint: disable=E0611, E0401 - task_create_partitions, +from pipelines.rj_escritorio.rain_dashboard.constants import ( + constants as rain_dashboard_constants, ) +from pipelines.tasks import task_create_partitions # pylint: disable=E0611, E0401 + +# from pipelines.utils.constants import constants as utils_constants +from pipelines.utils.custom import wait_for_flow_run_with_timeout + +# from pipelines.utils.dump_db.constants import constants as dump_db_constants +from pipelines.utils.dump_to_gcs.constants import constants as dump_to_gcs_constants wait_for_flow_run_with_5min_timeout = wait_for_flow_run_with_timeout(timeout=timedelta(minutes=5)) diff --git a/pipelines/meteorologia/precipitacao_alertario/tasks_gypscie.py b/pipelines/meteorologia/precipitacao_alertario/tasks_gypscie.py index 840540f0..63cfafcc 100644 --- a/pipelines/meteorologia/precipitacao_alertario/tasks_gypscie.py +++ b/pipelines/meteorologia/precipitacao_alertario/tasks_gypscie.py @@ -10,7 +10,6 @@ import numpy as np import pandas as pd - from basedosdados.upload.base import Base from google.cloud import bigquery from prefect import task From 1d7b4cb6267634be3229b1f84003b3ba0c9f33f1 Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Wed, 9 Oct 2024 16:41:36 -0300 Subject: [PATCH 04/56] adding preprocessing on mendanha flow --- 
.../precipitacao_alertario/flows.py | 23 +-- .../meteorologia/radar/mendanha/flows.py | 143 ++++++++++++++++-- .../gypscie/tasks.py} | 2 +- .../gypscie/utils.py} | 0 4 files changed, 142 insertions(+), 26 deletions(-) rename pipelines/{meteorologia/precipitacao_alertario/tasks_gypscie.py => utils/gypscie/tasks.py} (99%) rename pipelines/{meteorologia/precipitacao_alertario/util_gypscie.py => utils/gypscie/utils.py} (100%) diff --git a/pipelines/meteorologia/precipitacao_alertario/flows.py b/pipelines/meteorologia/precipitacao_alertario/flows.py index e12eb573..9b40cc21 100644 --- a/pipelines/meteorologia/precipitacao_alertario/flows.py +++ b/pipelines/meteorologia/precipitacao_alertario/flows.py @@ -30,8 +30,19 @@ treat_pluviometer_and_meteorological_data, ) +from pipelines.rj_escritorio.rain_dashboard.constants import ( + constants as rain_dashboard_constants, +) +from pipelines.tasks import task_create_partitions # pylint: disable=E0611, E0401 + +# from pipelines.utils.constants import constants as utils_constants +from pipelines.utils.custom import wait_for_flow_run_with_timeout + +# from pipelines.utils.dump_db.constants import constants as dump_db_constants +from pipelines.utils.dump_to_gcs.constants import constants as dump_to_gcs_constants + # preprocessing imports -from pipelines.precipitation_model.rionowcast.tasks import ( # pylint: disable=E0611, E0401 +from pipelines.utils.gypscie.tasks import ( # pylint: disable=E0611, E0401 access_api, add_columns_on_dfr, download_datasets_from_gypscie, @@ -42,16 +53,6 @@ register_dataset_on_gypscie, task_wait_run, ) -from pipelines.rj_escritorio.rain_dashboard.constants import ( - constants as rain_dashboard_constants, -) -from pipelines.tasks import task_create_partitions # pylint: disable=E0611, E0401 - -# from pipelines.utils.constants import constants as utils_constants -from pipelines.utils.custom import wait_for_flow_run_with_timeout - -# from pipelines.utils.dump_db.constants import constants as dump_db_constants -from pipelines.utils.dump_to_gcs.constants import constants as dump_to_gcs_constants wait_for_flow_run_with_5min_timeout = wait_for_flow_run_with_timeout(timeout=timedelta(minutes=5)) diff --git a/pipelines/meteorologia/radar/mendanha/flows.py b/pipelines/meteorologia/radar/mendanha/flows.py index 289aa967..bb2deab7 100644 --- a/pipelines/meteorologia/radar/mendanha/flows.py +++ b/pipelines/meteorologia/radar/mendanha/flows.py @@ -4,15 +4,20 @@ """ Flows for setting rain dashboard using radar data. 
""" -from prefect import Parameter, case -from prefect.run_configs import KubernetesRun -from prefect.storage import GCS -from prefeitura_rio.pipelines_utils.custom import Flow +from prefect import Parameter, case # pylint: disable=E0611, E0401 +from prefect.run_configs import KubernetesRun # pylint: disable=E0611, E0401 +from prefect.storage import GCS # pylint: disable=E0611, E0401 +from prefeitura_rio.pipelines_utils.custom import Flow # pylint: disable=E0611, E0401 from prefeitura_rio.pipelines_utils.state_handlers import handler_inject_bd_credentials +from prefeitura_rio.pipelines_utils.tasks import ( # pylint: disable=E0611, E0401 + create_table_and_upload_to_gcs, + get_now_datetime, + task_run_dbt_model_task, +) -from pipelines.constants import constants -from pipelines.meteorologia.radar.mendanha.constants import ( - constants as radar_constants, # pylint: disable=E0611, E0401 +from pipelines.constants import constants # pylint: disable=E0611, E0401 +from pipelines.meteorologia.radar.mendanha.constants import ( # pylint: disable=E0611, E0401 + constants as radar_constants, ) # from pipelines.tasks import task_get_redis_client @@ -51,6 +56,23 @@ task_save_on_redis, ) +# preprocessing imports +from pipelines.utils.gypscie.tasks import ( # pylint: disable=E0611, E0401 + access_api as access_api_gypscie, + add_columns_on_dfr, + download_datasets_from_gypscie, + execute_dataset_processor, + get_dataset_info, + get_dataset_processor_info, + path_to_dfr, + register_dataset_on_gypscie, + task_wait_run, +) + +from pipelines.tasks import ( # pylint: disable=E0611, E0401 + task_create_partitions, +) + # create_visualization_with_background, prefix_to_restore, save_data, # from pipelines.utils_rj_cor import build_redis_hash # pylint: disable=E0611, E0401 @@ -81,13 +103,38 @@ # BASE_PATH = "pipelines/rj_cor/meteorologia/radar/precipitacao/" BUCKET_NAME = "rj-escritorio-scp" - # redis_data_key = Parameter("redis_data_key", default="data_last_15min_rain") - # redis_update_key = Parameter( - # "redis_update_key", default="data_last_15min_rain_update" - # ) - # redis_host = Parameter("redis_host", default="redis.redis.svc.cluster.local") - # redis_port = Parameter("redis_port", default=6379) - # redis_db = Parameter("redis_db", default=1) + # Preprocessing gypscie parameters + # Gypscie parameters + environment_id = Parameter("environment_id", default=1, required=False) + domain_id = Parameter("domain_id", default=1, required=False) + project_id = Parameter("project_id", default=1, required=False) + project_name = Parameter("project_name", default="rionowcast_precipitation", required=False) + + # Gypscie processor parameters + processor_name = Parameter("processor_name", default="etl_alertario22", required=True) + dataset_processor_id = Parameter("dataset_processor_id", default=43, required=False) # mudar + + # Parameters for saving data on GCP + materialize_after_dump = Parameter("materialize_after_dump", default=False, required=False) + dump_mode = Parameter("dump_mode", default=False, required=False) + dataset_id_previsao_chuva = Parameter( + "dataset_id_previsao_chuva", default="clima_previsao_chuva", required=False + ) + table_id_previsao_chuva = Parameter( + "table_id_previsao_chuva", default="preprocessamento_radar_mendanha", required=False + ) + + # Dataset parameters + station_type = Parameter("station_type", default="radar", required=False) + source = Parameter("source", default="mendanha", required=False) + + # Dataset path, if it was saved on ETL flow or it will be None + dataset_path = 
Parameter("dataset_path", default=None, required=False) # dataset_path + model_version = Parameter("model_version", default=1, required=False) + + ############################ + # Start radar flow # + ############################ redis_client = task_get_redis_client(infisical_secrets_path="/redis") redis_hash = task_build_redis_hash(DATASET_ID, TABLE_ID, name="images", mode=MODE) @@ -220,6 +267,74 @@ ) # save_last_update_redis.set_upstream(upload_table) + #################################### + # Start preprocessing flow # + #################################### + + api_gypscie = access_api_gypscie() + + dataset_info = get_dataset_info(station_type, source) + + # Get processor information on gypscie + with case(dataset_processor_id, None): + dataset_processor_response, dataset_processor_id = get_dataset_processor_info( + api_gypscie, processor_name + ) + + dataset_response = register_dataset_on_gypscie(api_gypscie, filepath=dataset_path, domain_id=domain_id) + # TODO: verifcar no codigo do augustp se são esses os parametros corretos + processor_parameters = { + "dataset1": str(dataset_path).rsplit("/", maxsplit=1)[-1], + "station_type": station_type, + } + + dataset_processor_task_id = execute_dataset_processor( + api_gypscie, + processor_id=dataset_processor_id, + dataset_id=[dataset_response["id"]], + environment_id=environment_id, + project_id=project_id, + parameters=processor_parameters, + ) + wait_run = task_wait_run(api_gypscie, dataset_processor_task_id, flow_type="processor") + dataset_path = download_datasets_from_gypscie( + api_gypscie, dataset_names=[dataset_response["id"]], wait=wait_run + ) + dfr_ = path_to_dfr(dataset_path) + # output_datasets_id = get_output_dataset_ids_on_gypscie(api, dataset_processor_task_id) + dfr = add_columns_on_dfr(dfr_, model_version, update_time=True) + + # Save pre-treated data on local file with partitions + now_datetime = get_now_datetime() + prediction_data_path = task_create_partitions( + dfr, + partition_date_column=dataset_info["partition_date_column"], + savepath="model_prediction", + suffix=now_datetime, + ) + ################################ + # Save preprocessing on GCP # + ################################ + + # Upload data to BigQuery + create_table = create_table_and_upload_to_gcs( + data_path=prediction_data_path, + dataset_id=dataset_id_previsao_chuva, + table_id=table_id_previsao_chuva, + dump_mode=dump_mode, + biglake_table=False, + ) + + # Trigger DBT flow run + with case(materialize_after_dump, True): + run_dbt = task_run_dbt_model_task( + dataset_id=dataset_id_previsao_chuva, + table_id=table_id_previsao_chuva, + # mode=materialization_mode, + # materialize_to_datario=materialize_to_datario, + ) + run_dbt.set_upstream(create_table) + cor_meteorologia_refletividade_radar_flow.storage = GCS(constants.GCS_FLOWS_BUCKET.value) cor_meteorologia_refletividade_radar_flow.run_config = KubernetesRun( diff --git a/pipelines/meteorologia/precipitacao_alertario/tasks_gypscie.py b/pipelines/utils/gypscie/tasks.py similarity index 99% rename from pipelines/meteorologia/precipitacao_alertario/tasks_gypscie.py rename to pipelines/utils/gypscie/tasks.py index 63cfafcc..b633c80d 100644 --- a/pipelines/meteorologia/precipitacao_alertario/tasks_gypscie.py +++ b/pipelines/utils/gypscie/tasks.py @@ -20,7 +20,7 @@ from requests.exceptions import HTTPError from pipelines.constants import constants # pylint: disable=E0611, E0401 -from pipelines.precipitation_model.rionowcast.utils import ( # pylint: disable=E0611, E0401 +from pipelines.utils.gypscie.utils 
import ( # pylint: disable=E0611, E0401 GypscieApi, wait_run, ) diff --git a/pipelines/meteorologia/precipitacao_alertario/util_gypscie.py b/pipelines/utils/gypscie/utils.py similarity index 100% rename from pipelines/meteorologia/precipitacao_alertario/util_gypscie.py rename to pipelines/utils/gypscie/utils.py From 6e63aa7b1d25506b2c966c51da427af44c7cbccb Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Wed, 9 Oct 2024 16:57:56 -0300 Subject: [PATCH 05/56] changin path where dfr was saved --- .../precipitacao_alertario/flows.py | 134 +++++++++--------- .../precipitacao_alertario/tasks.py | 18 +-- .../meteorologia/radar/mendanha/flows.py | 116 +++++++-------- 3 files changed, 139 insertions(+), 129 deletions(-) diff --git a/pipelines/meteorologia/precipitacao_alertario/flows.py b/pipelines/meteorologia/precipitacao_alertario/flows.py index 9b40cc21..181c830a 100644 --- a/pipelines/meteorologia/precipitacao_alertario/flows.py +++ b/pipelines/meteorologia/precipitacao_alertario/flows.py @@ -5,10 +5,10 @@ """ from datetime import timedelta -from prefect import Parameter, case -from prefect.run_configs import KubernetesRun -from prefect.storage import GCS -from prefect.tasks.prefect import create_flow_run, wait_for_flow_run +from prefect import Parameter, case # pylint: disable=E0611, E0401 +from prefect.run_configs import KubernetesRun # pylint: disable=E0611, E0401 +from prefect.storage import GCS # pylint: disable=E0611, E0401 +from prefect.tasks.prefect import create_flow_run, wait_for_flow_run # pylint: disable=E0611,E0401 from prefeitura_rio.pipelines_utils.custom import Flow # pylint: disable=E0611, E0401 from prefeitura_rio.pipelines_utils.state_handlers import handler_inject_bd_credentials from prefeitura_rio.pipelines_utils.tasks import ( # pylint: disable=E0611, E0401 @@ -91,6 +91,8 @@ default=dump_to_gcs_constants.MAX_BYTES_PROCESSED_PER_TABLE.value, ) + # Preprocessing gypscie parameters + preprocessing_gypscie = Parameter("preprocessing_gypscie", default=False, required=False) # Gypscie parameters environment_id = Parameter("environment_id", default=1, required=False) domain_id = Parameter("domain_id", default=1, required=False) @@ -136,7 +138,7 @@ ) with case(empty_data_pluviometric, False): - path_pluviometric = save_data( + path_pluviometric, full_path_pluviometric = save_data( dfr_pluviometric, "pluviometric", wait=empty_data_pluviometric ) # Create table in BigQuery @@ -418,69 +420,73 @@ # Start preprocessing flow # #################################### - api = access_api() + with case(empty_data_pluviometric, False): + with case(preprocessing_gypscie, True): + api = access_api() - dataset_info = get_dataset_info(station_type, source) + dataset_info = get_dataset_info(station_type, source) - # Get processor information on gypscie - with case(dataset_processor_id, None): - dataset_processor_response, dataset_processor_id = get_dataset_processor_info( - api, processor_name - ) + # Get processor information on gypscie + with case(dataset_processor_id, None): + dataset_processor_response, dataset_processor_id = get_dataset_processor_info( + api, processor_name + ) - dataset_response = register_dataset_on_gypscie(api, filepath=dataset_path, domain_id=domain_id) - # TODO: verifcar no codigo do augustp se são esses os parametros corretos - processor_parameters = { - "dataset1": str(dataset_path).rsplit("/", maxsplit=1)[-1], - "station_type": station_type, - } - - dataset_processor_task_id = execute_dataset_processor( - api, - processor_id=dataset_processor_id, - 
dataset_id=[dataset_response["id"]], - environment_id=environment_id, - project_id=project_id, - parameters=processor_parameters, - ) - wait_run = task_wait_run(api, dataset_processor_task_id, flow_type="processor") - dataset_path = download_datasets_from_gypscie( - api, dataset_names=[dataset_response["id"]], wait=wait_run - ) - dfr_ = path_to_dfr(dataset_path) - # output_datasets_id = get_output_dataset_ids_on_gypscie(api, dataset_processor_task_id) - dfr = add_columns_on_dfr(dfr_, model_version, update_time=True) - - # Save pre-treated data on local file with partitions - now_datetime = get_now_datetime() - prediction_data_path = task_create_partitions( - dfr, - partition_date_column=dataset_info["partition_date_column"], - savepath="model_prediction", - suffix=now_datetime, - ) - ################################ - # Save preprocessing on GCP # - ################################ - - # Upload data to BigQuery - create_table = create_table_and_upload_to_gcs( - data_path=prediction_data_path, - dataset_id=dataset_id_previsao_chuva, - table_id=table_id_previsao_chuva, - dump_mode=DUMP_MODE, - biglake_table=False, - ) + dataset_response = register_dataset_on_gypscie( + api, filepath=full_path_pluviometric, domain_id=domain_id + ) + # TODO: verifcar no codigo do augustp se são esses os parametros corretos + processor_parameters = { + "dataset1": str(dataset_path).rsplit("/", maxsplit=1)[-1], + "station_type": station_type, + } + + dataset_processor_task_id = execute_dataset_processor( + api, + processor_id=dataset_processor_id, + dataset_id=[dataset_response["id"]], + environment_id=environment_id, + project_id=project_id, + parameters=processor_parameters, + ) + wait_run = task_wait_run(api, dataset_processor_task_id, flow_type="processor") + dataset_path = download_datasets_from_gypscie( + api, dataset_names=[dataset_response["id"]], wait=wait_run + ) + dfr_ = path_to_dfr(dataset_path) + # output_datasets_id = get_output_dataset_ids_on_gypscie(api, dataset_processor_task_id) + dfr = add_columns_on_dfr(dfr_, model_version, update_time=True) + + # Save pre-treated data on local file with partitions + now_datetime = get_now_datetime() + prediction_data_path = task_create_partitions( + dfr, + partition_date_column=dataset_info["partition_date_column"], + savepath="model_prediction", + suffix=now_datetime, + ) + ################################ + # Save preprocessing on GCP # + ################################ + + # Upload data to BigQuery + create_table = create_table_and_upload_to_gcs( + data_path=prediction_data_path, + dataset_id=dataset_id_previsao_chuva, + table_id=table_id_previsao_chuva, + dump_mode=DUMP_MODE, + biglake_table=False, + ) - # Trigger DBT flow run - with case(MATERIALIZE_AFTER_DUMP, True): - run_dbt = task_run_dbt_model_task( - dataset_id=dataset_id_previsao_chuva, - table_id=table_id_previsao_chuva, - # mode=materialization_mode, - # materialize_to_datario=materialize_to_datario, - ) - run_dbt.set_upstream(create_table) + # Trigger DBT flow run + with case(MATERIALIZE_AFTER_DUMP, True): + run_dbt = task_run_dbt_model_task( + dataset_id=dataset_id_previsao_chuva, + table_id=table_id_previsao_chuva, + # mode=materialization_mode, + # materialize_to_datario=materialize_to_datario, + ) + run_dbt.set_upstream(create_table) # para rodar na cloud cor_meteorologia_precipitacao_alertario.storage = GCS(constants.GCS_FLOWS_BUCKET.value) diff --git a/pipelines/meteorologia/precipitacao_alertario/tasks.py b/pipelines/meteorologia/precipitacao_alertario/tasks.py index d64c0751..703f0ac6 
100644 --- a/pipelines/meteorologia/precipitacao_alertario/tasks.py +++ b/pipelines/meteorologia/precipitacao_alertario/tasks.py @@ -9,11 +9,11 @@ import numpy as np import pandas as pd -import pendulum +import pendulum # pylint: disable=E0401 import requests -from bs4 import BeautifulSoup -from prefect import task -from prefeitura_rio.pipelines_utils.infisical import get_secret +from bs4 import BeautifulSoup # pylint: disable=E0401 +from prefect import task # pylint: disable=E0401 +from prefeitura_rio.pipelines_utils.infisical import get_secret # pylint: disable=E0401 from pipelines.constants import constants from pipelines.meteorologia.precipitacao_alertario.utils import ( @@ -178,12 +178,12 @@ def treat_pluviometer_and_meteorological_data( return dfr, empty_data -@task +@task(nout=2) def save_data( dfr: pd.DataFrame, data_name: str = "temp", wait=None, # pylint: disable=unused-argument -) -> Union[str, Path]: +) -> Tuple[Union[str, Path], Union[str, Path]]: """ Salvar dfr tratados em csv para conseguir subir pro GCP """ @@ -199,15 +199,15 @@ def save_data( log(f"Dataframe for {data_name} after partitions {dataframe.iloc[0]}") log(f"Dataframe for {data_name} after partitions {dataframe.dtypes}") - to_partitions( + full_path = to_partitions( data=dataframe, partition_columns=partitions, savepath=prepath, data_type="csv", suffix=current_time, ) - log(f"Files saved on {prepath}") - return prepath + log(f"Files saved on {prepath}, full path is {full_path}") + return prepath, full_path @task diff --git a/pipelines/meteorologia/radar/mendanha/flows.py b/pipelines/meteorologia/radar/mendanha/flows.py index bb2deab7..38524f61 100644 --- a/pipelines/meteorologia/radar/mendanha/flows.py +++ b/pipelines/meteorologia/radar/mendanha/flows.py @@ -104,6 +104,7 @@ BUCKET_NAME = "rj-escritorio-scp" # Preprocessing gypscie parameters + preprocessing_gypscie = Parameter("preprocessing_gypscie", default=False, required=False) # Gypscie parameters environment_id = Parameter("environment_id", default=1, required=False) domain_id = Parameter("domain_id", default=1, required=False) @@ -271,69 +272,72 @@ # Start preprocessing flow # #################################### - api_gypscie = access_api_gypscie() + with case(preprocessing_gypscie, True): + api_gypscie = access_api_gypscie() - dataset_info = get_dataset_info(station_type, source) + dataset_info = get_dataset_info(station_type, source) - # Get processor information on gypscie - with case(dataset_processor_id, None): - dataset_processor_response, dataset_processor_id = get_dataset_processor_info( - api_gypscie, processor_name + # Get processor information on gypscie + with case(dataset_processor_id, None): + dataset_processor_response, dataset_processor_id = get_dataset_processor_info( + api_gypscie, processor_name + ) + # TODO: e se o radar_files tiver mais de um arquivo? 
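+        # Note: register_dataset_on_gypscie receives a single filepath, so radar_files
+        # is assumed to contain exactly one file here (see the TODO above).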
+ dataset_response = register_dataset_on_gypscie( + api_gypscie, filepath=radar_files, domain_id=domain_id ) + # TODO: verifcar no codigo do augustp se são esses os parametros corretos + processor_parameters = { + "dataset1": str(dataset_path).rsplit("/", maxsplit=1)[-1], + "station_type": station_type, + } + + dataset_processor_task_id = execute_dataset_processor( + api_gypscie, + processor_id=dataset_processor_id, + dataset_id=[dataset_response["id"]], + environment_id=environment_id, + project_id=project_id, + parameters=processor_parameters, + ) + wait_run = task_wait_run(api_gypscie, dataset_processor_task_id, flow_type="processor") + dataset_path = download_datasets_from_gypscie( + api_gypscie, dataset_names=[dataset_response["id"]], wait=wait_run + ) + dfr_ = path_to_dfr(dataset_path) + # output_datasets_id = get_output_dataset_ids_on_gypscie(api, dataset_processor_task_id) + dfr = add_columns_on_dfr(dfr_, model_version, update_time=True) + + # Save pre-treated data on local file with partitions + now_datetime = get_now_datetime() + prediction_data_path = task_create_partitions( + dfr, + partition_date_column=dataset_info["partition_date_column"], + savepath="model_prediction", + suffix=now_datetime, + ) + ################################ + # Save preprocessing on GCP # + ################################ - dataset_response = register_dataset_on_gypscie(api_gypscie, filepath=dataset_path, domain_id=domain_id) - # TODO: verifcar no codigo do augustp se são esses os parametros corretos - processor_parameters = { - "dataset1": str(dataset_path).rsplit("/", maxsplit=1)[-1], - "station_type": station_type, - } - - dataset_processor_task_id = execute_dataset_processor( - api_gypscie, - processor_id=dataset_processor_id, - dataset_id=[dataset_response["id"]], - environment_id=environment_id, - project_id=project_id, - parameters=processor_parameters, - ) - wait_run = task_wait_run(api_gypscie, dataset_processor_task_id, flow_type="processor") - dataset_path = download_datasets_from_gypscie( - api_gypscie, dataset_names=[dataset_response["id"]], wait=wait_run - ) - dfr_ = path_to_dfr(dataset_path) - # output_datasets_id = get_output_dataset_ids_on_gypscie(api, dataset_processor_task_id) - dfr = add_columns_on_dfr(dfr_, model_version, update_time=True) - - # Save pre-treated data on local file with partitions - now_datetime = get_now_datetime() - prediction_data_path = task_create_partitions( - dfr, - partition_date_column=dataset_info["partition_date_column"], - savepath="model_prediction", - suffix=now_datetime, - ) - ################################ - # Save preprocessing on GCP # - ################################ - - # Upload data to BigQuery - create_table = create_table_and_upload_to_gcs( - data_path=prediction_data_path, - dataset_id=dataset_id_previsao_chuva, - table_id=table_id_previsao_chuva, - dump_mode=dump_mode, - biglake_table=False, - ) - - # Trigger DBT flow run - with case(materialize_after_dump, True): - run_dbt = task_run_dbt_model_task( + # Upload data to BigQuery + create_table = create_table_and_upload_to_gcs( + data_path=prediction_data_path, dataset_id=dataset_id_previsao_chuva, table_id=table_id_previsao_chuva, - # mode=materialization_mode, - # materialize_to_datario=materialize_to_datario, + dump_mode=dump_mode, + biglake_table=False, ) - run_dbt.set_upstream(create_table) + + # Trigger DBT flow run + with case(materialize_after_dump, True): + run_dbt = task_run_dbt_model_task( + dataset_id=dataset_id_previsao_chuva, + table_id=table_id_previsao_chuva, + # 
mode=materialization_mode, + # materialize_to_datario=materialize_to_datario, + ) + run_dbt.set_upstream(create_table) cor_meteorologia_refletividade_radar_flow.storage = GCS(constants.GCS_FLOWS_BUCKET.value) From 8f37f36e3343ae4f28aba6eb8011ebc557bdc43b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 9 Oct 2024 19:58:29 +0000 Subject: [PATCH 06/56] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../meteorologia/precipitacao_alertario/flows.py | 6 ++++-- pipelines/meteorologia/radar/mendanha/flows.py | 15 +++++++-------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/pipelines/meteorologia/precipitacao_alertario/flows.py b/pipelines/meteorologia/precipitacao_alertario/flows.py index 181c830a..092d1ae6 100644 --- a/pipelines/meteorologia/precipitacao_alertario/flows.py +++ b/pipelines/meteorologia/precipitacao_alertario/flows.py @@ -8,7 +8,10 @@ from prefect import Parameter, case # pylint: disable=E0611, E0401 from prefect.run_configs import KubernetesRun # pylint: disable=E0611, E0401 from prefect.storage import GCS # pylint: disable=E0611, E0401 -from prefect.tasks.prefect import create_flow_run, wait_for_flow_run # pylint: disable=E0611,E0401 +from prefect.tasks.prefect import ( # pylint: disable=E0611,E0401 + create_flow_run, + wait_for_flow_run, +) from prefeitura_rio.pipelines_utils.custom import Flow # pylint: disable=E0611, E0401 from prefeitura_rio.pipelines_utils.state_handlers import handler_inject_bd_credentials from prefeitura_rio.pipelines_utils.tasks import ( # pylint: disable=E0611, E0401 @@ -29,7 +32,6 @@ save_last_dbt_update, treat_pluviometer_and_meteorological_data, ) - from pipelines.rj_escritorio.rain_dashboard.constants import ( constants as rain_dashboard_constants, ) diff --git a/pipelines/meteorologia/radar/mendanha/flows.py b/pipelines/meteorologia/radar/mendanha/flows.py index 38524f61..8fb9d648 100644 --- a/pipelines/meteorologia/radar/mendanha/flows.py +++ b/pipelines/meteorologia/radar/mendanha/flows.py @@ -16,8 +16,8 @@ ) from pipelines.constants import constants # pylint: disable=E0611, E0401 -from pipelines.meteorologia.radar.mendanha.constants import ( # pylint: disable=E0611, E0401 - constants as radar_constants, +from pipelines.meteorologia.radar.mendanha.constants import ( + constants as radar_constants, # pylint: disable=E0611, E0401 ) # from pipelines.tasks import task_get_redis_client @@ -51,14 +51,17 @@ # from prefect.tasks.prefect import create_flow_run, wait_for_flow_run from pipelines.tasks import ( # pylint: disable=E0611, E0401 task_build_redis_hash, + task_create_partitions, task_get_redis_client, task_get_redis_output, task_save_on_redis, ) # preprocessing imports -from pipelines.utils.gypscie.tasks import ( # pylint: disable=E0611, E0401 - access_api as access_api_gypscie, +from pipelines.utils.gypscie.tasks import ( + access_api as access_api_gypscie, # pylint: disable=E0611, E0401 +) +from pipelines.utils.gypscie.tasks import ( add_columns_on_dfr, download_datasets_from_gypscie, execute_dataset_processor, @@ -69,10 +72,6 @@ task_wait_run, ) -from pipelines.tasks import ( # pylint: disable=E0611, E0401 - task_create_partitions, -) - # create_visualization_with_background, prefix_to_restore, save_data, # from pipelines.utils_rj_cor import build_redis_hash # pylint: disable=E0611, E0401 From af14baea8486de07622f152d18d2fde1db0b3d7c Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Thu, 17 Oct 
2024 15:01:40 -0300 Subject: [PATCH 07/56] adding treatment version on gypscie register dataset --- pipelines/meteorologia/precipitacao_alertario/flows.py | 5 +++-- pipelines/meteorologia/precipitacao_alertario/tasks.py | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pipelines/meteorologia/precipitacao_alertario/flows.py b/pipelines/meteorologia/precipitacao_alertario/flows.py index 181c830a..d8037a09 100644 --- a/pipelines/meteorologia/precipitacao_alertario/flows.py +++ b/pipelines/meteorologia/precipitacao_alertario/flows.py @@ -98,6 +98,7 @@ domain_id = Parameter("domain_id", default=1, required=False) project_id = Parameter("project_id", default=1, required=False) project_name = Parameter("project_name", default="rionowcast_precipitation", required=False) + treatment_version = Parameter("treatment_version", default=1, required=False) # Gypscie processor parameters processor_name = Parameter("processor_name", default="etl_alertario22", required=True) @@ -139,7 +140,7 @@ with case(empty_data_pluviometric, False): path_pluviometric, full_path_pluviometric = save_data( - dfr_pluviometric, "pluviometric", wait=empty_data_pluviometric + dfr_pluviometric, treatment_version, "pluviometric", wait=empty_data_pluviometric ) # Create table in BigQuery UPLOAD_TABLE = create_table_and_upload_to_gcs( @@ -431,7 +432,7 @@ dataset_processor_response, dataset_processor_id = get_dataset_processor_info( api, processor_name ) - + # TODO: converter os horarios do alertario para UTC antes de resgistrar dataset_response = register_dataset_on_gypscie( api, filepath=full_path_pluviometric, domain_id=domain_id ) diff --git a/pipelines/meteorologia/precipitacao_alertario/tasks.py b/pipelines/meteorologia/precipitacao_alertario/tasks.py index 703f0ac6..a7979bd9 100644 --- a/pipelines/meteorologia/precipitacao_alertario/tasks.py +++ b/pipelines/meteorologia/precipitacao_alertario/tasks.py @@ -181,6 +181,7 @@ def treat_pluviometer_and_meteorological_data( @task(nout=2) def save_data( dfr: pd.DataFrame, + treatment_version: int, data_name: str = "temp", wait=None, # pylint: disable=unused-argument ) -> Tuple[Union[str, Path], Union[str, Path]]: @@ -204,7 +205,7 @@ def save_data( partition_columns=partitions, savepath=prepath, data_type="csv", - suffix=current_time, + suffix=str(treatment_version)+"_"+current_time, ) log(f"Files saved on {prepath}, full path is {full_path}") return prepath, full_path From 14723774b5620c0afc5491af9a320130ca665681 Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Thu, 17 Oct 2024 15:02:11 -0300 Subject: [PATCH 08/56] adding treatment version on gypscie register dataset --- pipelines/meteorologia/radar/mendanha/flows.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pipelines/meteorologia/radar/mendanha/flows.py b/pipelines/meteorologia/radar/mendanha/flows.py index 38524f61..45f933a2 100644 --- a/pipelines/meteorologia/radar/mendanha/flows.py +++ b/pipelines/meteorologia/radar/mendanha/flows.py @@ -131,7 +131,7 @@ # Dataset path, if it was saved on ETL flow or it will be None dataset_path = Parameter("dataset_path", default=None, required=False) # dataset_path - model_version = Parameter("model_version", default=1, required=False) + treatment_version = Parameter("treatment_version", default=1, required=False) ############################ # Start radar flow # @@ -282,7 +282,8 @@ dataset_processor_response, dataset_processor_id = get_dataset_processor_info( api_gypscie, processor_name ) - # TODO: e se o radar_files tiver mais de um 
arquivo? + # TODO: ao salvar o nome do radar_files salvar com sufixo treatment_version + # pq te que ser unico no gypscie dataset_response = register_dataset_on_gypscie( api_gypscie, filepath=radar_files, domain_id=domain_id ) @@ -306,7 +307,7 @@ ) dfr_ = path_to_dfr(dataset_path) # output_datasets_id = get_output_dataset_ids_on_gypscie(api, dataset_processor_task_id) - dfr = add_columns_on_dfr(dfr_, model_version, update_time=True) + dfr = add_columns_on_dfr(dfr_, treatment_version, update_time=True) # Save pre-treated data on local file with partitions now_datetime = get_now_datetime() From e28585935fd6d337c0e7916186787004fc0c2418 Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Wed, 30 Oct 2024 18:45:23 -0300 Subject: [PATCH 09/56] adding functions to treat data on gypscie --- .../precipitacao_alertario/flows.py | 89 ++++++++++++++----- .../precipitacao_alertario/tasks.py | 30 ++++++- 2 files changed, 96 insertions(+), 23 deletions(-) diff --git a/pipelines/meteorologia/precipitacao_alertario/flows.py b/pipelines/meteorologia/precipitacao_alertario/flows.py index 69bf45dc..90a2a231 100644 --- a/pipelines/meteorologia/precipitacao_alertario/flows.py +++ b/pipelines/meteorologia/precipitacao_alertario/flows.py @@ -27,6 +27,7 @@ from pipelines.meteorologia.precipitacao_alertario.schedules import minute_schedule from pipelines.meteorologia.precipitacao_alertario.tasks import ( check_to_run_dbt, + convert_sp_timezone_to_utc, download_data, save_data, save_last_dbt_update, @@ -48,12 +49,14 @@ access_api, add_columns_on_dfr, download_datasets_from_gypscie, - execute_dataset_processor, + execute_dataflow_on_gypscie, + get_dataflow_alertario_params, get_dataset_info, + get_dataset_name_on_gypscie, get_dataset_processor_info, path_to_dfr, register_dataset_on_gypscie, - task_wait_run, + unzip_files, ) wait_for_flow_run_with_5min_timeout = wait_for_flow_run_with_timeout(timeout=timedelta(minutes=5)) @@ -96,6 +99,7 @@ # Preprocessing gypscie parameters preprocessing_gypscie = Parameter("preprocessing_gypscie", default=False, required=False) # Gypscie parameters + workflow_id = Parameter("workflow_id", default=1, required=False) environment_id = Parameter("environment_id", default=1, required=False) domain_id = Parameter("domain_id", default=1, required=False) project_id = Parameter("project_id", default=1, required=False) @@ -106,6 +110,20 @@ processor_name = Parameter("processor_name", default="etl_alertario22", required=True) dataset_processor_id = Parameter("dataset_processor_id", default=43, required=False) # mudar + load_data_function_id = Parameter("load_data_function_id", default=53, required=False) + parse_date_time_function_id = Parameter( + "parse_date_time_function_id", default=54, required=False + ) + drop_duplicates_function_id = Parameter( + "drop_duplicates_function_id", default=55, required=False + ) + replace_inconsistent_values_function_id = Parameter( + "replace_inconsistent_values_function_id", default=56, required=False + ) + add_lat_lon_function_id = Parameter("add_lat_lon_function_id", default=57, required=False) + save_data_function_id = Parameter("save_data_function_id", default=58, required=False) + rain_gauge_metadata_path = Parameter("rain_gauge_metadata_path", default=227, required=False) + # Parameters for saving data preprocessed on GCP dataset_id_previsao_chuva = Parameter( "dataset_id_previsao_chuva", default="clima_previsao_chuva", required=False @@ -127,13 +145,19 @@ ######################### dfr_pluviometric, dfr_meteorological = download_data() - 
(dfr_pluviometric, empty_data_pluviometric,) = treat_pluviometer_and_meteorological_data( + ( + dfr_pluviometric, + empty_data_pluviometric, + ) = treat_pluviometer_and_meteorological_data( dfr=dfr_pluviometric, dataset_id=DATASET_ID_PLUVIOMETRIC, table_id=TABLE_ID_PLUVIOMETRIC, mode=MATERIALIZATION_MODE, ) - (dfr_meteorological, empty_data_meteorological,) = treat_pluviometer_and_meteorological_data( + ( + dfr_meteorological, + empty_data_meteorological, + ) = treat_pluviometer_and_meteorological_data( dfr=dfr_meteorological, dataset_id=DATASET_ID_METEOROLOGICAL, table_id=TABLE_ID_METEOROLOGICAL, @@ -434,29 +458,52 @@ dataset_processor_response, dataset_processor_id = get_dataset_processor_info( api, processor_name ) - # TODO: converter os horarios do alertario para UTC antes de resgistrar - dataset_response = register_dataset_on_gypscie( - api, filepath=full_path_pluviometric, domain_id=domain_id + dfr_pluviometric_gypscie = convert_sp_timezone_to_utc(dfr_pluviometric) + path_pluviometric_gypscie, full_path_pluviometric_gypscie = save_data( + dfr_pluviometric_gypscie, + columns=["id_estacao", "data_medicao", "acumulado_chuva_5min"], + data_name="gypscie", + ) + register_dataset_response = register_dataset_on_gypscie( + api, filepath=path_pluviometric_gypscie, domain_id=domain_id ) - # TODO: verifcar no codigo do augustp se são esses os parametros corretos - processor_parameters = { - "dataset1": str(dataset_path).rsplit("/", maxsplit=1)[-1], - "station_type": station_type, - } - dataset_processor_task_id = execute_dataset_processor( - api, - processor_id=dataset_processor_id, - dataset_id=[dataset_response["id"]], + model_params = get_dataflow_alertario_params( + workflow_id=workflow_id, environment_id=environment_id, project_id=project_id, - parameters=processor_parameters, + rain_gauge_data_id=register_dataset_response["id"], + rain_gauge_metadata_path=rain_gauge_metadata_path, + load_data_funtion_id=load_data_function_id, + parse_date_time_function_id=parse_date_time_function_id, + drop_duplicates_function_id=drop_duplicates_function_id, + replace_inconsistent_values_function_id=replace_inconsistent_values_function_id, + add_lat_lon_function_id=add_lat_lon_function_id, + save_data_function_id=save_data_function_id, ) - wait_run = task_wait_run(api, dataset_processor_task_id, flow_type="processor") - dataset_path = download_datasets_from_gypscie( - api, dataset_names=[dataset_response["id"]], wait=wait_run + + # Send dataset ids to gypscie to get predictions + output_dataset_ids = execute_dataflow_on_gypscie( + api, + model_params, ) - dfr_ = path_to_dfr(dataset_path) + + # dataset_processor_task_id = execute_dataset_processor( + # api, + # processor_id=dataset_processor_id, + # dataset_id=[dataset_response["id"]], + # environment_id=environment_id, + # project_id=project_id, + # parameters=processor_parameters, + # ) + # wait_run = task_wait_run(api, dataset_processor_task_id, flow_type="processor") + # dataset_path = download_datasets_from_gypscie( + # api, dataset_names=[dataset_response["id"]], wait=wait_run + # ) + dataset_names = get_dataset_name_on_gypscie(api, output_dataset_ids) # new + ziped_dataset_paths = download_datasets_from_gypscie(api, dataset_names=dataset_names) + dataset_paths = unzip_files(ziped_dataset_paths) + dfr_ = path_to_dfr(dataset_paths) # output_datasets_id = get_output_dataset_ids_on_gypscie(api, dataset_processor_task_id) dfr = add_columns_on_dfr(dfr_, model_version, update_time=True) diff --git a/pipelines/meteorologia/precipitacao_alertario/tasks.py 
b/pipelines/meteorologia/precipitacao_alertario/tasks.py index a7979bd9..7a51cbb1 100644 --- a/pipelines/meteorologia/precipitacao_alertario/tasks.py +++ b/pipelines/meteorologia/precipitacao_alertario/tasks.py @@ -181,7 +181,8 @@ def treat_pluviometer_and_meteorological_data( @task(nout=2) def save_data( dfr: pd.DataFrame, - treatment_version: int, + columns: str = None, + treatment_version: str = "", data_name: str = "temp", wait=None, # pylint: disable=unused-argument ) -> Tuple[Union[str, Path], Union[str, Path]]: @@ -200,12 +201,15 @@ def save_data( log(f"Dataframe for {data_name} after partitions {dataframe.iloc[0]}") log(f"Dataframe for {data_name} after partitions {dataframe.dtypes}") + if columns: + dataframe = dataframe[columns] + full_path = to_partitions( data=dataframe, partition_columns=partitions, savepath=prepath, data_type="csv", - suffix=str(treatment_version)+"_"+current_time, + suffix=treatment_version + "_" + current_time, ) log(f"Files saved on {prepath}, full path is {full_path}") return prepath, full_path @@ -348,3 +352,25 @@ def save_data_old( ) log(f"{data_name} files saved on {prepath}") return prepath + + +@task +def convert_sp_timezone_to_utc(dfr, data_column: str = "data_medicao") -> pd.DataFrame: + """ + Convert a dataframe data_column from São Paulo (UTC-3) to UTC. + + Parameters: + dfr (pd.DataFrame): DataFrame with data_column. + + Returns: + pd.DataFrame: DataFrame with data_column converted to UTC. + """ + + if data_column not in dfr.columns: + raise ValueError(f"DataFrame must contain a column named {data_column}.") + + dfr[data_column] = pd.to_datetime(dfr[data_column]) + dfr[data_column] = dfr[data_column].dt.tz_localize("America/Sao_Paulo") + dfr[data_column] = dfr[data_column].dt.tz_convert("UTC") + + return dfr From 2ab57c9972ff9e932cd8ddb0656e6fa60ff7ba81 Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Wed, 30 Oct 2024 19:00:12 -0300 Subject: [PATCH 10/56] changing gypscie tasks --- pipelines/utils/gypscie/tasks.py | 214 ++++++++++++++++++++++++++----- 1 file changed, 185 insertions(+), 29 deletions(-) diff --git a/pipelines/utils/gypscie/tasks.py b/pipelines/utils/gypscie/tasks.py index b633c80d..48feb596 100644 --- a/pipelines/utils/gypscie/tasks.py +++ b/pipelines/utils/gypscie/tasks.py @@ -1,16 +1,20 @@ # -*- coding: utf-8 -*- +# pylint: disable= C0207 """ Tasks """ import datetime import os +import zipfile from pathlib import Path + from time import sleep from typing import Dict, List import numpy as np import pandas as pd -from basedosdados.upload.base import Base + +from basedosdados import Base from google.cloud import bigquery from prefect import task from prefect.engine.signals import ENDRUN @@ -38,8 +42,8 @@ def access_api(): # username = get_secret(secret_name="USERNAME", path="/gypscie", environment="prod") # password = get_secret(secret_name="PASSWORD", path="/gypscie", environment="prod") - username = get_secret(infisical_username, path="/gypscie")[infisical_username] - password = get_secret(infisical_password, path="/gypscie")[infisical_password] + username = get_secret(infisical_username, path="/gypscie_dexl")[infisical_username] + password = get_secret(infisical_password, path="/gypscie_dexl")[infisical_password] api = GypscieApi(username=username, password=password) return api @@ -171,7 +175,7 @@ def execute_dataset_processor( dataset_id: list, # como pegar os vários datasets environment_id: int, project_id: int, - parameters: dict + parameters: dict, # adicionar campos do dataset_processor ) -> List: """ @@ -289,14 +293,16 
@@ def query_data_from_gcp( # pylint: disable=too-many-arguments @task() -def execute_prediction_on_gypscie( +def execute_dataflow_on_gypscie( api, model_params: dict, # hours_to_predict, -) -> str: +) -> List: """ Requisição de execução de um processo de Predição - Return task_id + Return + {'state': 'STARTED'} + {'result': {'output_datasets': [236]}, 'state': 'SUCCESS'} """ log("Starting prediction") task_response = api.post( @@ -316,10 +322,8 @@ def execute_prediction_on_gypscie( task_state = Failed(failed_message) raise ENDRUN(state=task_state) - print(f"Prediction ended. Response: {response}, {response.json()}") - # TODO: retorna a predição? o id da do dataset? - - return response.json().get("task_id") # response.json().get('task_id') + log(f"Prediction ended. Response: {response}") + return response["result"].get("output_datasets") @task @@ -331,6 +335,86 @@ def task_wait_run(api, task_response, flow_type: str = "dataflow") -> Dict: return wait_run(api, task_response, flow_type) +@task +def get_dataflow_alertario_params( # pylint: disable=too-many-arguments + workflow_id, + environment_id, + project_id, + rain_gauge_data_id, + rain_gauge_metadata_path, + load_data_funtion_id, + parse_date_time_function_id, + drop_duplicates_function_id, + replace_inconsistent_values_function_id, + add_lat_lon_function_id, + save_data_function_id, +) -> List: + """ + Return parameters for the alertario ETL + + { + "workflow_id": 41, + "environment_id": 1, + "parameters": [ + { + "function_id":53, # load_data + "params": { + "rain_gauge_data_path":226, + "rain_gauge_metadata_path":227 + } + }, + { + "function_id":54 # parse_date_time + }, + { + "function_id":55 # drop_duplicates + }, + { + "function_id":56 # replace_inconsistent_values + }, + { + "function_id":57 # add_lat_lon + }, + { + "function_id":58, # save_data + "params": {"output_path":"dados_alertario_20230112_190000.parquet"} + } + ], + "project_id": 1 + } + """ + return { + "workflow_id": workflow_id, + "environment_id": environment_id, + "parameters": [ + { + "function_id": load_data_funtion_id, + "params": { + "rain_gauge_data_path": rain_gauge_data_id, + "rain_gauge_metadata_path": rain_gauge_metadata_path, + }, + }, + { + "function_id": parse_date_time_function_id, + }, + { + "function_id": drop_duplicates_function_id, + }, + { + "function_id": replace_inconsistent_values_function_id, + }, + { + "function_id": add_lat_lon_function_id, + }, + { + "function_id": save_data_function_id, + "params": {"output_path": "preprocessed_data_alertario.parquet"}, + }, + ], + "project_id": project_id, + } + + @task def get_dataflow_params( # pylint: disable=too-many-arguments workflow_id, @@ -343,6 +427,7 @@ def get_dataflow_params( # pylint: disable=too-many-arguments rain_gauge_data_id, grid_data_id, model_data_id, + output_function_id, ) -> List: """ Return parameters for the model @@ -382,6 +467,7 @@ def get_dataflow_params( # pylint: disable=too-many-arguments "function_id": pre_processing_function_id, }, {"function_id": model_function_id, "params": {"model_path": model_data_id}}, + {"function_id": output_function_id, "params": {"output_path": "prediction.npy"}}, ], "project_id": project_id, } @@ -402,27 +488,93 @@ def get_output_dataset_ids_on_gypscie( if err.response.status_code == 404: print(f"Task {task_id} not found") return [] + log(f"status_workflow_run response {response}") return response.get("output_datasets") +@task() +def get_dataset_name_on_gypscie( + api, + dataset_ids: list, +) -> List: + """ + Get datasets name using their dataset 
ids + """ + dataset_names = [] + log(f"All dataset_ids to get names: {dataset_ids}") + for dataset_id in dataset_ids: + log(f"Getting name for dataset id: {dataset_id}") + try: + response = api.get(path="datasets/" + str(dataset_id)) + except HTTPError as err: + if err.response.status_code == 404: + print(f"Dataset_id {dataset_id} not found") + return [] + log(f"Get dataset name response {response}") + dataset_names.append(response.get("name")) + log(f"All dataset names {dataset_names}") + return dataset_names + + @task() def download_datasets_from_gypscie( api, dataset_names: List, - wait=None, + wait=None, # pylint: disable=unused-argument ) -> List: """ Get output files with predictions """ - for file_name in dataset_names: - response = api.get(path=f"download/datasets/{file_name}.zip") + log(f"\n\nDataset names to be downloaded from Gypscie: {dataset_names}") + for dataset_name in dataset_names: + log(f"Downloading dataset {dataset_name} from Gypscie") + response = api.get(f"download/datasets/{dataset_name}.zip") + log(f"Download {dataset_name}'s response: {response}") if response.status_code == 200: - log(f"Dataset {file_name} downloaded") + dataset = response.content + with open(f"{dataset_name}.zip", "wb") as file: + file.write(dataset) + log(f"Dataset {dataset_name} downloaded") else: - log(f"Dataset {file_name} not found on Gypscie") - # TODO: verificar se o arquivo é .zip mesmo - return [dataset_name + ".zip" for dataset_name in dataset_names] + log(f"Dataset {dataset_name} not found on Gypscie") + return dataset_names + + +@task +def unzip_files(zip_files: List[str], destination_folder: str = "./") -> List[str]: + """ + Unzip files to destination folder + """ + zip_files = [ + zip_file if zip_file.endswith(".zip") else zip_file + ".zip" for zip_file in zip_files + ] + os.makedirs(destination_folder, exist_ok=True) + + unziped_files = [] + for zip_file in zip_files: + with zipfile.ZipFile(zip_file, "r") as zip_ref: + zip_ref.extractall(destination_folder) + unziped_files.extend( + [ + os.path.join(destination_folder, nome_arquivo) + for nome_arquivo in zip_ref.namelist() + ] + ) + + return unziped_files + + +@task +def read_numpy_files(file_paths: List[str]) -> List[np.ndarray]: + """ + Read numpy arrays and return a list with of them + """ + arrays = [] + for file_path in file_paths: + array = np.load(file_path) + arrays.append(array) + return arrays @task @@ -451,6 +603,7 @@ def geolocalize_data(prediction_datasets: np.ndarray, now_datetime: str) -> pd.D Expected columns: latitude, longitude, janela_predicao, valor_predicao, data_predicao (timestamp em que foi realizada a previsão) """ + now_datetime = now_datetime + 1 return prediction_datasets @@ -523,7 +676,7 @@ def create_and_save_image(data: xr.xarray, variable) -> Path: return save_image_path """ save_image_path = "image.png" - + data = data + 1 return save_image_path @@ -552,9 +705,9 @@ def get_dataset_info(station_type: str, source: str) -> Dict: } if source == "alertario": dataset_info["table_id"] = "meteorologia_alertario" - dataset_info[ - "destination_table_id" - ] = "preprocessamento_estacao_meteorologica_alertario" + dataset_info["destination_table_id"] = ( + "preprocessamento_estacao_meteorologica_alertario" + ) elif source == "inmet": dataset_info["table_id"] = "meteorologia_inmet" dataset_info["destination_table_id"] = "preprocessamento_estacao_meteorologica_inmet" @@ -577,16 +730,19 @@ def get_dataset_info(station_type: str, source: str) -> Dict: def path_to_dfr(path: str) -> pd.DataFrame: - """ Reads a csv or 
parquet file from the given path and returns a dataframe """ - if path.endswith(".csv"): - dfr = pd.read_csv(path) - elif path.endswith(".parquet"): - dfr = pd.read_parquet(path) - else: - raise ValueError("File extension not supported") + dfr = pd.DataFrame() + try: + if path.endswith(".csv"): + dfr = pd.read_csv(path) + elif path.endswith(".parquet"): + dfr = pd.read_parquet(path) + else: + raise ValueError("File extension not supported") + except AttributeError as error: + log(f"type(path) {type(path)} error {error}") return dfr From fad29543cc0643872302ab5ed03b906733065147 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 30 Oct 2024 22:00:32 +0000 Subject: [PATCH 11/56] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pipelines/meteorologia/precipitacao_alertario/flows.py | 10 ++-------- pipelines/utils/gypscie/tasks.py | 8 +++----- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/pipelines/meteorologia/precipitacao_alertario/flows.py b/pipelines/meteorologia/precipitacao_alertario/flows.py index 90a2a231..b5c6b08c 100644 --- a/pipelines/meteorologia/precipitacao_alertario/flows.py +++ b/pipelines/meteorologia/precipitacao_alertario/flows.py @@ -145,19 +145,13 @@ ######################### dfr_pluviometric, dfr_meteorological = download_data() - ( - dfr_pluviometric, - empty_data_pluviometric, - ) = treat_pluviometer_and_meteorological_data( + (dfr_pluviometric, empty_data_pluviometric,) = treat_pluviometer_and_meteorological_data( dfr=dfr_pluviometric, dataset_id=DATASET_ID_PLUVIOMETRIC, table_id=TABLE_ID_PLUVIOMETRIC, mode=MATERIALIZATION_MODE, ) - ( - dfr_meteorological, - empty_data_meteorological, - ) = treat_pluviometer_and_meteorological_data( + (dfr_meteorological, empty_data_meteorological,) = treat_pluviometer_and_meteorological_data( dfr=dfr_meteorological, dataset_id=DATASET_ID_METEOROLOGICAL, table_id=TABLE_ID_METEOROLOGICAL, diff --git a/pipelines/utils/gypscie/tasks.py b/pipelines/utils/gypscie/tasks.py index 48feb596..03e014e5 100644 --- a/pipelines/utils/gypscie/tasks.py +++ b/pipelines/utils/gypscie/tasks.py @@ -7,13 +7,11 @@ import os import zipfile from pathlib import Path - from time import sleep from typing import Dict, List import numpy as np import pandas as pd - from basedosdados import Base from google.cloud import bigquery from prefect import task @@ -705,9 +703,9 @@ def get_dataset_info(station_type: str, source: str) -> Dict: } if source == "alertario": dataset_info["table_id"] = "meteorologia_alertario" - dataset_info["destination_table_id"] = ( - "preprocessamento_estacao_meteorologica_alertario" - ) + dataset_info[ + "destination_table_id" + ] = "preprocessamento_estacao_meteorologica_alertario" elif source == "inmet": dataset_info["table_id"] = "meteorologia_inmet" dataset_info["destination_table_id"] = "preprocessamento_estacao_meteorologica_inmet" From 631d375eaa025f43e1a5d4e97be3bc2bbcbfa477 Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Wed, 30 Oct 2024 19:16:16 -0300 Subject: [PATCH 12/56] bugfix" --- .../meteorologia/precipitacao_alertario/flows.py | 14 ++++++++++---- pipelines/utils/gypscie/tasks.py | 13 ++++++------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/pipelines/meteorologia/precipitacao_alertario/flows.py b/pipelines/meteorologia/precipitacao_alertario/flows.py index b5c6b08c..3d594f1c 100644 --- a/pipelines/meteorologia/precipitacao_alertario/flows.py +++ 
b/pipelines/meteorologia/precipitacao_alertario/flows.py @@ -47,7 +47,7 @@ # preprocessing imports from pipelines.utils.gypscie.tasks import ( # pylint: disable=E0611, E0401 access_api, - add_columns_on_dfr, + add_caracterization_columns_on_dfr, download_datasets_from_gypscie, execute_dataflow_on_gypscie, get_dataflow_alertario_params, @@ -145,13 +145,19 @@ ######################### dfr_pluviometric, dfr_meteorological = download_data() - (dfr_pluviometric, empty_data_pluviometric,) = treat_pluviometer_and_meteorological_data( + ( + dfr_pluviometric, + empty_data_pluviometric, + ) = treat_pluviometer_and_meteorological_data( dfr=dfr_pluviometric, dataset_id=DATASET_ID_PLUVIOMETRIC, table_id=TABLE_ID_PLUVIOMETRIC, mode=MATERIALIZATION_MODE, ) - (dfr_meteorological, empty_data_meteorological,) = treat_pluviometer_and_meteorological_data( + ( + dfr_meteorological, + empty_data_meteorological, + ) = treat_pluviometer_and_meteorological_data( dfr=dfr_meteorological, dataset_id=DATASET_ID_METEOROLOGICAL, table_id=TABLE_ID_METEOROLOGICAL, @@ -499,7 +505,7 @@ dataset_paths = unzip_files(ziped_dataset_paths) dfr_ = path_to_dfr(dataset_paths) # output_datasets_id = get_output_dataset_ids_on_gypscie(api, dataset_processor_task_id) - dfr = add_columns_on_dfr(dfr_, model_version, update_time=True) + dfr = add_caracterization_columns_on_dfr(dfr_, model_version, update_time=True) # Save pre-treated data on local file with partitions now_datetime = get_now_datetime() diff --git a/pipelines/utils/gypscie/tasks.py b/pipelines/utils/gypscie/tasks.py index 03e014e5..341a3bdd 100644 --- a/pipelines/utils/gypscie/tasks.py +++ b/pipelines/utils/gypscie/tasks.py @@ -703,9 +703,9 @@ def get_dataset_info(station_type: str, source: str) -> Dict: } if source == "alertario": dataset_info["table_id"] = "meteorologia_alertario" - dataset_info[ - "destination_table_id" - ] = "preprocessamento_estacao_meteorologica_alertario" + dataset_info["destination_table_id"] = ( + "preprocessamento_estacao_meteorologica_alertario" + ) elif source == "inmet": dataset_info["table_id"] = "meteorologia_inmet" dataset_info["destination_table_id"] = "preprocessamento_estacao_meteorologica_inmet" @@ -744,12 +744,11 @@ def path_to_dfr(path: str) -> pd.DataFrame: return dfr -def add_columns_on_dfr( - dfr: pd.DataFrame, model_version: int, update_time: bool = False +def add_caracterization_columns_on_dfr( + dfr: pd.DataFrame, model_version: int = None, update_time: bool = False ) -> pd.DataFrame: """ - Reads a csv or parquet file from the given path and adds a column - with the update time based on Brazil timezone + Add a column with the update time based on Brazil timezone and model version """ if update_time: dfr["update_time"] = pd.Timestamp.now(tz="America/Sao_Paulo") From 88c5a3838ce6eb887bd693a53b7bec2389a7118c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 30 Oct 2024 22:16:41 +0000 Subject: [PATCH 13/56] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pipelines/meteorologia/precipitacao_alertario/flows.py | 10 ++-------- pipelines/utils/gypscie/tasks.py | 6 +++--- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/pipelines/meteorologia/precipitacao_alertario/flows.py b/pipelines/meteorologia/precipitacao_alertario/flows.py index 3d594f1c..e197cb31 100644 --- a/pipelines/meteorologia/precipitacao_alertario/flows.py +++ b/pipelines/meteorologia/precipitacao_alertario/flows.py @@ -145,19 +145,13 @@ 
######################### dfr_pluviometric, dfr_meteorological = download_data() - ( - dfr_pluviometric, - empty_data_pluviometric, - ) = treat_pluviometer_and_meteorological_data( + (dfr_pluviometric, empty_data_pluviometric,) = treat_pluviometer_and_meteorological_data( dfr=dfr_pluviometric, dataset_id=DATASET_ID_PLUVIOMETRIC, table_id=TABLE_ID_PLUVIOMETRIC, mode=MATERIALIZATION_MODE, ) - ( - dfr_meteorological, - empty_data_meteorological, - ) = treat_pluviometer_and_meteorological_data( + (dfr_meteorological, empty_data_meteorological,) = treat_pluviometer_and_meteorological_data( dfr=dfr_meteorological, dataset_id=DATASET_ID_METEOROLOGICAL, table_id=TABLE_ID_METEOROLOGICAL, diff --git a/pipelines/utils/gypscie/tasks.py b/pipelines/utils/gypscie/tasks.py index 341a3bdd..8cb2affb 100644 --- a/pipelines/utils/gypscie/tasks.py +++ b/pipelines/utils/gypscie/tasks.py @@ -703,9 +703,9 @@ def get_dataset_info(station_type: str, source: str) -> Dict: } if source == "alertario": dataset_info["table_id"] = "meteorologia_alertario" - dataset_info["destination_table_id"] = ( - "preprocessamento_estacao_meteorologica_alertario" - ) + dataset_info[ + "destination_table_id" + ] = "preprocessamento_estacao_meteorologica_alertario" elif source == "inmet": dataset_info["table_id"] = "meteorologia_inmet" dataset_info["destination_table_id"] = "preprocessamento_estacao_meteorologica_inmet" From a70fc67787a7bad305b20fd7f1ee9f41b2b3ef2b Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Thu, 31 Oct 2024 08:01:33 -0300 Subject: [PATCH 14/56] trying to solve TypeError: object of type 'Parameter' has no len() --- pipelines/meteorologia/precipitacao_alertario/tasks.py | 5 ++++- pipelines/utils/gypscie/tasks.py | 7 +++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/pipelines/meteorologia/precipitacao_alertario/tasks.py b/pipelines/meteorologia/precipitacao_alertario/tasks.py index 7a51cbb1..6d145522 100644 --- a/pipelines/meteorologia/precipitacao_alertario/tasks.py +++ b/pipelines/meteorologia/precipitacao_alertario/tasks.py @@ -182,7 +182,7 @@ def treat_pluviometer_and_meteorological_data( def save_data( dfr: pd.DataFrame, columns: str = None, - treatment_version: str = "", + treatment_version: int = None, data_name: str = "temp", wait=None, # pylint: disable=unused-argument ) -> Tuple[Union[str, Path], Union[str, Path]]: @@ -190,6 +190,9 @@ def save_data( Salvar dfr tratados em csv para conseguir subir pro GCP """ + if not treatment_version: + treatment_version = "" + prepath = Path(f"/tmp/precipitacao_alertario/{data_name}") prepath.mkdir(parents=True, exist_ok=True) diff --git a/pipelines/utils/gypscie/tasks.py b/pipelines/utils/gypscie/tasks.py index 8cb2affb..b3421728 100644 --- a/pipelines/utils/gypscie/tasks.py +++ b/pipelines/utils/gypscie/tasks.py @@ -14,7 +14,7 @@ import pandas as pd from basedosdados import Base from google.cloud import bigquery -from prefect import task +from prefect import task, Parameter from prefect.engine.signals import ENDRUN from prefect.engine.state import Failed from prefeitura_rio.pipelines_utils.infisical import get_secret @@ -745,13 +745,16 @@ def path_to_dfr(path: str) -> pd.DataFrame: def add_caracterization_columns_on_dfr( - dfr: pd.DataFrame, model_version: int = None, update_time: bool = False + dfr: pd.DataFrame, model_version: None, update_time: bool = False ) -> pd.DataFrame: """ Add a column with the update time based on Brazil timezone and model version """ + if update_time: dfr["update_time"] = pd.Timestamp.now(tz="America/Sao_Paulo") if 
model_version is not None: + if isinstance(model_version, Parameter): + model_version = model_version.value dfr["model_version"] = model_version return dfr From ff0b24fc1f31c26daa0dec08ed830d9524ed9406 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 31 Oct 2024 11:02:14 +0000 Subject: [PATCH 15/56] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pipelines/utils/gypscie/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/utils/gypscie/tasks.py b/pipelines/utils/gypscie/tasks.py index b3421728..33aafd8b 100644 --- a/pipelines/utils/gypscie/tasks.py +++ b/pipelines/utils/gypscie/tasks.py @@ -14,7 +14,7 @@ import pandas as pd from basedosdados import Base from google.cloud import bigquery -from prefect import task, Parameter +from prefect import Parameter, task from prefect.engine.signals import ENDRUN from prefect.engine.state import Failed from prefeitura_rio.pipelines_utils.infisical import get_secret From 96ed1ad4476cf72f16e21920beb8c962cc6e63fc Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Thu, 31 Oct 2024 08:39:25 -0300 Subject: [PATCH 16/56] trying to solve TypeError: object of type 'Parameter' has no len() --- pipelines/utils/gypscie/tasks.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pipelines/utils/gypscie/tasks.py b/pipelines/utils/gypscie/tasks.py index 33aafd8b..804de0f1 100644 --- a/pipelines/utils/gypscie/tasks.py +++ b/pipelines/utils/gypscie/tasks.py @@ -754,7 +754,5 @@ def add_caracterization_columns_on_dfr( if update_time: dfr["update_time"] = pd.Timestamp.now(tz="America/Sao_Paulo") if model_version is not None: - if isinstance(model_version, Parameter): - model_version = model_version.value dfr["model_version"] = model_version return dfr From abdcc4b06c76d7b6b8cad778425fb07172a435b7 Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Thu, 31 Oct 2024 08:46:47 -0300 Subject: [PATCH 17/56] trying to solve TypeError: object of type 'Parameter' has no len() --- pipelines/utils/gypscie/tasks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pipelines/utils/gypscie/tasks.py b/pipelines/utils/gypscie/tasks.py index 804de0f1..1829ad2c 100644 --- a/pipelines/utils/gypscie/tasks.py +++ b/pipelines/utils/gypscie/tasks.py @@ -754,5 +754,6 @@ def add_caracterization_columns_on_dfr( if update_time: dfr["update_time"] = pd.Timestamp.now(tz="America/Sao_Paulo") if model_version is not None: - dfr["model_version"] = model_version + model_version_ = str(model_version) + dfr["model_version"] = model_version_ return dfr From be6872b6752b523173521eb9825bc4db67644b9b Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Thu, 31 Oct 2024 08:54:56 -0300 Subject: [PATCH 18/56] bugfix --- pipelines/meteorologia/radar/mendanha/flows.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipelines/meteorologia/radar/mendanha/flows.py b/pipelines/meteorologia/radar/mendanha/flows.py index 8dfb47be..a28d2276 100644 --- a/pipelines/meteorologia/radar/mendanha/flows.py +++ b/pipelines/meteorologia/radar/mendanha/flows.py @@ -62,7 +62,7 @@ access_api as access_api_gypscie, # pylint: disable=E0611, E0401 ) from pipelines.utils.gypscie.tasks import ( - add_columns_on_dfr, + add_caracterization_columns_on_dfr, download_datasets_from_gypscie, execute_dataset_processor, get_dataset_info, @@ -306,7 +306,7 @@ ) dfr_ = path_to_dfr(dataset_path) # output_datasets_id = 
get_output_dataset_ids_on_gypscie(api, dataset_processor_task_id) - dfr = add_columns_on_dfr(dfr_, treatment_version, update_time=True) + dfr = add_caracterization_columns_on_dfr(dfr_, treatment_version, update_time=True) # Save pre-treated data on local file with partitions now_datetime = get_now_datetime() From b849697d8c1e931a84b28353d21bff5d70ce2261 Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Thu, 31 Oct 2024 09:47:15 -0300 Subject: [PATCH 19/56] fixing partition column name and save_data function --- .../precipitacao_alertario/flows.py | 19 ++++++++++++++----- .../precipitacao_alertario/tasks.py | 2 +- pipelines/utils/gypscie/tasks.py | 10 +++++----- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/pipelines/meteorologia/precipitacao_alertario/flows.py b/pipelines/meteorologia/precipitacao_alertario/flows.py index e197cb31..a611aeac 100644 --- a/pipelines/meteorologia/precipitacao_alertario/flows.py +++ b/pipelines/meteorologia/precipitacao_alertario/flows.py @@ -145,13 +145,19 @@ ######################### dfr_pluviometric, dfr_meteorological = download_data() - (dfr_pluviometric, empty_data_pluviometric,) = treat_pluviometer_and_meteorological_data( + ( + dfr_pluviometric, + empty_data_pluviometric, + ) = treat_pluviometer_and_meteorological_data( dfr=dfr_pluviometric, dataset_id=DATASET_ID_PLUVIOMETRIC, table_id=TABLE_ID_PLUVIOMETRIC, mode=MATERIALIZATION_MODE, ) - (dfr_meteorological, empty_data_meteorological,) = treat_pluviometer_and_meteorological_data( + ( + dfr_meteorological, + empty_data_meteorological, + ) = treat_pluviometer_and_meteorological_data( dfr=dfr_meteorological, dataset_id=DATASET_ID_METEOROLOGICAL, table_id=TABLE_ID_METEOROLOGICAL, @@ -160,7 +166,10 @@ with case(empty_data_pluviometric, False): path_pluviometric, full_path_pluviometric = save_data( - dfr_pluviometric, treatment_version, "pluviometric", wait=empty_data_pluviometric + dfr_pluviometric, + data_name="pluviometric", + treatment_version=treatment_version, + wait=empty_data_pluviometric, ) # Create table in BigQuery UPLOAD_TABLE = create_table_and_upload_to_gcs( @@ -393,7 +402,7 @@ # Save and materialize meteorological data with case(empty_data_meteorological, False): path_meteorological = save_data( - dfr_meteorological, "meteorological", wait=empty_data_meteorological + dfr_meteorological, data_name="meteorological", wait=empty_data_meteorological ) # Create table in BigQuery UPLOAD_TABLE_METEOROLOGICAL = create_table_and_upload_to_gcs( @@ -455,8 +464,8 @@ dfr_pluviometric_gypscie = convert_sp_timezone_to_utc(dfr_pluviometric) path_pluviometric_gypscie, full_path_pluviometric_gypscie = save_data( dfr_pluviometric_gypscie, - columns=["id_estacao", "data_medicao", "acumulado_chuva_5min"], data_name="gypscie", + columns=["id_estacao", "data_medicao", "acumulado_chuva_5min"], ) register_dataset_response = register_dataset_on_gypscie( api, filepath=path_pluviometric_gypscie, domain_id=domain_id diff --git a/pipelines/meteorologia/precipitacao_alertario/tasks.py b/pipelines/meteorologia/precipitacao_alertario/tasks.py index 6d145522..62380fca 100644 --- a/pipelines/meteorologia/precipitacao_alertario/tasks.py +++ b/pipelines/meteorologia/precipitacao_alertario/tasks.py @@ -181,9 +181,9 @@ def treat_pluviometer_and_meteorological_data( @task(nout=2) def save_data( dfr: pd.DataFrame, + data_name: str = "temp", columns: str = None, treatment_version: int = None, - data_name: str = "temp", wait=None, # pylint: disable=unused-argument ) -> Tuple[Union[str, Path], Union[str, Path]]: """ 
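The fixes in this series all converge on one hand-off between the Alertario flow and Gypscie: convert the gauge timestamps to UTC, save the columns the Gypscie ETL consumes, register that file as a dataset, run the registered dataflow, then pull the output back for partitioning and upload. The following is a minimal sketch of that chain, not part of any commit, assuming the task signatures introduced in these patches; the numeric ids (workflow, function ids, metadata path, domain/project/environment) are the defaults wired into the schedules and are registrations on a specific Gypscie instance, so they are assumptions here.

# Illustrative sketch only; like the rest of flows.py this runs inside the
# `with Flow(...)` block, so each call builds a Prefect task in the DAG.
# `dfr_pluviometric` is assumed to be the treated dataframe produced earlier in the flow.
from pipelines.meteorologia.precipitacao_alertario.tasks import (
    convert_sp_timezone_to_utc,
    save_data,
)
from pipelines.utils.gypscie.tasks import (
    access_api,
    add_caracterization_columns_on_dfr,
    download_datasets_from_gypscie,
    execute_dataflow_on_gypscie,
    get_dataflow_alertario_params,
    get_dataset_name_on_gypscie,
    path_to_dfr,
    register_dataset_on_gypscie,
    unzip_files,
)

api = access_api()

# 1. Convert gauge timestamps from America/Sao_Paulo to UTC and save only the
#    columns the Gypscie workflow expects.
dfr_utc = convert_sp_timezone_to_utc(dfr_pluviometric)
_, full_path = save_data(
    dfr_utc,
    data_name="gypscie",
    columns=["id_estacao", "data_medicao", "acumulado_chuva_5min"],
)

# 2. Register the saved file as a Gypscie dataset and assemble the dataflow
#    parameters (ids below are the schedule defaults, assumed for illustration).
dataset = register_dataset_on_gypscie(api, filepath=full_path, domain_id=1)
params = get_dataflow_alertario_params(
    workflow_id=41,
    environment_id=1,
    project_id=1,
    rain_gauge_data_id=dataset["id"],
    rain_gauge_metadata_path=227,
    load_data_funtion_id=53,
    parse_date_time_function_id=54,
    drop_duplicates_function_id=55,
    replace_inconsistent_values_function_id=56,
    add_lat_lon_function_id=57,
    save_data_function_id=58,
)

# 3. Execute the workflow, download and unzip its output datasets, and load the
#    first extracted file as a dataframe with the characterization columns added.
output_ids = execute_dataflow_on_gypscie(api, params)
dataset_names = get_dataset_name_on_gypscie(api, output_ids)
zipped = download_datasets_from_gypscie(api, dataset_names=dataset_names)
extracted = unzip_files(zipped)
dfr = add_caracterization_columns_on_dfr(
    path_to_dfr(extracted[0]), model_version=1, update_time=True
)

From there the flow partitions dfr with task_create_partitions and uploads it with create_table_and_upload_to_gcs, exactly as in the hunks above.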
diff --git a/pipelines/utils/gypscie/tasks.py b/pipelines/utils/gypscie/tasks.py index 1829ad2c..fcd1eaf1 100644 --- a/pipelines/utils/gypscie/tasks.py +++ b/pipelines/utils/gypscie/tasks.py @@ -690,7 +690,7 @@ def get_dataset_info(station_type: str, source: str) -> Dict: dataset_info = { "dataset_id": "clima_pluviometro", "filename": "gauge_station_bq", - "partition_date_column": "datetime", + "partition_date_column": "data_medicao", } if source == "alertario": dataset_info["table_id"] = "taxa_precipitacao_alertario" @@ -699,13 +699,13 @@ def get_dataset_info(station_type: str, source: str) -> Dict: dataset_info = { "dataset_id": "clima_pluviometro", "filename": "weather_station_bq", - "partition_date_column": "datetime", + "partition_date_column": "data_medicao", } if source == "alertario": dataset_info["table_id"] = "meteorologia_alertario" - dataset_info[ - "destination_table_id" - ] = "preprocessamento_estacao_meteorologica_alertario" + dataset_info["destination_table_id"] = ( + "preprocessamento_estacao_meteorologica_alertario" + ) elif source == "inmet": dataset_info["table_id"] = "meteorologia_inmet" dataset_info["destination_table_id"] = "preprocessamento_estacao_meteorologica_inmet" From 62781691baf7c6bfaed2c64e05738bab343b9a0f Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Thu, 31 Oct 2024 11:25:42 -0300 Subject: [PATCH 20/56] bugfix --- pipelines/meteorologia/precipitacao_alertario/tasks.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pipelines/meteorologia/precipitacao_alertario/tasks.py b/pipelines/meteorologia/precipitacao_alertario/tasks.py index 62380fca..7793f53d 100644 --- a/pipelines/meteorologia/precipitacao_alertario/tasks.py +++ b/pipelines/meteorologia/precipitacao_alertario/tasks.py @@ -197,6 +197,8 @@ def save_data( prepath.mkdir(parents=True, exist_ok=True) partition_column = "data_medicao" + new_partition_columns = ["ano_particao", "mes_particao", "data_particao"] + dfr = dfr.drop(columns=[col for col in new_partition_columns if col in dfr.columns]) log(f"Dataframe for {data_name} before partitions {dfr.iloc[0]}") log(f"Dataframe for {data_name} before partitions {dfr.dtypes}") dataframe, partitions = parse_date_columns(dfr, partition_column) @@ -205,7 +207,7 @@ def save_data( log(f"Dataframe for {data_name} after partitions {dataframe.dtypes}") if columns: - dataframe = dataframe[columns] + dataframe = dataframe[columns + new_partition_columns] full_path = to_partitions( data=dataframe, From a46a02331430d819de0cbc9f46832e0c1c4a31ce Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 31 Oct 2024 14:26:03 +0000 Subject: [PATCH 21/56] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pipelines/meteorologia/precipitacao_alertario/flows.py | 10 ++-------- pipelines/utils/gypscie/tasks.py | 6 +++--- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/pipelines/meteorologia/precipitacao_alertario/flows.py b/pipelines/meteorologia/precipitacao_alertario/flows.py index a611aeac..88474bb4 100644 --- a/pipelines/meteorologia/precipitacao_alertario/flows.py +++ b/pipelines/meteorologia/precipitacao_alertario/flows.py @@ -145,19 +145,13 @@ ######################### dfr_pluviometric, dfr_meteorological = download_data() - ( - dfr_pluviometric, - empty_data_pluviometric, - ) = treat_pluviometer_and_meteorological_data( + (dfr_pluviometric, empty_data_pluviometric,) = treat_pluviometer_and_meteorological_data( 
dfr=dfr_pluviometric, dataset_id=DATASET_ID_PLUVIOMETRIC, table_id=TABLE_ID_PLUVIOMETRIC, mode=MATERIALIZATION_MODE, ) - ( - dfr_meteorological, - empty_data_meteorological, - ) = treat_pluviometer_and_meteorological_data( + (dfr_meteorological, empty_data_meteorological,) = treat_pluviometer_and_meteorological_data( dfr=dfr_meteorological, dataset_id=DATASET_ID_METEOROLOGICAL, table_id=TABLE_ID_METEOROLOGICAL, diff --git a/pipelines/utils/gypscie/tasks.py b/pipelines/utils/gypscie/tasks.py index fcd1eaf1..df0e9b1f 100644 --- a/pipelines/utils/gypscie/tasks.py +++ b/pipelines/utils/gypscie/tasks.py @@ -703,9 +703,9 @@ def get_dataset_info(station_type: str, source: str) -> Dict: } if source == "alertario": dataset_info["table_id"] = "meteorologia_alertario" - dataset_info["destination_table_id"] = ( - "preprocessamento_estacao_meteorologica_alertario" - ) + dataset_info[ + "destination_table_id" + ] = "preprocessamento_estacao_meteorologica_alertario" elif source == "inmet": dataset_info["table_id"] = "meteorologia_inmet" dataset_info["destination_table_id"] = "preprocessamento_estacao_meteorologica_inmet" From 03b1de27bd4373125ffc1bd84c871bf5e04da55a Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Thu, 31 Oct 2024 11:39:20 -0300 Subject: [PATCH 22/56] bugfix --- pipelines/meteorologia/precipitacao_alertario/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/meteorologia/precipitacao_alertario/tasks.py b/pipelines/meteorologia/precipitacao_alertario/tasks.py index 7793f53d..a0b34047 100644 --- a/pipelines/meteorologia/precipitacao_alertario/tasks.py +++ b/pipelines/meteorologia/precipitacao_alertario/tasks.py @@ -214,7 +214,7 @@ def save_data( partition_columns=partitions, savepath=prepath, data_type="csv", - suffix=treatment_version + "_" + current_time, + suffix=str(treatment_version) + "_" + current_time, ) log(f"Files saved on {prepath}, full path is {full_path}") return prepath, full_path From 382527ff3fecd393ce674b82096da1778be8820f Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Thu, 31 Oct 2024 12:41:56 -0300 Subject: [PATCH 23/56] fixing gypscie api --- pipelines/constants.py | 1 + pipelines/utils/gypscie/tasks.py | 23 +++++++++++++---------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/pipelines/constants.py b/pipelines/constants.py index b01c7b82..2134d604 100644 --- a/pipelines/constants.py +++ b/pipelines/constants.py @@ -115,6 +115,7 @@ class constants(Enum): } # Infisical + INFISICAL_PATH = "/gypscie_dexl" INFISICAL_URL = "URL" INFISICAL_USERNAME = "USERNAME" INFISICAL_PASSWORD = "PASSWORD" diff --git a/pipelines/utils/gypscie/tasks.py b/pipelines/utils/gypscie/tasks.py index df0e9b1f..3b23317e 100644 --- a/pipelines/utils/gypscie/tasks.py +++ b/pipelines/utils/gypscie/tasks.py @@ -12,13 +12,13 @@ import numpy as np import pandas as pd -from basedosdados import Base -from google.cloud import bigquery -from prefect import Parameter, task -from prefect.engine.signals import ENDRUN -from prefect.engine.state import Failed -from prefeitura_rio.pipelines_utils.infisical import get_secret -from prefeitura_rio.pipelines_utils.logging import log +from basedosdados import Base # pylint: disable=E0611, E0401 +from google.cloud import bigquery # pylint: disable=E0611, E0401 +from prefect import task # pylint: disable=E0611, E0401 +from prefect.engine.signals import ENDRUN # pylint: disable=E0611, E0401 +from prefect.engine.state import Failed # pylint: disable=E0611, E0401 +from prefeitura_rio.pipelines_utils.infisical 
import get_secret # pylint: disable=E0611, E0401 +from prefeitura_rio.pipelines_utils.logging import log # pylint: disable=E0611, E0401 from requests.exceptions import HTTPError from pipelines.constants import constants # pylint: disable=E0611, E0401 @@ -34,15 +34,18 @@ def access_api(): """# noqa E303 Acess api and return it to be used in other requests """ + infisical_path = constants.INFISICAL_PATH.value + infisical_url = constants.INFISICAL_URL.value infisical_username = constants.INFISICAL_USERNAME.value infisical_password = constants.INFISICAL_PASSWORD.value # username = get_secret(secret_name="USERNAME", path="/gypscie", environment="prod") # password = get_secret(secret_name="PASSWORD", path="/gypscie", environment="prod") - username = get_secret(infisical_username, path="/gypscie_dexl")[infisical_username] - password = get_secret(infisical_password, path="/gypscie_dexl")[infisical_password] - api = GypscieApi(username=username, password=password) + url = get_secret(infisical_url, path=infisical_path)[infisical_url] + username = get_secret(infisical_username, path=infisical_path)[infisical_username] + password = get_secret(infisical_password, path=infisical_path)[infisical_password] + api = GypscieApi(base_url=url, username=username, password=password) return api From fb946caefc68cfba8cb3da3932d7a73673478449 Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Thu, 31 Oct 2024 14:08:39 -0300 Subject: [PATCH 24/56] bugfix --- .../meteorologia/precipitacao_alertario/flows.py | 15 +++++++++++---- pipelines/utils/gypscie/tasks.py | 9 ++++----- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/pipelines/meteorologia/precipitacao_alertario/flows.py b/pipelines/meteorologia/precipitacao_alertario/flows.py index 88474bb4..b0c36327 100644 --- a/pipelines/meteorologia/precipitacao_alertario/flows.py +++ b/pipelines/meteorologia/precipitacao_alertario/flows.py @@ -13,6 +13,7 @@ wait_for_flow_run, ) from prefeitura_rio.pipelines_utils.custom import Flow # pylint: disable=E0611, E0401 +# pylint: disable=E0611, E0401 from prefeitura_rio.pipelines_utils.state_handlers import handler_inject_bd_credentials from prefeitura_rio.pipelines_utils.tasks import ( # pylint: disable=E0611, E0401 create_table_and_upload_to_gcs, @@ -133,7 +134,7 @@ ) # Dataset parameters - station_type = Parameter("station_type", default="pluviometro", required=False) + station_type = Parameter("station_type", default="rain_gauge", required=False) source = Parameter("source", default="alertario", required=False) # Dataset path, if it was saved on ETL flow or it will be None @@ -145,13 +146,19 @@ ######################### dfr_pluviometric, dfr_meteorological = download_data() - (dfr_pluviometric, empty_data_pluviometric,) = treat_pluviometer_and_meteorological_data( + ( + dfr_pluviometric, + empty_data_pluviometric, + ) = treat_pluviometer_and_meteorological_data( dfr=dfr_pluviometric, dataset_id=DATASET_ID_PLUVIOMETRIC, table_id=TABLE_ID_PLUVIOMETRIC, mode=MATERIALIZATION_MODE, ) - (dfr_meteorological, empty_data_meteorological,) = treat_pluviometer_and_meteorological_data( + ( + dfr_meteorological, + empty_data_meteorological, + ) = treat_pluviometer_and_meteorological_data( dfr=dfr_meteorological, dataset_id=DATASET_ID_METEOROLOGICAL, table_id=TABLE_ID_METEOROLOGICAL, @@ -395,7 +402,7 @@ # Save and materialize meteorological data with case(empty_data_meteorological, False): - path_meteorological = save_data( + path_meteorological, full_path_meteorological = save_data( dfr_meteorological, 
data_name="meteorological", wait=empty_data_meteorological ) # Create table in BigQuery diff --git a/pipelines/utils/gypscie/tasks.py b/pipelines/utils/gypscie/tasks.py index 3b23317e..8c024ca2 100644 --- a/pipelines/utils/gypscie/tasks.py +++ b/pipelines/utils/gypscie/tasks.py @@ -134,7 +134,6 @@ def register_dataset_on_gypscie(api, filepath: Path, domain_id: int = 1) -> Dict + "_" + datetime.datetime.now().strftime("%Y%m%d%H%M%S"), # pylint: disable=use-maxsplit-arg } - log(type(data), data) files = { "files": open(file=filepath, mode="rb"), # pylint: disable=consider-using-with } @@ -706,9 +705,9 @@ def get_dataset_info(station_type: str, source: str) -> Dict: } if source == "alertario": dataset_info["table_id"] = "meteorologia_alertario" - dataset_info[ - "destination_table_id" - ] = "preprocessamento_estacao_meteorologica_alertario" + dataset_info["destination_table_id"] = ( + "preprocessamento_estacao_meteorologica_alertario" + ) elif source == "inmet": dataset_info["table_id"] = "meteorologia_inmet" dataset_info["destination_table_id"] = "preprocessamento_estacao_meteorologica_inmet" @@ -726,7 +725,7 @@ def get_dataset_info(station_type: str, source: str) -> Dict: elif source == "macae": dataset_info["storage_path"] = "" dataset_info["destination_table_id"] = "preprocessamento_radar_macae" - + log(f"Dataset info: {dataset_info}") return dataset_info From 0aab9121fa925b6a0957b473e0cf7e3bc1c2ca50 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 31 Oct 2024 17:08:58 +0000 Subject: [PATCH 25/56] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../meteorologia/precipitacao_alertario/flows.py | 11 +++-------- pipelines/utils/gypscie/tasks.py | 10 ++++++---- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/pipelines/meteorologia/precipitacao_alertario/flows.py b/pipelines/meteorologia/precipitacao_alertario/flows.py index b0c36327..e0bfdca5 100644 --- a/pipelines/meteorologia/precipitacao_alertario/flows.py +++ b/pipelines/meteorologia/precipitacao_alertario/flows.py @@ -13,6 +13,7 @@ wait_for_flow_run, ) from prefeitura_rio.pipelines_utils.custom import Flow # pylint: disable=E0611, E0401 + # pylint: disable=E0611, E0401 from prefeitura_rio.pipelines_utils.state_handlers import handler_inject_bd_credentials from prefeitura_rio.pipelines_utils.tasks import ( # pylint: disable=E0611, E0401 @@ -146,19 +147,13 @@ ######################### dfr_pluviometric, dfr_meteorological = download_data() - ( - dfr_pluviometric, - empty_data_pluviometric, - ) = treat_pluviometer_and_meteorological_data( + (dfr_pluviometric, empty_data_pluviometric,) = treat_pluviometer_and_meteorological_data( dfr=dfr_pluviometric, dataset_id=DATASET_ID_PLUVIOMETRIC, table_id=TABLE_ID_PLUVIOMETRIC, mode=MATERIALIZATION_MODE, ) - ( - dfr_meteorological, - empty_data_meteorological, - ) = treat_pluviometer_and_meteorological_data( + (dfr_meteorological, empty_data_meteorological,) = treat_pluviometer_and_meteorological_data( dfr=dfr_meteorological, dataset_id=DATASET_ID_METEOROLOGICAL, table_id=TABLE_ID_METEOROLOGICAL, diff --git a/pipelines/utils/gypscie/tasks.py b/pipelines/utils/gypscie/tasks.py index 8c024ca2..1c91a97e 100644 --- a/pipelines/utils/gypscie/tasks.py +++ b/pipelines/utils/gypscie/tasks.py @@ -17,7 +17,9 @@ from prefect import task # pylint: disable=E0611, E0401 from prefect.engine.signals import ENDRUN # pylint: disable=E0611, E0401 from prefect.engine.state import 
Failed # pylint: disable=E0611, E0401 -from prefeitura_rio.pipelines_utils.infisical import get_secret # pylint: disable=E0611, E0401 +from prefeitura_rio.pipelines_utils.infisical import ( + get_secret, # pylint: disable=E0611, E0401 +) from prefeitura_rio.pipelines_utils.logging import log # pylint: disable=E0611, E0401 from requests.exceptions import HTTPError @@ -705,9 +707,9 @@ def get_dataset_info(station_type: str, source: str) -> Dict: } if source == "alertario": dataset_info["table_id"] = "meteorologia_alertario" - dataset_info["destination_table_id"] = ( - "preprocessamento_estacao_meteorologica_alertario" - ) + dataset_info[ + "destination_table_id" + ] = "preprocessamento_estacao_meteorologica_alertario" elif source == "inmet": dataset_info["table_id"] = "meteorologia_inmet" dataset_info["destination_table_id"] = "preprocessamento_estacao_meteorologica_inmet" From 1288be10a71a419ef970e43bc3e40b739d14b00f Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Thu, 31 Oct 2024 14:32:30 -0300 Subject: [PATCH 26/56] bugfix --- .../meteorologia/precipitacao_alertario/flows.py | 13 ++++++++++--- .../precipitacao_alertario/schedules.py | 14 ++++++++++++-- pipelines/tasks.py | 11 ++++++----- 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/pipelines/meteorologia/precipitacao_alertario/flows.py b/pipelines/meteorologia/precipitacao_alertario/flows.py index e0bfdca5..ed1d6a43 100644 --- a/pipelines/meteorologia/precipitacao_alertario/flows.py +++ b/pipelines/meteorologia/precipitacao_alertario/flows.py @@ -147,13 +147,19 @@ ######################### dfr_pluviometric, dfr_meteorological = download_data() - (dfr_pluviometric, empty_data_pluviometric,) = treat_pluviometer_and_meteorological_data( + ( + dfr_pluviometric, + empty_data_pluviometric, + ) = treat_pluviometer_and_meteorological_data( dfr=dfr_pluviometric, dataset_id=DATASET_ID_PLUVIOMETRIC, table_id=TABLE_ID_PLUVIOMETRIC, mode=MATERIALIZATION_MODE, ) - (dfr_meteorological, empty_data_meteorological,) = treat_pluviometer_and_meteorological_data( + ( + dfr_meteorological, + empty_data_meteorological, + ) = treat_pluviometer_and_meteorological_data( dfr=dfr_meteorological, dataset_id=DATASET_ID_METEOROLOGICAL, table_id=TABLE_ID_METEOROLOGICAL, @@ -464,7 +470,7 @@ columns=["id_estacao", "data_medicao", "acumulado_chuva_5min"], ) register_dataset_response = register_dataset_on_gypscie( - api, filepath=path_pluviometric_gypscie, domain_id=domain_id + api, filepath=full_path_pluviometric_gypscie, domain_id=domain_id ) model_params = get_dataflow_alertario_params( @@ -513,6 +519,7 @@ partition_date_column=dataset_info["partition_date_column"], savepath="model_prediction", suffix=now_datetime, + wait=dfr, ) ################################ # Save preprocessing on GCP # diff --git a/pipelines/meteorologia/precipitacao_alertario/schedules.py b/pipelines/meteorologia/precipitacao_alertario/schedules.py index 2f1a74da..f928c357 100644 --- a/pipelines/meteorologia/precipitacao_alertario/schedules.py +++ b/pipelines/meteorologia/precipitacao_alertario/schedules.py @@ -27,17 +27,27 @@ "materialize_to_datario": False, "mode": "prod", "dump_to_gcs": False, + "maximum_bytes_processed": None, + "preprocessing_gypscie": True, + "workflow_id": 1, "environment_id": 1, "domain_id": 1, "project_id": 1, "project_name": "rionowcast_precipitation", + "treatment_version": 1, "processor_name": "etl_alertario22", "dataset_processor_id": 43, + "load_data_function_id": 53, + "parse_date_time_function_id": 54, + "drop_duplicates_function_id": 55, + 
"replace_inconsistent_values_function_id": 56, + "add_lat_lon_function_id": 57, + "save_data_function_id": 58, + "rain_gauge_metadata_path": 227, "dataset_id_previsao_chuva": "clima_previsao_chuva", "table_id_previsao_chuva": "preprocessamento_pluviometro_alertario", - "station_type": "pluviometro", + "station_type": "rain_gauge", "source": "alertario", - "maximum_bytes_processed": None, "model_version": 1, }, ), diff --git a/pipelines/tasks.py b/pipelines/tasks.py index 80942dd9..557c808b 100644 --- a/pipelines/tasks.py +++ b/pipelines/tasks.py @@ -9,11 +9,11 @@ from typing import List, Union import pandas as pd -import pendulum -from google.cloud import storage -from prefect import task -from prefect.triggers import all_successful -from prefeitura_rio.pipelines_utils.infisical import get_secret +import pendulum # pylint: disable=E0611, E0401 +from google.cloud import storage # pylint: disable=E0611, E0401 +from prefect import task # pylint: disable=E0611, E0401 +from prefect.triggers import all_successful # pylint: disable=E0611, E0401 +from prefeitura_rio.pipelines_utils.infisical import get_secret # pylint: disable=E0611, E0401 from prefeitura_rio.pipelines_utils.pandas import ( # pylint: disable=E0611, E0401 parse_date_columns, to_partitions, @@ -309,6 +309,7 @@ def task_create_partitions( suffix: str = None, build_json_dataframe: bool = False, dataframe_key_column: str = None, + wait=None, # pylint: disable=unused-argument ) -> Path: # sourcery skip: raise-specific-error """ Create task for to_partitions From 0601c6d67f220d93cea2046186e55dc909230342 Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Thu, 31 Oct 2024 15:29:39 -0300 Subject: [PATCH 27/56] bugfix --- pipelines/meteorologia/precipitacao_alertario/tasks.py | 2 +- pipelines/tasks.py | 1 + pipelines/utils/gypscie/tasks.py | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pipelines/meteorologia/precipitacao_alertario/tasks.py b/pipelines/meteorologia/precipitacao_alertario/tasks.py index a0b34047..72e44c40 100644 --- a/pipelines/meteorologia/precipitacao_alertario/tasks.py +++ b/pipelines/meteorologia/precipitacao_alertario/tasks.py @@ -217,7 +217,7 @@ def save_data( suffix=str(treatment_version) + "_" + current_time, ) log(f"Files saved on {prepath}, full path is {full_path}") - return prepath, full_path + return prepath, full_path[0] @task diff --git a/pipelines/tasks.py b/pipelines/tasks.py index 557c808b..0ac567b5 100644 --- a/pipelines/tasks.py +++ b/pipelines/tasks.py @@ -314,6 +314,7 @@ def task_create_partitions( """ Create task for to_partitions """ + log(f"Data before partition columns creation {data.iloc[0]}") data, partition_columns = parse_date_columns(data, partition_date_column) log(f"Created partition columns {partition_columns} and data first row now is {data.iloc[0]}") saved_files = to_partitions( diff --git a/pipelines/utils/gypscie/tasks.py b/pipelines/utils/gypscie/tasks.py index 1c91a97e..9744e8b5 100644 --- a/pipelines/utils/gypscie/tasks.py +++ b/pipelines/utils/gypscie/tasks.py @@ -694,7 +694,7 @@ def get_dataset_info(station_type: str, source: str) -> Dict: dataset_info = { "dataset_id": "clima_pluviometro", "filename": "gauge_station_bq", - "partition_date_column": "data_medicao", + "partition_date_column": "datetime", } if source == "alertario": dataset_info["table_id"] = "taxa_precipitacao_alertario" @@ -703,7 +703,7 @@ def get_dataset_info(station_type: str, source: str) -> Dict: dataset_info = { "dataset_id": "clima_pluviometro", "filename": "weather_station_bq", - 
"partition_date_column": "data_medicao", + "partition_date_column": "datetime", } if source == "alertario": dataset_info["table_id"] = "meteorologia_alertario" From 8d913cc9fc27de33d207e2e20749f37399a8c5ef Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Thu, 31 Oct 2024 16:19:44 -0300 Subject: [PATCH 28/56] bugfix --- pipelines/meteorologia/precipitacao_alertario/flows.py | 3 ++- pipelines/meteorologia/precipitacao_alertario/schedules.py | 2 +- pipelines/meteorologia/precipitacao_alertario/tasks.py | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pipelines/meteorologia/precipitacao_alertario/flows.py b/pipelines/meteorologia/precipitacao_alertario/flows.py index ed1d6a43..6277c953 100644 --- a/pipelines/meteorologia/precipitacao_alertario/flows.py +++ b/pipelines/meteorologia/precipitacao_alertario/flows.py @@ -101,7 +101,7 @@ # Preprocessing gypscie parameters preprocessing_gypscie = Parameter("preprocessing_gypscie", default=False, required=False) # Gypscie parameters - workflow_id = Parameter("workflow_id", default=1, required=False) + workflow_id = Parameter("workflow_id", default=41, required=False) environment_id = Parameter("environment_id", default=1, required=False) domain_id = Parameter("domain_id", default=1, required=False) project_id = Parameter("project_id", default=1, required=False) @@ -468,6 +468,7 @@ dfr_pluviometric_gypscie, data_name="gypscie", columns=["id_estacao", "data_medicao", "acumulado_chuva_5min"], + data_type="parquet", ) register_dataset_response = register_dataset_on_gypscie( api, filepath=full_path_pluviometric_gypscie, domain_id=domain_id diff --git a/pipelines/meteorologia/precipitacao_alertario/schedules.py b/pipelines/meteorologia/precipitacao_alertario/schedules.py index f928c357..1fecfbb8 100644 --- a/pipelines/meteorologia/precipitacao_alertario/schedules.py +++ b/pipelines/meteorologia/precipitacao_alertario/schedules.py @@ -29,7 +29,7 @@ "dump_to_gcs": False, "maximum_bytes_processed": None, "preprocessing_gypscie": True, - "workflow_id": 1, + "workflow_id": 41, "environment_id": 1, "domain_id": 1, "project_id": 1, diff --git a/pipelines/meteorologia/precipitacao_alertario/tasks.py b/pipelines/meteorologia/precipitacao_alertario/tasks.py index 72e44c40..22edcde3 100644 --- a/pipelines/meteorologia/precipitacao_alertario/tasks.py +++ b/pipelines/meteorologia/precipitacao_alertario/tasks.py @@ -184,6 +184,7 @@ def save_data( data_name: str = "temp", columns: str = None, treatment_version: int = None, + data_type: str = "csv", wait=None, # pylint: disable=unused-argument ) -> Tuple[Union[str, Path], Union[str, Path]]: """ @@ -213,7 +214,7 @@ def save_data( data=dataframe, partition_columns=partitions, savepath=prepath, - data_type="csv", + data_type=data_type, suffix=str(treatment_version) + "_" + current_time, ) log(f"Files saved on {prepath}, full path is {full_path}") From f0094ef081f3f195b5061fe81d25b86fe93a6af9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 31 Oct 2024 19:20:06 +0000 Subject: [PATCH 29/56] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pipelines/meteorologia/precipitacao_alertario/flows.py | 10 ++-------- pipelines/tasks.py | 4 +++- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/pipelines/meteorologia/precipitacao_alertario/flows.py b/pipelines/meteorologia/precipitacao_alertario/flows.py index 6277c953..6a6fd133 100644 --- 
a/pipelines/meteorologia/precipitacao_alertario/flows.py +++ b/pipelines/meteorologia/precipitacao_alertario/flows.py @@ -147,19 +147,13 @@ ######################### dfr_pluviometric, dfr_meteorological = download_data() - ( - dfr_pluviometric, - empty_data_pluviometric, - ) = treat_pluviometer_and_meteorological_data( + (dfr_pluviometric, empty_data_pluviometric,) = treat_pluviometer_and_meteorological_data( dfr=dfr_pluviometric, dataset_id=DATASET_ID_PLUVIOMETRIC, table_id=TABLE_ID_PLUVIOMETRIC, mode=MATERIALIZATION_MODE, ) - ( - dfr_meteorological, - empty_data_meteorological, - ) = treat_pluviometer_and_meteorological_data( + (dfr_meteorological, empty_data_meteorological,) = treat_pluviometer_and_meteorological_data( dfr=dfr_meteorological, dataset_id=DATASET_ID_METEOROLOGICAL, table_id=TABLE_ID_METEOROLOGICAL, diff --git a/pipelines/tasks.py b/pipelines/tasks.py index 0ac567b5..8318a46d 100644 --- a/pipelines/tasks.py +++ b/pipelines/tasks.py @@ -13,7 +13,9 @@ from google.cloud import storage # pylint: disable=E0611, E0401 from prefect import task # pylint: disable=E0611, E0401 from prefect.triggers import all_successful # pylint: disable=E0611, E0401 -from prefeitura_rio.pipelines_utils.infisical import get_secret # pylint: disable=E0611, E0401 +from prefeitura_rio.pipelines_utils.infisical import ( + get_secret, # pylint: disable=E0611, E0401 +) from prefeitura_rio.pipelines_utils.pandas import ( # pylint: disable=E0611, E0401 parse_date_columns, to_partitions, From 9506ed07f75c348f5cd4e14d9c8b075cc84eda05 Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Thu, 31 Oct 2024 16:52:52 -0300 Subject: [PATCH 30/56] converting utc date to specific format --- pipelines/meteorologia/precipitacao_alertario/tasks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pipelines/meteorologia/precipitacao_alertario/tasks.py b/pipelines/meteorologia/precipitacao_alertario/tasks.py index 22edcde3..7ade0aed 100644 --- a/pipelines/meteorologia/precipitacao_alertario/tasks.py +++ b/pipelines/meteorologia/precipitacao_alertario/tasks.py @@ -378,5 +378,6 @@ def convert_sp_timezone_to_utc(dfr, data_column: str = "data_medicao") -> pd.Dat dfr[data_column] = pd.to_datetime(dfr[data_column]) dfr[data_column] = dfr[data_column].dt.tz_localize("America/Sao_Paulo") dfr[data_column] = dfr[data_column].dt.tz_convert("UTC") + dfr[data_column] = dfr[data_column].dt.strftime("%Y-%m-%d %H:%M:%S") return dfr From 26acf665a5c0aad0bd1a0bb93a6ec61c7a30a658 Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Thu, 31 Oct 2024 18:08:07 -0300 Subject: [PATCH 31/56] changing task_create_partitions --- .../precipitacao_alertario/flows.py | 22 +++++++++++---- pipelines/tasks.py | 28 ++++++++++++++----- 2 files changed, 37 insertions(+), 13 deletions(-) diff --git a/pipelines/meteorologia/precipitacao_alertario/flows.py b/pipelines/meteorologia/precipitacao_alertario/flows.py index 6a6fd133..a5d686c5 100644 --- a/pipelines/meteorologia/precipitacao_alertario/flows.py +++ b/pipelines/meteorologia/precipitacao_alertario/flows.py @@ -147,13 +147,19 @@ ######################### dfr_pluviometric, dfr_meteorological = download_data() - (dfr_pluviometric, empty_data_pluviometric,) = treat_pluviometer_and_meteorological_data( + ( + dfr_pluviometric, + empty_data_pluviometric, + ) = treat_pluviometer_and_meteorological_data( dfr=dfr_pluviometric, dataset_id=DATASET_ID_PLUVIOMETRIC, table_id=TABLE_ID_PLUVIOMETRIC, mode=MATERIALIZATION_MODE, ) - (dfr_meteorological, empty_data_meteorological,) = 
treat_pluviometer_and_meteorological_data( + ( + dfr_meteorological, + empty_data_meteorological, + ) = treat_pluviometer_and_meteorological_data( dfr=dfr_meteorological, dataset_id=DATASET_ID_METEOROLOGICAL, table_id=TABLE_ID_METEOROLOGICAL, @@ -503,19 +509,23 @@ dataset_names = get_dataset_name_on_gypscie(api, output_dataset_ids) # new ziped_dataset_paths = download_datasets_from_gypscie(api, dataset_names=dataset_names) dataset_paths = unzip_files(ziped_dataset_paths) - dfr_ = path_to_dfr(dataset_paths) + dfr_gypscie_ = path_to_dfr(dataset_paths) # output_datasets_id = get_output_dataset_ids_on_gypscie(api, dataset_processor_task_id) - dfr = add_caracterization_columns_on_dfr(dfr_, model_version, update_time=True) + dfr_gypscie = add_caracterization_columns_on_dfr( + dfr_gypscie_, model_version, update_time=True + ) # Save pre-treated data on local file with partitions now_datetime = get_now_datetime() prediction_data_path = task_create_partitions( - dfr, + data=dfr_gypscie, partition_date_column=dataset_info["partition_date_column"], savepath="model_prediction", + preffix="dados_alertario", suffix=now_datetime, - wait=dfr, + wait=dfr_gypscie, ) + prediction_data_path.set_upstream(dfr_gypscie) ################################ # Save preprocessing on GCP # ################################ diff --git a/pipelines/tasks.py b/pipelines/tasks.py index 8318a46d..4f31e3cf 100644 --- a/pipelines/tasks.py +++ b/pipelines/tasks.py @@ -13,8 +13,9 @@ from google.cloud import storage # pylint: disable=E0611, E0401 from prefect import task # pylint: disable=E0611, E0401 from prefect.triggers import all_successful # pylint: disable=E0611, E0401 + # pylint: disable=E0611, E0401 from prefeitura_rio.pipelines_utils.infisical import ( - get_secret, # pylint: disable=E0611, E0401 + get_secret, ) from prefeitura_rio.pipelines_utils.pandas import ( # pylint: disable=E0611, E0401 parse_date_columns, @@ -308,26 +309,39 @@ def task_create_partitions( # partition_columns: List[str], savepath: str = "temp", data_type: str = "csv", + preffix: str = None, suffix: str = None, build_json_dataframe: bool = False, dataframe_key_column: str = None, wait=None, # pylint: disable=unused-argument -) -> Path: # sourcery skip: raise-specific-error +) -> List[Path]: # sourcery skip: raise-specific-error """ Create task for to_partitions """ + prepath = Path(f"/tmp/{savepath}") + prepath.mkdir(parents=True, exist_ok=True) + log(f"Data before partition columns creation {data.iloc[0]}") data, partition_columns = parse_date_columns(data, partition_date_column) log(f"Created partition columns {partition_columns} and data first row now is {data.iloc[0]}") - saved_files = to_partitions( + full_paths = to_partitions( data=data, partition_columns=partition_columns, - savepath=savepath, + savepath=prepath, data_type=data_type, suffix=suffix, build_json_dataframe=build_json_dataframe, dataframe_key_column=dataframe_key_column, ) - log(f"Partition saved files {saved_files}") - log(f"Returned path {savepath}, {type(savepath)}") - return Path(savepath) + if preffix: + new_paths = [] + for full_path in full_paths: + new_filename = full_path.name.replace("data_", f"{preffix}_data_") + savepath = full_path.with_name(new_filename) + + # Renomear o arquivo + full_path.rename(savepath) + new_paths.append(savepath) + full_paths = new_paths + log(f"Returned path {full_paths}, {type(full_paths)}") + return full_paths From 987ab9fe45e0442908114be28863e837024bf23a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" 
<66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 31 Oct 2024 21:08:35 +0000 Subject: [PATCH 32/56] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pipelines/meteorologia/precipitacao_alertario/flows.py | 10 ++-------- pipelines/tasks.py | 7 +++---- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/pipelines/meteorologia/precipitacao_alertario/flows.py b/pipelines/meteorologia/precipitacao_alertario/flows.py index a5d686c5..1a8c282a 100644 --- a/pipelines/meteorologia/precipitacao_alertario/flows.py +++ b/pipelines/meteorologia/precipitacao_alertario/flows.py @@ -147,19 +147,13 @@ ######################### dfr_pluviometric, dfr_meteorological = download_data() - ( - dfr_pluviometric, - empty_data_pluviometric, - ) = treat_pluviometer_and_meteorological_data( + (dfr_pluviometric, empty_data_pluviometric,) = treat_pluviometer_and_meteorological_data( dfr=dfr_pluviometric, dataset_id=DATASET_ID_PLUVIOMETRIC, table_id=TABLE_ID_PLUVIOMETRIC, mode=MATERIALIZATION_MODE, ) - ( - dfr_meteorological, - empty_data_meteorological, - ) = treat_pluviometer_and_meteorological_data( + (dfr_meteorological, empty_data_meteorological,) = treat_pluviometer_and_meteorological_data( dfr=dfr_meteorological, dataset_id=DATASET_ID_METEOROLOGICAL, table_id=TABLE_ID_METEOROLOGICAL, diff --git a/pipelines/tasks.py b/pipelines/tasks.py index 4f31e3cf..f2932a5c 100644 --- a/pipelines/tasks.py +++ b/pipelines/tasks.py @@ -13,10 +13,9 @@ from google.cloud import storage # pylint: disable=E0611, E0401 from prefect import task # pylint: disable=E0611, E0401 from prefect.triggers import all_successful # pylint: disable=E0611, E0401 - # pylint: disable=E0611, E0401 -from prefeitura_rio.pipelines_utils.infisical import ( - get_secret, -) + +# pylint: disable=E0611, E0401 +from prefeitura_rio.pipelines_utils.infisical import get_secret from prefeitura_rio.pipelines_utils.pandas import ( # pylint: disable=E0611, E0401 parse_date_columns, to_partitions, From 14e3b8713fc95426e53344f8d98ae43b82d159da Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Thu, 31 Oct 2024 18:33:49 -0300 Subject: [PATCH 33/56] bugfix --- .../precipitacao_alertario/tasks.py | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/pipelines/meteorologia/precipitacao_alertario/tasks.py b/pipelines/meteorologia/precipitacao_alertario/tasks.py index 7ade0aed..45fbf897 100644 --- a/pipelines/meteorologia/precipitacao_alertario/tasks.py +++ b/pipelines/meteorologia/precipitacao_alertario/tasks.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# pylint: disable=C0103,R0914 +# pylint: disable=C0103,R0914,R0913 """ Tasks for precipitacao_alertario """ @@ -185,6 +185,7 @@ def save_data( columns: str = None, treatment_version: int = None, data_type: str = "csv", + preffix: str = None, wait=None, # pylint: disable=unused-argument ) -> Tuple[Union[str, Path], Union[str, Path]]: """ @@ -210,15 +211,26 @@ def save_data( if columns: dataframe = dataframe[columns + new_partition_columns] - full_path = to_partitions( + full_paths = to_partitions( data=dataframe, partition_columns=partitions, savepath=prepath, data_type=data_type, suffix=str(treatment_version) + "_" + current_time, ) - log(f"Files saved on {prepath}, full path is {full_path}") - return prepath, full_path[0] + if preffix: + log(f"Adding preffix {preffix} on {full_paths}") + new_paths = [] + for full_path in full_paths: + new_filename = full_path.name.replace("data_", f"{preffix}_data_") + savepath 
= full_path.with_name(new_filename) + + # Renomear o arquivo + full_path.rename(savepath) + new_paths.append(savepath) + full_paths = new_paths + log(f"Files saved on {prepath}, full paths are {full_paths}") + return prepath, full_paths[0] @task From e66fce73b25838ea8c395bc02b6848929e70c336 Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Thu, 31 Oct 2024 18:47:30 -0300 Subject: [PATCH 34/56] bugfix --- pipelines/meteorologia/precipitacao_alertario/flows.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/meteorologia/precipitacao_alertario/flows.py b/pipelines/meteorologia/precipitacao_alertario/flows.py index 1a8c282a..87d61c11 100644 --- a/pipelines/meteorologia/precipitacao_alertario/flows.py +++ b/pipelines/meteorologia/precipitacao_alertario/flows.py @@ -463,6 +463,7 @@ data_name="gypscie", columns=["id_estacao", "data_medicao", "acumulado_chuva_5min"], data_type="parquet", + preffix="dados_alertario", ) register_dataset_response = register_dataset_on_gypscie( api, filepath=full_path_pluviometric_gypscie, domain_id=domain_id @@ -515,7 +516,6 @@ data=dfr_gypscie, partition_date_column=dataset_info["partition_date_column"], savepath="model_prediction", - preffix="dados_alertario", suffix=now_datetime, wait=dfr_gypscie, ) From bda27b735164a7571bd2b3924b9dc8fda07d27eb Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Fri, 1 Nov 2024 10:39:29 -0300 Subject: [PATCH 35/56] renaming file to dados_alertario_raw --- .../meteorologia/precipitacao_alertario/flows.py | 3 ++- .../meteorologia/precipitacao_alertario/tasks.py | 15 +++++++++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/pipelines/meteorologia/precipitacao_alertario/flows.py b/pipelines/meteorologia/precipitacao_alertario/flows.py index 87d61c11..6f7610f0 100644 --- a/pipelines/meteorologia/precipitacao_alertario/flows.py +++ b/pipelines/meteorologia/precipitacao_alertario/flows.py @@ -463,7 +463,8 @@ data_name="gypscie", columns=["id_estacao", "data_medicao", "acumulado_chuva_5min"], data_type="parquet", - preffix="dados_alertario", + rename="dados_alertario_raw", + suffix=False, ) register_dataset_response = register_dataset_on_gypscie( api, filepath=full_path_pluviometric_gypscie, domain_id=domain_id diff --git a/pipelines/meteorologia/precipitacao_alertario/tasks.py b/pipelines/meteorologia/precipitacao_alertario/tasks.py index 45fbf897..497765f3 100644 --- a/pipelines/meteorologia/precipitacao_alertario/tasks.py +++ b/pipelines/meteorologia/precipitacao_alertario/tasks.py @@ -186,6 +186,8 @@ def save_data( treatment_version: int = None, data_type: str = "csv", preffix: str = None, + suffix: bool = True, + rename: str = None, wait=None, # pylint: disable=unused-argument ) -> Tuple[Union[str, Path], Union[str, Path]]: """ @@ -194,6 +196,8 @@ def save_data( if not treatment_version: treatment_version = "" + else: + treatment_version = str(treatment_version) + "_" prepath = Path(f"/tmp/precipitacao_alertario/{data_name}") prepath.mkdir(parents=True, exist_ok=True) @@ -204,10 +208,11 @@ def save_data( log(f"Dataframe for {data_name} before partitions {dfr.iloc[0]}") log(f"Dataframe for {data_name} before partitions {dfr.dtypes}") dataframe, partitions = parse_date_columns(dfr, partition_column) - current_time = pendulum.now("America/Sao_Paulo").strftime("%Y%m%d%H%M") log(f"Dataframe for {data_name} after partitions {dataframe.iloc[0]}") log(f"Dataframe for {data_name} after partitions {dataframe.dtypes}") + if suffix: + suffix = 
pendulum.now("America/Sao_Paulo").strftime("%Y%m%d%H%M") if columns: dataframe = dataframe[columns + new_partition_columns] @@ -216,13 +221,14 @@ def save_data( partition_columns=partitions, savepath=prepath, data_type=data_type, - suffix=str(treatment_version) + "_" + current_time, + suffix=suffix, ) - if preffix: + if preffix or rename: log(f"Adding preffix {preffix} on {full_paths}") new_paths = [] for full_path in full_paths: - new_filename = full_path.name.replace("data_", f"{preffix}_data_") + change_filename = f"{preffix}_data_" if preffix else rename + new_filename = full_path.name.replace("data_", change_filename) savepath = full_path.with_name(new_filename) # Renomear o arquivo @@ -230,6 +236,7 @@ def save_data( new_paths.append(savepath) full_paths = new_paths log(f"Files saved on {prepath}, full paths are {full_paths}") + # TODO alterar funções seguintes para receberem uma lista em vez de ter o full_paths[0] return prepath, full_paths[0] From d7c8e29dfb2ca90a64b078f74bacc650fbb677a0 Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Fri, 1 Nov 2024 10:57:18 -0300 Subject: [PATCH 36/56] bugfix" --- pipelines/meteorologia/precipitacao_alertario/tasks.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/pipelines/meteorologia/precipitacao_alertario/tasks.py b/pipelines/meteorologia/precipitacao_alertario/tasks.py index 497765f3..c93b0456 100644 --- a/pipelines/meteorologia/precipitacao_alertario/tasks.py +++ b/pipelines/meteorologia/precipitacao_alertario/tasks.py @@ -194,10 +194,7 @@ def save_data( Salvar dfr tratados em csv para conseguir subir pro GCP """ - if not treatment_version: - treatment_version = "" - else: - treatment_version = str(treatment_version) + "_" + treatment_version = str(treatment_version) + "_" if treatment_version else "" prepath = Path(f"/tmp/precipitacao_alertario/{data_name}") prepath.mkdir(parents=True, exist_ok=True) @@ -206,13 +203,10 @@ def save_data( new_partition_columns = ["ano_particao", "mes_particao", "data_particao"] dfr = dfr.drop(columns=[col for col in new_partition_columns if col in dfr.columns]) log(f"Dataframe for {data_name} before partitions {dfr.iloc[0]}") - log(f"Dataframe for {data_name} before partitions {dfr.dtypes}") dataframe, partitions = parse_date_columns(dfr, partition_column) log(f"Dataframe for {data_name} after partitions {dataframe.iloc[0]}") - log(f"Dataframe for {data_name} after partitions {dataframe.dtypes}") - if suffix: - suffix = pendulum.now("America/Sao_Paulo").strftime("%Y%m%d%H%M") + suffix = pendulum.now("America/Sao_Paulo").strftime("%Y%m%d%H%M") if suffix else None if columns: dataframe = dataframe[columns + new_partition_columns] From 0b2ad3d803fc2193935da00e3ce1e1384c870f75 Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Fri, 1 Nov 2024 11:13:54 -0300 Subject: [PATCH 37/56] bugfix" --- pipelines/meteorologia/precipitacao_alertario/tasks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipelines/meteorologia/precipitacao_alertario/tasks.py b/pipelines/meteorologia/precipitacao_alertario/tasks.py index c93b0456..cdbe5fc1 100644 --- a/pipelines/meteorologia/precipitacao_alertario/tasks.py +++ b/pipelines/meteorologia/precipitacao_alertario/tasks.py @@ -221,8 +221,8 @@ def save_data( log(f"Adding preffix {preffix} on {full_paths}") new_paths = [] for full_path in full_paths: - change_filename = f"{preffix}_data_" if preffix else rename - new_filename = full_path.name.replace("data_", change_filename) + change_filename = f"{preffix}_data" if preffix 
else rename + new_filename = full_path.name.replace("data", change_filename) savepath = full_path.with_name(new_filename) # Renomear o arquivo From 3856421c1c2dee0f3585b5bfcd66f325d665157f Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Fri, 1 Nov 2024 11:57:03 -0300 Subject: [PATCH 38/56] changing column type before registring dataset on gypscie --- .../precipitacao_alertario/flows.py | 16 +++++++-- pipelines/utils/gypscie/tasks.py | 34 ++++++++++++++++--- 2 files changed, 43 insertions(+), 7 deletions(-) diff --git a/pipelines/meteorologia/precipitacao_alertario/flows.py b/pipelines/meteorologia/precipitacao_alertario/flows.py index 6f7610f0..3fbdcb28 100644 --- a/pipelines/meteorologia/precipitacao_alertario/flows.py +++ b/pipelines/meteorologia/precipitacao_alertario/flows.py @@ -50,6 +50,7 @@ from pipelines.utils.gypscie.tasks import ( # pylint: disable=E0611, E0401 access_api, add_caracterization_columns_on_dfr, + convert_columns_type, download_datasets_from_gypscie, execute_dataflow_on_gypscie, get_dataflow_alertario_params, @@ -147,13 +148,19 @@ ######################### dfr_pluviometric, dfr_meteorological = download_data() - (dfr_pluviometric, empty_data_pluviometric,) = treat_pluviometer_and_meteorological_data( + ( + dfr_pluviometric, + empty_data_pluviometric, + ) = treat_pluviometer_and_meteorological_data( dfr=dfr_pluviometric, dataset_id=DATASET_ID_PLUVIOMETRIC, table_id=TABLE_ID_PLUVIOMETRIC, mode=MATERIALIZATION_MODE, ) - (dfr_meteorological, empty_data_meteorological,) = treat_pluviometer_and_meteorological_data( + ( + dfr_meteorological, + empty_data_meteorological, + ) = treat_pluviometer_and_meteorological_data( dfr=dfr_meteorological, dataset_id=DATASET_ID_METEOROLOGICAL, table_id=TABLE_ID_METEOROLOGICAL, @@ -457,7 +464,10 @@ dataset_processor_response, dataset_processor_id = get_dataset_processor_info( api, processor_name ) - dfr_pluviometric_gypscie = convert_sp_timezone_to_utc(dfr_pluviometric) + dfr_pluviometric_converted = convert_columns_type( + dfr_pluviometric, columns=["id_estacao"], new_types=[int] + ) + dfr_pluviometric_gypscie = convert_sp_timezone_to_utc(dfr_pluviometric_converted) path_pluviometric_gypscie, full_path_pluviometric_gypscie = save_data( dfr_pluviometric_gypscie, data_name="gypscie", diff --git a/pipelines/utils/gypscie/tasks.py b/pipelines/utils/gypscie/tasks.py index 9744e8b5..2b3e79a3 100644 --- a/pipelines/utils/gypscie/tasks.py +++ b/pipelines/utils/gypscie/tasks.py @@ -17,8 +17,9 @@ from prefect import task # pylint: disable=E0611, E0401 from prefect.engine.signals import ENDRUN # pylint: disable=E0611, E0401 from prefect.engine.state import Failed # pylint: disable=E0611, E0401 +# pylint: disable=E0611, E0401 from prefeitura_rio.pipelines_utils.infisical import ( - get_secret, # pylint: disable=E0611, E0401 + get_secret, ) from prefeitura_rio.pipelines_utils.logging import log # pylint: disable=E0611, E0401 from requests.exceptions import HTTPError @@ -707,9 +708,9 @@ def get_dataset_info(station_type: str, source: str) -> Dict: } if source == "alertario": dataset_info["table_id"] = "meteorologia_alertario" - dataset_info[ - "destination_table_id" - ] = "preprocessamento_estacao_meteorologica_alertario" + dataset_info["destination_table_id"] = ( + "preprocessamento_estacao_meteorologica_alertario" + ) elif source == "inmet": dataset_info["table_id"] = "meteorologia_inmet" dataset_info["destination_table_id"] = "preprocessamento_estacao_meteorologica_inmet" @@ -761,3 +762,28 @@ def add_caracterization_columns_on_dfr( 
model_version_ = str(model_version) dfr["model_version"] = model_version_ return dfr + + +@task +def convert_columns_type( + dfr: pd.DataFrame, columns: list = None, new_types: list = None +) -> pd.DataFrame: + """ + Converts specified columns in a DataFrame to the provided data types. + + Parameters: + dfr (pd.DataFrame): The input DataFrame to modify. + columns (list): List of column names to be converted. + new_types (list): List of target data types for each column, in the same order as `columns`. + + Returns: + pd.DataFrame: The modified DataFrame with columns converted to specified types. + """ + if len(columns) != len(new_types): + raise ValueError("The lists `columns` and `new_types` must be of the same length.") + + for col, new_type in zip(columns, new_types): + if col in dfr.columns: + dfr[col] = dfr[col].astype(new_type) + + return dfr From f55b2b5791955d74ffd95ee7d11785e80297a2f3 Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Fri, 1 Nov 2024 13:15:15 -0300 Subject: [PATCH 39/56] changin return of get function --- pipelines/utils/gypscie/utils.py | 6 +++++- pipelines/utils_api.py | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/pipelines/utils/gypscie/utils.py b/pipelines/utils/gypscie/utils.py index f5873dc6..f4a0627a 100644 --- a/pipelines/utils/gypscie/utils.py +++ b/pipelines/utils/gypscie/utils.py @@ -10,6 +10,7 @@ import basedosdados as bd import requests +import simplejson from prefeitura_rio.pipelines_utils.logging import log @@ -88,7 +89,10 @@ def get(self, path: str, timeout: int = 120) -> Dict: self._refresh_token_if_needed() response = requests.get(f"{self._base_url}{path}", headers=self._headers, timeout=timeout) response.raise_for_status() - return response.json() + try: + return response.json() + except simplejson.JSONDecodeError: + return response def put(self, path, json=None): """ diff --git a/pipelines/utils_api.py b/pipelines/utils_api.py index 2ba46b0b..f600d901 100644 --- a/pipelines/utils_api.py +++ b/pipelines/utils_api.py @@ -8,6 +8,7 @@ from typing import Callable, Dict, Tuple # , List import requests +import simplejson from prefeitura_rio.pipelines_utils.logging import log @@ -101,7 +102,10 @@ def get(self, path: str, timeout: int = 120) -> Dict: self._refresh_token_if_needed() response = requests.get(f"{self._base_url}{path}", headers=self._headers, timeout=timeout) response.raise_for_status() - return response.json() + try: + return response.json() + except simplejson.JSONDecodeError: + return response def put(self, path, json_data=None): """ From 1d714c98dd36a5b90579b848eea04264313b161a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 1 Nov 2024 16:15:38 +0000 Subject: [PATCH 40/56] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../meteorologia/precipitacao_alertario/flows.py | 10 ++-------- pipelines/utils/gypscie/tasks.py | 11 +++++------ 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/pipelines/meteorologia/precipitacao_alertario/flows.py b/pipelines/meteorologia/precipitacao_alertario/flows.py index 3fbdcb28..b4f3c769 100644 --- a/pipelines/meteorologia/precipitacao_alertario/flows.py +++ b/pipelines/meteorologia/precipitacao_alertario/flows.py @@ -148,19 +148,13 @@ ######################### dfr_pluviometric, dfr_meteorological = download_data() - ( - dfr_pluviometric, - empty_data_pluviometric, - ) = treat_pluviometer_and_meteorological_data( + (dfr_pluviometric, 
empty_data_pluviometric,) = treat_pluviometer_and_meteorological_data( dfr=dfr_pluviometric, dataset_id=DATASET_ID_PLUVIOMETRIC, table_id=TABLE_ID_PLUVIOMETRIC, mode=MATERIALIZATION_MODE, ) - ( - dfr_meteorological, - empty_data_meteorological, - ) = treat_pluviometer_and_meteorological_data( + (dfr_meteorological, empty_data_meteorological,) = treat_pluviometer_and_meteorological_data( dfr=dfr_meteorological, dataset_id=DATASET_ID_METEOROLOGICAL, table_id=TABLE_ID_METEOROLOGICAL, diff --git a/pipelines/utils/gypscie/tasks.py b/pipelines/utils/gypscie/tasks.py index 2b3e79a3..fa63eef0 100644 --- a/pipelines/utils/gypscie/tasks.py +++ b/pipelines/utils/gypscie/tasks.py @@ -17,10 +17,9 @@ from prefect import task # pylint: disable=E0611, E0401 from prefect.engine.signals import ENDRUN # pylint: disable=E0611, E0401 from prefect.engine.state import Failed # pylint: disable=E0611, E0401 + # pylint: disable=E0611, E0401 -from prefeitura_rio.pipelines_utils.infisical import ( - get_secret, -) +from prefeitura_rio.pipelines_utils.infisical import get_secret from prefeitura_rio.pipelines_utils.logging import log # pylint: disable=E0611, E0401 from requests.exceptions import HTTPError @@ -708,9 +707,9 @@ def get_dataset_info(station_type: str, source: str) -> Dict: } if source == "alertario": dataset_info["table_id"] = "meteorologia_alertario" - dataset_info["destination_table_id"] = ( - "preprocessamento_estacao_meteorologica_alertario" - ) + dataset_info[ + "destination_table_id" + ] = "preprocessamento_estacao_meteorologica_alertario" elif source == "inmet": dataset_info["table_id"] = "meteorologia_inmet" dataset_info["destination_table_id"] = "preprocessamento_estacao_meteorologica_inmet" From 879e3600ceac08bbbed55b478decad71fb0d60e2 Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Fri, 1 Nov 2024 13:26:21 -0300 Subject: [PATCH 41/56] adding task to functions --- pipelines/utils/gypscie/tasks.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pipelines/utils/gypscie/tasks.py b/pipelines/utils/gypscie/tasks.py index fa63eef0..fed2f6bb 100644 --- a/pipelines/utils/gypscie/tasks.py +++ b/pipelines/utils/gypscie/tasks.py @@ -731,6 +731,7 @@ def get_dataset_info(station_type: str, source: str) -> Dict: return dataset_info +@task def path_to_dfr(path: str) -> pd.DataFrame: """ Reads a csv or parquet file from the given path and returns a dataframe @@ -748,6 +749,7 @@ def path_to_dfr(path: str) -> pd.DataFrame: return dfr +@task def add_caracterization_columns_on_dfr( dfr: pd.DataFrame, model_version: None, update_time: bool = False ) -> pd.DataFrame: From 6929b4645f16f055505d4499b3e13d12fc0a982f Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Fri, 1 Nov 2024 13:53:31 -0300 Subject: [PATCH 42/56] bugfix --- pipelines/utils/gypscie/tasks.py | 42 ++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/pipelines/utils/gypscie/tasks.py b/pipelines/utils/gypscie/tasks.py index fed2f6bb..2b7f2417 100644 --- a/pipelines/utils/gypscie/tasks.py +++ b/pipelines/utils/gypscie/tasks.py @@ -707,9 +707,9 @@ def get_dataset_info(station_type: str, source: str) -> Dict: } if source == "alertario": dataset_info["table_id"] = "meteorologia_alertario" - dataset_info[ - "destination_table_id" - ] = "preprocessamento_estacao_meteorologica_alertario" + dataset_info["destination_table_id"] = ( + "preprocessamento_estacao_meteorologica_alertario" + ) elif source == "inmet": dataset_info["table_id"] = "meteorologia_inmet" dataset_info["destination_table_id"] = 
"preprocessamento_estacao_meteorologica_inmet" @@ -732,20 +732,31 @@ def get_dataset_info(station_type: str, source: str) -> Dict: @task -def path_to_dfr(path: str) -> pd.DataFrame: +def path_to_dfr(paths: List[str]) -> pd.DataFrame: """ - Reads a csv or parquet file from the given path and returns a dataframe + Reads csvs or parquets filess from the given paths and returns a concatenated dataframe. """ - dfr = pd.DataFrame() - try: - if path.endswith(".csv"): - dfr = pd.read_csv(path) - elif path.endswith(".parquet"): - dfr = pd.read_parquet(path) - else: - raise ValueError("File extension not supported") - except AttributeError as error: - log(f"type(path) {type(path)} error {error}") + log(f"Start converting files from {paths} to a df.") + dataframes = [] + + for path in paths: + try: + if path.endswith(".csv"): + dfr_ = pd.read_csv(path) + elif path.endswith(".parquet"): + dfr_ = pd.read_parquet(path) + else: + raise ValueError(f"File extension not supported for file: {path}") + dataframes.append(dfr_) + + except AttributeError as error: + log(f"type(path) {type(path)} error {error}") + + if dataframes: + dfr = pd.concat(dataframes, ignore_index=True) + else: + dfr = pd.DataFrame() + log(f"Dataframe : {dfr.iloc[0]}") return dfr @@ -762,6 +773,7 @@ def add_caracterization_columns_on_dfr( if model_version is not None: model_version_ = str(model_version) dfr["model_version"] = model_version_ + log(f"Dataframe with new columns {dfr.iloc[0]}") return dfr From 0f73172ec76841cc5c1da1d2ff3ded01354ff73c Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Fri, 1 Nov 2024 15:16:15 -0300 Subject: [PATCH 43/56] returning a path instead a list on task_create_partitions --- pipelines/meteorologia/precipitacao_alertario/flows.py | 2 +- pipelines/tasks.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pipelines/meteorologia/precipitacao_alertario/flows.py b/pipelines/meteorologia/precipitacao_alertario/flows.py index b4f3c769..c32aeb83 100644 --- a/pipelines/meteorologia/precipitacao_alertario/flows.py +++ b/pipelines/meteorologia/precipitacao_alertario/flows.py @@ -524,7 +524,7 @@ suffix=now_datetime, wait=dfr_gypscie, ) - prediction_data_path.set_upstream(dfr_gypscie) + ################################ # Save preprocessing on GCP # ################################ diff --git a/pipelines/tasks.py b/pipelines/tasks.py index f2932a5c..c0bd97bc 100644 --- a/pipelines/tasks.py +++ b/pipelines/tasks.py @@ -313,7 +313,7 @@ def task_create_partitions( build_json_dataframe: bool = False, dataframe_key_column: str = None, wait=None, # pylint: disable=unused-argument -) -> List[Path]: # sourcery skip: raise-specific-error +) -> Path: # sourcery skip: raise-specific-error """ Create task for to_partitions """ @@ -343,4 +343,4 @@ def task_create_partitions( new_paths.append(savepath) full_paths = new_paths log(f"Returned path {full_paths}, {type(full_paths)}") - return full_paths + return full_paths[0] From a43d0444b272186dd648809eb4174ebb0e2965f5 Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Fri, 1 Nov 2024 15:39:41 -0300 Subject: [PATCH 44/56] changing path where to save table --- .../meteorologia/precipitacao_alertario/flows.py | 12 +++++++++--- pipelines/tasks.py | 8 ++++---- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/pipelines/meteorologia/precipitacao_alertario/flows.py b/pipelines/meteorologia/precipitacao_alertario/flows.py index c32aeb83..85a08f93 100644 --- a/pipelines/meteorologia/precipitacao_alertario/flows.py +++ 
b/pipelines/meteorologia/precipitacao_alertario/flows.py @@ -148,13 +148,19 @@ ######################### dfr_pluviometric, dfr_meteorological = download_data() - (dfr_pluviometric, empty_data_pluviometric,) = treat_pluviometer_and_meteorological_data( + ( + dfr_pluviometric, + empty_data_pluviometric, + ) = treat_pluviometer_and_meteorological_data( dfr=dfr_pluviometric, dataset_id=DATASET_ID_PLUVIOMETRIC, table_id=TABLE_ID_PLUVIOMETRIC, mode=MATERIALIZATION_MODE, ) - (dfr_meteorological, empty_data_meteorological,) = treat_pluviometer_and_meteorological_data( + ( + dfr_meteorological, + empty_data_meteorological, + ) = treat_pluviometer_and_meteorological_data( dfr=dfr_meteorological, dataset_id=DATASET_ID_METEOROLOGICAL, table_id=TABLE_ID_METEOROLOGICAL, @@ -517,7 +523,7 @@ # Save pre-treated data on local file with partitions now_datetime = get_now_datetime() - prediction_data_path = task_create_partitions( + prediction_data_path, prediction_data_full_path = task_create_partitions( data=dfr_gypscie, partition_date_column=dataset_info["partition_date_column"], savepath="model_prediction", diff --git a/pipelines/tasks.py b/pipelines/tasks.py index c0bd97bc..801c43bc 100644 --- a/pipelines/tasks.py +++ b/pipelines/tasks.py @@ -6,7 +6,7 @@ import json from pathlib import Path -from typing import List, Union +from typing import List, Union, Tuple import pandas as pd import pendulum # pylint: disable=E0611, E0401 @@ -301,7 +301,7 @@ def save_dataframe( return prepath -@task +@task(nout=2) def task_create_partitions( data: pd.DataFrame, partition_date_column: str, @@ -313,7 +313,7 @@ def task_create_partitions( build_json_dataframe: bool = False, dataframe_key_column: str = None, wait=None, # pylint: disable=unused-argument -) -> Path: # sourcery skip: raise-specific-error +) -> Tuple[Union[str, Path], Union[str, Path]]: # sourcery skip: raise-specific-error """ Create task for to_partitions """ @@ -343,4 +343,4 @@ def task_create_partitions( new_paths.append(savepath) full_paths = new_paths log(f"Returned path {full_paths}, {type(full_paths)}") - return full_paths[0] + return prepath, full_paths[0] From 0649aa087e44911341676163a67bef44a9b94a39 Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Mon, 4 Nov 2024 10:38:16 -0300 Subject: [PATCH 45/56] adding rename function and adapting code to treat radar data on gypscie --- .../meteorologia/radar/mendanha/flows.py | 83 ++++++++++++------- .../meteorologia/radar/mendanha/schedules.py | 18 ++++ .../meteorologia/radar/mendanha/utils.py | 16 ++-- 3 files changed, 78 insertions(+), 39 deletions(-) diff --git a/pipelines/meteorologia/radar/mendanha/flows.py b/pipelines/meteorologia/radar/mendanha/flows.py index a28d2276..e36433ff 100644 --- a/pipelines/meteorologia/radar/mendanha/flows.py +++ b/pipelines/meteorologia/radar/mendanha/flows.py @@ -8,6 +8,7 @@ from prefect.run_configs import KubernetesRun # pylint: disable=E0611, E0401 from prefect.storage import GCS # pylint: disable=E0611, E0401 from prefeitura_rio.pipelines_utils.custom import Flow # pylint: disable=E0611, E0401 + # pylint: disable=E0611, E0401 from prefeitura_rio.pipelines_utils.state_handlers import handler_inject_bd_credentials from prefeitura_rio.pipelines_utils.tasks import ( # pylint: disable=E0611, E0401 create_table_and_upload_to_gcs, @@ -64,12 +65,15 @@ from pipelines.utils.gypscie.tasks import ( add_caracterization_columns_on_dfr, download_datasets_from_gypscie, - execute_dataset_processor, + execute_dataflow_on_gypscie, + get_dataflow_mendanha_params, get_dataset_info, + 
get_dataset_name_on_gypscie, get_dataset_processor_info, path_to_dfr, register_dataset_on_gypscie, - task_wait_run, + rename_files, + unzip_files, ) # create_visualization_with_background, prefix_to_restore, save_data, @@ -104,16 +108,25 @@ # Preprocessing gypscie parameters preprocessing_gypscie = Parameter("preprocessing_gypscie", default=False, required=False) + processor_name = Parameter("processor_name", default="etl_alertario22", required=True) + dataset_processor_id = Parameter("dataset_processor_id", default=43, required=False) # mudar + workflow_id = Parameter("workflow_id", default=40, required=False) + + load_data_function_id = Parameter("load_data_function_id", default=46, required=False) + filter_data_function_id = Parameter("filter_data_function_id", default=47, required=False) + parse_date_time_function_id = Parameter( + "parse_date_time_function_id", default=48, required=False + ) + aggregate_data_function_id = Parameter("aggregate_data_function_id", default=49, required=False) + save_data_function_id = Parameter("save_data_function_id", default=50, required=False) + model_version = Parameter("model_version", default=1, required=False) + # Gypscie parameters environment_id = Parameter("environment_id", default=1, required=False) domain_id = Parameter("domain_id", default=1, required=False) project_id = Parameter("project_id", default=1, required=False) project_name = Parameter("project_name", default="rionowcast_precipitation", required=False) - # Gypscie processor parameters - processor_name = Parameter("processor_name", default="etl_alertario22", required=True) - dataset_processor_id = Parameter("dataset_processor_id", default=43, required=False) # mudar - # Parameters for saving data on GCP materialize_after_dump = Parameter("materialize_after_dump", default=False, required=False) dump_mode = Parameter("dump_mode", default=False, required=False) @@ -152,7 +165,8 @@ files_to_download=files_on_storage_list, destination_path="temp/", ) - radar = task_open_radar_file(radar_files[0]) + uncompressed_files = unzip_files(radar_files) + radar = task_open_radar_file(uncompressed_files[0]) grid_shape, grid_limits = get_radar_parameters(radar) radar_2d = remap_data(radar, RADAR_PRODUCT_LIST, grid_shape, grid_limits) @@ -267,9 +281,9 @@ ) # save_last_update_redis.set_upstream(upload_table) - #################################### - # Start preprocessing flow # - #################################### + ###################################### + # Start gypscie preprocessing flow # + ###################################### with case(preprocessing_gypscie, True): api_gypscie = access_api_gypscie() @@ -283,35 +297,44 @@ ) # TODO: ao salvar o nome do radar_files salvar com sufixo treatment_version # pq te que ser unico no gypscie - dataset_response = register_dataset_on_gypscie( - api_gypscie, filepath=radar_files, domain_id=domain_id + # for now, all files to be processe has to have the name defined on default_value + # when the workflow was saved on gypscie. In this case default_value = "9921GUA_PPIVol.hdf" + # Gypscie will give a different name for zip file, but the inside file will have the name for all. 
+ renamed_files = rename_files( + uncompressed_files, original_name=uncompressed_files[0], rename="9921GUA_PPIVol.hdf" ) - # TODO: verifcar no codigo do augustp se são esses os parametros corretos - processor_parameters = { - "dataset1": str(dataset_path).rsplit("/", maxsplit=1)[-1], - "station_type": station_type, - } - - dataset_processor_task_id = execute_dataset_processor( - api_gypscie, - processor_id=dataset_processor_id, - dataset_id=[dataset_response["id"]], + register_dataset_response = register_dataset_on_gypscie( + api_gypscie, filepath=renamed_files[0], domain_id=domain_id + ) + model_params = get_dataflow_mendanha_params( + workflow_id=workflow_id, environment_id=environment_id, project_id=project_id, - parameters=processor_parameters, + radar_data_id=register_dataset_response["id"], + load_data_function_id=load_data_function_id, + filter_data_function_id=filter_data_function_id, + parse_date_time_function_id=parse_date_time_function_id, + agregate_data_function_id=aggregate_data_function_id, + save_data_function_id=save_data_function_id, ) - wait_run = task_wait_run(api_gypscie, dataset_processor_task_id, flow_type="processor") - dataset_path = download_datasets_from_gypscie( - api_gypscie, dataset_names=[dataset_response["id"]], wait=wait_run + + output_dataset_ids = execute_dataflow_on_gypscie( + api_gypscie, + model_params, ) - dfr_ = path_to_dfr(dataset_path) + dataset_names = get_dataset_name_on_gypscie(api, output_dataset_ids) # new + ziped_dataset_paths = download_datasets_from_gypscie(api, dataset_names=dataset_names) + dataset_paths = unzip_files(ziped_dataset_paths) + dfr_gypscie_ = path_to_dfr(dataset_paths) # output_datasets_id = get_output_dataset_ids_on_gypscie(api, dataset_processor_task_id) - dfr = add_caracterization_columns_on_dfr(dfr_, treatment_version, update_time=True) + dfr_gypscie = add_caracterization_columns_on_dfr( + dfr_gypscie_, model_version, update_time=True + ) # Save pre-treated data on local file with partitions now_datetime = get_now_datetime() - prediction_data_path = task_create_partitions( - dfr, + prediction_data_path, prediction_data_full_path = task_create_partitions( + dfr_gypscie, partition_date_column=dataset_info["partition_date_column"], savepath="model_prediction", suffix=now_datetime, diff --git a/pipelines/meteorologia/radar/mendanha/schedules.py b/pipelines/meteorologia/radar/mendanha/schedules.py index d7202218..df218b59 100644 --- a/pipelines/meteorologia/radar/mendanha/schedules.py +++ b/pipelines/meteorologia/radar/mendanha/schedules.py @@ -28,6 +28,24 @@ "save_image_with_background": False, "save_image_without_colorbar": True, "save_image_with_colorbar": True, + "preprocessing_gypscie": True, + "workflow_id": 40, + "environment_id": 1, + "domain_id": 1, + "project_id": 1, + "project_name": "rionowcast_precipitation", + "processor_name": "etl_inea_radar", + "dataset_processor_id": 43, + "load_data_function_id": 46, + "filter_data_function_id": 47, + "parse_date_time_function_id": 48, + "aggregate_data_function_id": 49, + "save_data_function_id": 50, + "dataset_id_previsao_chuva": "clima_previsao_chuva", + "table_id_previsao_chuva": "preprocessamento_radar_mendanha", + "station_type": "radar", + "source": "mendanha", + "model_version": 1, }, ), ] diff --git a/pipelines/meteorologia/radar/mendanha/utils.py b/pipelines/meteorologia/radar/mendanha/utils.py index 588cc953..ea8b2ad1 100644 --- a/pipelines/meteorologia/radar/mendanha/utils.py +++ b/pipelines/meteorologia/radar/mendanha/utils.py @@ -4,18 +4,16 @@ General utils for 
setting rain dashboard using radar data. """ import base64 -import gzip import io import os import re -import shutil from datetime import datetime from pathlib import Path from typing import Union import matplotlib.colors as mcolors import numpy as np -import pyart +import pyart # pylint: disable=E0611, E0401 def extract_timestamp(filename) -> datetime: @@ -48,12 +46,12 @@ def open_radar_file(file_path: Union[str, Path]) -> Union[pyart.core.Radar, None Radar object. """ file_path = str(file_path) - if file_path.endswith(".gz"): - uncompressed_file_path = file_path[:-3] - with gzip.open(file_path, "rb") as f_in: - with open(uncompressed_file_path, "wb") as f_out: - shutil.copyfileobj(f_in, f_out) - file_path = uncompressed_file_path + # if file_path.endswith(".gz"): + # uncompressed_file_path = file_path[:-3] + # with gzip.open(file_path, "rb") as f_in: + # with open(uncompressed_file_path, "wb") as f_out: + # shutil.copyfileobj(f_in, f_out) + # file_path = uncompressed_file_path try: opened_file = pyart.aux_io.read_odim_h5(file_path) From b4ecf9ec682b0184d9905fb8e096b4d65caca3bd Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Mon, 4 Nov 2024 10:48:22 -0300 Subject: [PATCH 46/56] forgot to add other changes --- .../precipitacao_alertario/flows.py | 7 +- .../precipitacao_alertario/tasks.py | 30 ++-- pipelines/utils/gypscie/tasks.py | 132 +++++++++++++++--- 3 files changed, 135 insertions(+), 34 deletions(-) diff --git a/pipelines/meteorologia/precipitacao_alertario/flows.py b/pipelines/meteorologia/precipitacao_alertario/flows.py index 85a08f93..3ffe774d 100644 --- a/pipelines/meteorologia/precipitacao_alertario/flows.py +++ b/pipelines/meteorologia/precipitacao_alertario/flows.py @@ -59,6 +59,7 @@ get_dataset_processor_info, path_to_dfr, register_dataset_on_gypscie, + rename_files, unzip_files, ) @@ -473,11 +474,13 @@ data_name="gypscie", columns=["id_estacao", "data_medicao", "acumulado_chuva_5min"], data_type="parquet", - rename="dados_alertario_raw", suffix=False, ) + full_path_pluviometric_gypscie_ = rename_files( + full_path_pluviometric_gypscie, rename="dados_alertario_raw" + ) register_dataset_response = register_dataset_on_gypscie( - api, filepath=full_path_pluviometric_gypscie, domain_id=domain_id + api, filepath=full_path_pluviometric_gypscie_[0], domain_id=domain_id ) model_params = get_dataflow_alertario_params( diff --git a/pipelines/meteorologia/precipitacao_alertario/tasks.py b/pipelines/meteorologia/precipitacao_alertario/tasks.py index cdbe5fc1..e29f4865 100644 --- a/pipelines/meteorologia/precipitacao_alertario/tasks.py +++ b/pipelines/meteorologia/precipitacao_alertario/tasks.py @@ -185,9 +185,9 @@ def save_data( columns: str = None, treatment_version: int = None, data_type: str = "csv", - preffix: str = None, + # preffix: str = None, suffix: bool = True, - rename: str = None, + # rename: str = None, wait=None, # pylint: disable=unused-argument ) -> Tuple[Union[str, Path], Union[str, Path]]: """ @@ -217,21 +217,21 @@ def save_data( data_type=data_type, suffix=suffix, ) - if preffix or rename: - log(f"Adding preffix {preffix} on {full_paths}") - new_paths = [] - for full_path in full_paths: - change_filename = f"{preffix}_data" if preffix else rename - new_filename = full_path.name.replace("data", change_filename) - savepath = full_path.with_name(new_filename) - - # Renomear o arquivo - full_path.rename(savepath) - new_paths.append(savepath) - full_paths = new_paths + # if preffix or rename: + # log(f"Adding preffix {preffix} on {full_paths}") + # new_paths = [] + # 
for full_path in full_paths: + # change_filename = f"{preffix}_data" if preffix else rename + # new_filename = full_path.name.replace("data", change_filename) + # savepath = full_path.with_name(new_filename) + + # # Renomear o arquivo + # full_path.rename(savepath) + # new_paths.append(savepath) + # full_paths = new_paths log(f"Files saved on {prepath}, full paths are {full_paths}") # TODO alterar funções seguintes para receberem uma lista em vez de ter o full_paths[0] - return prepath, full_paths[0] + return prepath, full_paths @task diff --git a/pipelines/utils/gypscie/tasks.py b/pipelines/utils/gypscie/tasks.py index 2b7f2417..f08c0724 100644 --- a/pipelines/utils/gypscie/tasks.py +++ b/pipelines/utils/gypscie/tasks.py @@ -4,11 +4,13 @@ Tasks """ import datetime +import gzip import os +import shutil import zipfile from pathlib import Path from time import sleep -from typing import Dict, List +from typing import Dict, List, Union import numpy as np import pandas as pd @@ -417,6 +419,74 @@ def get_dataflow_alertario_params( # pylint: disable=too-many-arguments } +@task +def get_dataflow_mendanha_params( # pylint: disable=too-many-arguments + workflow_id, + environment_id, + project_id, + radar_data_id, + load_data_function_id, + filter_data_function_id, + parse_date_time_function_id, + agregate_data_function_id, + save_data_function_id, +) -> List: + """ + Return parameters for the Mendanha radar's ETL + + data = { + "workflow_id": 40, + "environment_id": 1, + "parameters": [ + { + "function_id":46, # load_data + "params": {"radar_data_path":213} + }, + { + "function_id":47 # filter_data + }, + { + "function_id":48 # parse_date_time + }, + { + "function_id":49 # aggregate_data + }, + { + "function_id":50, # save_data + "params": {"output_path":"9921GUA_PPIVol_20230112_190010_0000.parquet"} + } + ], + "project_id": 1 + } + """ + return { + "workflow_id": workflow_id, + "environment_id": environment_id, + "parameters": [ + { + "function_id": load_data_function_id, + "params": { + "radar_data_path": radar_data_id, + }, + }, + { + "function_id": filter_data_function_id, + }, + { + "function_id": parse_date_time_function_id, + }, + { + "function_id": agregate_data_function_id, + }, + { + "function_id": save_data_function_id, + "params": {"output_path": "preprocessed_data_radar_mendanha.parquet"}, + }, + ], + "project_id": project_id, + } + + @task def get_dataflow_params( # pylint: disable=too-many-arguments workflow_id, @@ -544,27 +614,28 @@ def download_datasets_from_gypscie( @task -def unzip_files(zip_files: List[str], destination_folder: str = "./") -> List[str]: +def unzip_files(compressed_files: List[str], destination_folder: str = "./") -> List[str]: """ - Unzip files to destination folder + Unzip .zip and .gz files to destination folder. 
""" - zip_files = [ - zip_file if zip_file.endswith(".zip") else zip_file + ".zip" for zip_file in zip_files - ] os.makedirs(destination_folder, exist_ok=True) - unziped_files = [] - for zip_file in zip_files: - with zipfile.ZipFile(zip_file, "r") as zip_ref: - zip_ref.extractall(destination_folder) - unziped_files.extend( - [ - os.path.join(destination_folder, nome_arquivo) - for nome_arquivo in zip_ref.namelist() - ] - ) + extracted_files = [] + for file in compressed_files: + if file.endswith(".zip"): + with zipfile.ZipFile(file, "r") as zip_ref: + zip_ref.extractall(destination_folder) + extracted_files.extend( + [os.path.join(destination_folder, f) for f in zip_ref.namelist()] + ) + elif file.endswith(".gz"): + output_file = os.path.join(destination_folder, os.path.basename(file)[:-3]) + with gzip.open(file, "rb") as gz_file: + with open(output_file, "wb") as out_file: + shutil.copyfileobj(gz_file, out_file) + extracted_files.append(output_file) - return unziped_files + return extracted_files @task @@ -800,3 +871,30 @@ def convert_columns_type( dfr[col] = dfr[col].astype(new_type) return dfr + + +@task +def rename_files( + files: List[Union[Path, str]], + original_name: str = "data", + preffix: str = None, + rename: str = None, +) -> List[Path]: + """ + Renomeia os arquivos com base em um prefixo ou novo nome. + """ + new_paths = [] + for file_path in files: + file_path = Path(file_path) + print(f"Original file path: {file_path}") + + change_filename = f"{preffix}_{original_name}" if preffix else rename + print(f"Name to replace '{original_name}' with: {change_filename}") + new_filename = file_path.name.replace(original_name, change_filename) + savepath = file_path.with_name(new_filename) + + # Rename file + file_path.rename(savepath) + new_paths.append(savepath) + print(f"Renamed file paths: {new_paths}") + return new_paths From 787aaf26cf16fc9ccb72df9075052c1bbf461a3d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 4 Nov 2024 13:48:47 +0000 Subject: [PATCH 47/56] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pipelines/meteorologia/precipitacao_alertario/flows.py | 10 ++-------- pipelines/meteorologia/radar/mendanha/flows.py | 3 ++- pipelines/tasks.py | 2 +- pipelines/utils/gypscie/tasks.py | 6 +++--- 4 files changed, 8 insertions(+), 13 deletions(-) diff --git a/pipelines/meteorologia/precipitacao_alertario/flows.py b/pipelines/meteorologia/precipitacao_alertario/flows.py index 3ffe774d..7cd17eb5 100644 --- a/pipelines/meteorologia/precipitacao_alertario/flows.py +++ b/pipelines/meteorologia/precipitacao_alertario/flows.py @@ -149,19 +149,13 @@ ######################### dfr_pluviometric, dfr_meteorological = download_data() - ( - dfr_pluviometric, - empty_data_pluviometric, - ) = treat_pluviometer_and_meteorological_data( + (dfr_pluviometric, empty_data_pluviometric,) = treat_pluviometer_and_meteorological_data( dfr=dfr_pluviometric, dataset_id=DATASET_ID_PLUVIOMETRIC, table_id=TABLE_ID_PLUVIOMETRIC, mode=MATERIALIZATION_MODE, ) - ( - dfr_meteorological, - empty_data_meteorological, - ) = treat_pluviometer_and_meteorological_data( + (dfr_meteorological, empty_data_meteorological,) = treat_pluviometer_and_meteorological_data( dfr=dfr_meteorological, dataset_id=DATASET_ID_METEOROLOGICAL, table_id=TABLE_ID_METEOROLOGICAL, diff --git a/pipelines/meteorologia/radar/mendanha/flows.py b/pipelines/meteorologia/radar/mendanha/flows.py index e36433ff..39051b08 
100644 --- a/pipelines/meteorologia/radar/mendanha/flows.py +++ b/pipelines/meteorologia/radar/mendanha/flows.py @@ -8,7 +8,8 @@ from prefect.run_configs import KubernetesRun # pylint: disable=E0611, E0401 from prefect.storage import GCS # pylint: disable=E0611, E0401 from prefeitura_rio.pipelines_utils.custom import Flow # pylint: disable=E0611, E0401 - # pylint: disable=E0611, E0401 + +# pylint: disable=E0611, E0401 from prefeitura_rio.pipelines_utils.state_handlers import handler_inject_bd_credentials from prefeitura_rio.pipelines_utils.tasks import ( # pylint: disable=E0611, E0401 create_table_and_upload_to_gcs, diff --git a/pipelines/tasks.py b/pipelines/tasks.py index 801c43bc..cdaaad2a 100644 --- a/pipelines/tasks.py +++ b/pipelines/tasks.py @@ -6,7 +6,7 @@ import json from pathlib import Path -from typing import List, Union, Tuple +from typing import List, Tuple, Union import pandas as pd import pendulum # pylint: disable=E0611, E0401 diff --git a/pipelines/utils/gypscie/tasks.py b/pipelines/utils/gypscie/tasks.py index f08c0724..3b6d6b97 100644 --- a/pipelines/utils/gypscie/tasks.py +++ b/pipelines/utils/gypscie/tasks.py @@ -778,9 +778,9 @@ def get_dataset_info(station_type: str, source: str) -> Dict: } if source == "alertario": dataset_info["table_id"] = "meteorologia_alertario" - dataset_info["destination_table_id"] = ( - "preprocessamento_estacao_meteorologica_alertario" - ) + dataset_info[ + "destination_table_id" + ] = "preprocessamento_estacao_meteorologica_alertario" elif source == "inmet": dataset_info["table_id"] = "meteorologia_inmet" dataset_info["destination_table_id"] = "preprocessamento_estacao_meteorologica_inmet" From 3c7329160f2e91fae802834d9306c154f14510cf Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Mon, 4 Nov 2024 11:42:37 -0300 Subject: [PATCH 48/56] trying to fix fail on flow registration --- pipelines/meteorologia/radar/__init__.py | 2 ++ pipelines/meteorologia/radar/mendanha/__init__.py | 7 +++++++ pipelines/utils/dump_db/__init__.py | 2 ++ 3 files changed, 11 insertions(+) diff --git a/pipelines/meteorologia/radar/__init__.py b/pipelines/meteorologia/radar/__init__.py index e69de29b..6917c4e7 100644 --- a/pipelines/meteorologia/radar/__init__.py +++ b/pipelines/meteorologia/radar/__init__.py @@ -0,0 +1,2 @@ +# -*- coding: utf-8 -*- +from pipelines.meteorologia.radar.mendanha.flows import * # noqa \ No newline at end of file diff --git a/pipelines/meteorologia/radar/mendanha/__init__.py b/pipelines/meteorologia/radar/mendanha/__init__.py index e69de29b..faf414fd 100644 --- a/pipelines/meteorologia/radar/mendanha/__init__.py +++ b/pipelines/meteorologia/radar/mendanha/__init__.py @@ -0,0 +1,7 @@ +# -*- coding: utf-8 -*- +""" +Prefect flows for mendanha project +""" +############################################################################### +# Automatically managed, please do not touch +############################################################################### diff --git a/pipelines/utils/dump_db/__init__.py b/pipelines/utils/dump_db/__init__.py index e69de29b..ebb2e059 100644 --- a/pipelines/utils/dump_db/__init__.py +++ b/pipelines/utils/dump_db/__init__.py @@ -0,0 +1,2 @@ +# -*- coding: utf-8 -*- +"""" Init file for dump_dbt module """ From ef7f92c071427e9d0901c042df54354b0c66b0db Mon Sep 17 00:00:00 2001 From: patriciacatandi Date: Mon, 4 Nov 2024 11:55:43 -0300 Subject: [PATCH 49/56] testing init --- pipelines/meteorologia/radar/__init__.py | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 pipelines/meteorologia/radar/__init__.py 
diff --git a/pipelines/meteorologia/radar/__init__.py b/pipelines/meteorologia/radar/__init__.py
deleted file mode 100644
index 6917c4e7..00000000
--- a/pipelines/meteorologia/radar/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-# -*- coding: utf-8 -*-
-from pipelines.meteorologia.radar.mendanha.flows import * # noqa
\ No newline at end of file

From cf69f936c6511a5a1f612e641dc60c05ce916a92 Mon Sep 17 00:00:00 2001
From: patriciacatandi
Date: Mon, 4 Nov 2024 12:08:11 -0300
Subject: [PATCH 50/56] chore: force register

---
 .github/workflows/cd.yaml                | 2 +-
 pipelines/meteorologia/radar/__init__.py | 0
 2 files changed, 1 insertion(+), 1 deletion(-)
 create mode 100644 pipelines/meteorologia/radar/__init__.py

diff --git a/.github/workflows/cd.yaml b/.github/workflows/cd.yaml
index 53d4b5fb..68a0a29b 100644
--- a/.github/workflows/cd.yaml
+++ b/.github/workflows/cd.yaml
@@ -71,4 +71,4 @@ jobs:
 
       - name: Register Prefect flows
        run: |-
-          python .github/workflows/scripts/register_flows.py --project $PREFECT__SERVER__PROJECT --path pipelines/ --schedule --filter-affected-flows
+          python .github/workflows/scripts/register_flows.py --project $PREFECT__SERVER__PROJECT --path pipelines/ --schedule --no-filter-affected-flows
diff --git a/pipelines/meteorologia/radar/__init__.py b/pipelines/meteorologia/radar/__init__.py
new file mode 100644
index 00000000..e69de29b

From 8be7b2ff853c2101e9b7792cf25fff8362a6d508 Mon Sep 17 00:00:00 2001
From: patriciacatandi
Date: Mon, 4 Nov 2024 12:31:44 -0300
Subject: [PATCH 51/56] adding utils on init

---
 pipelines/meteorologia/radar/__init__.py | 1 +
 pipelines/utils/__init__.py              | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/pipelines/meteorologia/radar/__init__.py b/pipelines/meteorologia/radar/__init__.py
index e69de29b..be4d8fae 100644
--- a/pipelines/meteorologia/radar/__init__.py
+++ b/pipelines/meteorologia/radar/__init__.py
@@ -0,0 +1 @@
+from pipelines.meteorologia.radar.mendanha.flows import * # noqa
diff --git a/pipelines/utils/__init__.py b/pipelines/utils/__init__.py
index 3aeae37e..842b94e8 100644
--- a/pipelines/utils/__init__.py
+++ b/pipelines/utils/__init__.py
@@ -4,3 +4,5 @@
 """
 
 from pipelines.utils.execute_dbt_model.flows import *
+from pipelines.utils.dump_db.flows import *
+from pipelines.utils.dump_to_gcs.flows import *

From 49b39f36a59a45313c37e3268a1cf3009142bbb6 Mon Sep 17 00:00:00 2001
From: patriciacatandi
Date: Mon, 4 Nov 2024 12:41:48 -0300
Subject: [PATCH 52/56] removing parallelism from flow

---
 pipelines/meteorologia/radar/mendanha/flows.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pipelines/meteorologia/radar/mendanha/flows.py b/pipelines/meteorologia/radar/mendanha/flows.py
index 39051b08..10f4d31f 100644
--- a/pipelines/meteorologia/radar/mendanha/flows.py
+++ b/pipelines/meteorologia/radar/mendanha/flows.py
@@ -87,8 +87,6 @@
 with Flow(
     name="COR: Meteorologia - Mapa de Refletividade Radar do Mendanha",
     state_handlers=[handler_inject_bd_credentials],
-    skip_if_running=False,
-    parallelism=100,
     # skip_if_running=True,
 ) as cor_meteorologia_refletividade_radar_men_flow:
 

From 11356e601cfd6c541126d3f470d3676287aae385 Mon Sep 17 00:00:00 2001
From: patriciacatandi
Date: Mon, 4 Nov 2024 13:04:08 -0300
Subject: [PATCH 53/56] changing flow run config

---
 pipelines/meteorologia/radar/mendanha/flows.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/pipelines/meteorologia/radar/mendanha/flows.py b/pipelines/meteorologia/radar/mendanha/flows.py
index 10f4d31f..d40a8453 100644
--- a/pipelines/meteorologia/radar/mendanha/flows.py
+++ b/pipelines/meteorologia/radar/mendanha/flows.py
@@ -366,10 +366,6 @@
 cor_meteorologia_refletividade_radar_men_flow.run_config = KubernetesRun(
     image=constants.DOCKER_IMAGE.value,
     labels=[constants.RJ_COR_AGENT_LABEL.value],
-    cpu_request=1,
-    cpu_limit=1,
-    memory_request="2Gi",
-    memory_limit="3Gi",
 )
 cor_meteorologia_refletividade_radar_men_flow.schedule = TIME_SCHEDULE
 

From 6508243769a1a32f38e5908ccdf779ef41c029b8 Mon Sep 17 00:00:00 2001
From: patriciacatandi
Date: Mon, 4 Nov 2024 13:57:25 -0300
Subject: [PATCH 54/56] rolling back modification on init

---
 pipelines/meteorologia/radar/__init__.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pipelines/meteorologia/radar/__init__.py b/pipelines/meteorologia/radar/__init__.py
index be4d8fae..e69de29b 100644
--- a/pipelines/meteorologia/radar/__init__.py
+++ b/pipelines/meteorologia/radar/__init__.py
@@ -1 +0,0 @@
-from pipelines.meteorologia.radar.mendanha.flows import * # noqa

From e80364a27172c5981f4cddcff23cab4894c9ba1c Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 4 Nov 2024 16:57:45 +0000
Subject: [PATCH 55/56] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 pipelines/utils/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/utils/__init__.py b/pipelines/utils/__init__.py
index 842b94e8..7ca8bff5 100644
--- a/pipelines/utils/__init__.py
+++ b/pipelines/utils/__init__.py
@@ -3,6 +3,6 @@
 Helper flows that could fit any pipeline.
 """
 
-from pipelines.utils.execute_dbt_model.flows import *
 from pipelines.utils.dump_db.flows import *
 from pipelines.utils.dump_to_gcs.flows import *
+from pipelines.utils.execute_dbt_model.flows import *

From a3cea13cdb0d6656313beeb23878d74c075e509f Mon Sep 17 00:00:00 2001
From: Gabriel Gazola Milan
Date: Mon, 4 Nov 2024 15:20:10 -0300
Subject: [PATCH 56/56] fix: variable name

---
 pipelines/meteorologia/radar/mendanha/flows.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pipelines/meteorologia/radar/mendanha/flows.py b/pipelines/meteorologia/radar/mendanha/flows.py
index d40a8453..74946a4d 100644
--- a/pipelines/meteorologia/radar/mendanha/flows.py
+++ b/pipelines/meteorologia/radar/mendanha/flows.py
@@ -165,12 +165,12 @@
         destination_path="temp/",
     )
     uncompressed_files = unzip_files(radar_files)
-    radar = task_open_radar_file(uncompressed_files[0])
-    grid_shape, grid_limits = get_radar_parameters(radar)
-    radar_2d = remap_data(radar, RADAR_PRODUCT_LIST, grid_shape, grid_limits)
+    radar_file = task_open_radar_file(uncompressed_files[0])
+    grid_shape, grid_limits = get_radar_parameters(radar_file)
+    radar_2d = remap_data(radar_file, RADAR_PRODUCT_LIST, grid_shape, grid_limits)
 
     # Create visualizations
-    formatted_time, filename_time = get_and_format_time(radar)
+    formatted_time, filename_time = get_and_format_time(radar_file)
     cbar_title = get_colorbar_title(RADAR_PRODUCT_LIST[0])
     fig = create_visualization_no_background(
         radar_2d, radar_product=RADAR_PRODUCT_LIST[0], cbar_title=cbar_title, title=formatted_time