From ff7e2d30b86361ed22c1e3bbc25f83f6055795b9 Mon Sep 17 00:00:00 2001 From: Gustavo Souza Date: Fri, 11 Aug 2023 18:02:21 -0300 Subject: [PATCH 1/2] Melhorar velocidade de get_papel --- src/fundamentus/detalhes.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/fundamentus/detalhes.py b/src/fundamentus/detalhes.py index aaeb1d0..3cf3a37 100644 --- a/src/fundamentus/detalhes.py +++ b/src/fundamentus/detalhes.py @@ -14,9 +14,10 @@ import requests import requests_cache -import pandas as pd +import pandas as pd import time import logging, sys +from concurrent.futures import ThreadPoolExecutor from collections import OrderedDict @@ -55,13 +56,9 @@ def get_detalhes_list(lst): Output: DataFrame """ - result = pd.DataFrame() - - # build result for each get - for papel in lst: - logging.info('get list: [Papel: {}]'.format(papel)) - df = get_detalhes_papel(papel) - result = result.append(df) + with ThreadPoolExecutor() as executor: + result = executor.map(get_papel, lst) + result = pd.concat(result) # duplicate column (papel is the index already) try: From 40b210bb98d3497ec9129c1d8edb17c8a621eae7 Mon Sep 17 00:00:00 2001 From: Gustavo Souza Date: Fri, 11 Aug 2023 23:30:43 -0300 Subject: [PATCH 2/2] fix numeros --- src/fundamentus/utils.py | 39 ++++++++++++++++++------------------ tests/test_setor.py | 2 +- tests/test_utils.py | 4 ++-- tests/test_utils_unittest.py | 15 ++++++++++++++ 4 files changed, 38 insertions(+), 22 deletions(-) diff --git a/src/fundamentus/utils.py b/src/fundamentus/utils.py index 1f758c5..8ecbbb4 100644 --- a/src/fundamentus/utils.py +++ b/src/fundamentus/utils.py @@ -6,12 +6,13 @@ import requests import requests_cache -import pandas as pd +import pandas as pd import logging from tabulate import tabulate from datetime import datetime from dateutil.parser import parse +from pandas import Series # @@ -65,7 +66,7 @@ def from_pt_br(val): return res -def fmt_dec(val): +def fmt_dec(val: Series): """ Fix percent: - replace string in pt-br @@ -74,34 +75,34 @@ def fmt_dec(val): Input: Series, i.e., a DataFrame column """ + return pd.to_numeric(val.apply(parse_number_in_portuguese_locale)) - res = val - res = res.replace( to_replace=r'[.]', value='' , regex=True ) - res = res.replace( to_replace=r'[,]', value='.', regex=True ) -# res = res.astype(float) -# res = res.astype(float) / 100 -# res = '{:4.2f}%'.format(res) - return res +def parse_number_in_portuguese_locale(no: str) -> float: + """ + Parse numbers to float + Percentages also will be parsed to float (3% to 3e-2) + Input: + A number string + """ + try: + no = no.replace('%', 'e-2').replace(".", "").replace(",", ".") + return float(no) + except: + logging.error(f"Error: Unable to parse the number. {no}") + return None -def perc_to_float(val): +def perc_to_float(val: Series): """ Percent to float - replace string in pt-br to float - from '45,56%' to 0.4556 Input: - (DataFrame, column_name) + Series """ - - res = val - res = res.replace( to_replace=r'[%]', value='' , regex=True ) - res = res.replace( to_replace=r'[.]', value='' , regex=True ) - res = res.replace( to_replace=r'[,]', value='.', regex=True ) - res = res.astype(float) / 100 - - return res + return fmt_dec(val) def print_csv(data): diff --git a/tests/test_setor.py b/tests/test_setor.py index 0f2481c..3e17061 100644 --- a/tests/test_setor.py +++ b/tests/test_setor.py @@ -7,7 +7,7 @@ ### @pytest.mark.parametrize( - 'param',[ pytest.param(35, id='fin' ) + 'param',[ pytest.param(36, id='div' ) , pytest.param(38, id='seg' ) , pytest.param(38, id='prev') ]) diff --git a/tests/test_utils.py b/tests/test_utils.py index 55bdcb8..86e2f12 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -51,7 +51,7 @@ def test_fmt_dec(): 'col3': [ 13,23]} b = { 'data': [ '45,56%','1.045,56%' ]} b.update(more_data) - a = { 'data': [ '45.56%','1045.56%' ]} + a = { 'data': [ 45.56e-2,1045.56e-2 ]} a.update(more_data) _before = pd.DataFrame(b) @@ -68,7 +68,7 @@ def test_perc_to_float(): 'col3': [ 13,23]} b = { 'data': [ '45,56%','1.045,56%' ]} b.update(more_data) - a = { 'data': [ 0.4556, 10.4556 ]} + a = { 'data': [45.56e-2, 1045.56e-2]} a.update(more_data) _before = pd.DataFrame(b) diff --git a/tests/test_utils_unittest.py b/tests/test_utils_unittest.py index 97c6269..5bcfaa3 100644 --- a/tests/test_utils_unittest.py +++ b/tests/test_utils_unittest.py @@ -33,6 +33,21 @@ def test_from_pt_br_02(self): pd.testing.assert_frame_equal(_test.to_frame(), _after) +constants = [ + ("1.000.000", 1000000), + ("1,23", 1.23), + ("1.234,56", 1234.56), + ("1,23%", 1.23e-2) + ] + +class Test_parse_numbers(unittest.TestCase): + def test_numbers(self): + for tuples in constants: + self.assertAlmostEqual( + utils.parse_number_in_portuguese_locale(tuples[0]), + tuples[1]) + + if __name__ == '__main__':