Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/fix numeros #130

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 5 additions & 8 deletions src/fundamentus/detalhes.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@

import requests
import requests_cache
import pandas as pd
import pandas as pd
import time
import logging, sys
from concurrent.futures import ThreadPoolExecutor

from collections import OrderedDict

Expand Down Expand Up @@ -55,13 +56,9 @@ def get_detalhes_list(lst):
Output: DataFrame
"""

result = pd.DataFrame()

# build result for each get
for papel in lst:
logging.info('get list: [Papel: {}]'.format(papel))
df = get_detalhes_papel(papel)
result = result.append(df)
with ThreadPoolExecutor() as executor:
result = executor.map(get_papel, lst)
result = pd.concat(result)

# duplicate column (papel is the index already)
try:
Expand Down
39 changes: 20 additions & 19 deletions src/fundamentus/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@

import requests
import requests_cache
import pandas as pd
import pandas as pd
import logging

from tabulate import tabulate
from datetime import datetime
from dateutil.parser import parse
from pandas import Series


#
Expand Down Expand Up @@ -65,7 +66,7 @@ def from_pt_br(val):
return res


def fmt_dec(val):
def fmt_dec(val: Series):
"""
Fix percent:
- replace string in pt-br
Expand All @@ -74,34 +75,34 @@ def fmt_dec(val):
Input:
Series, i.e., a DataFrame column
"""
return pd.to_numeric(val.apply(parse_number_in_portuguese_locale))

res = val
res = res.replace( to_replace=r'[.]', value='' , regex=True )
res = res.replace( to_replace=r'[,]', value='.', regex=True )
# res = res.astype(float)
# res = res.astype(float) / 100
# res = '{:4.2f}%'.format(res)

return res
def parse_number_in_portuguese_locale(no: str) -> float:
    """
    Parse a number written in the pt-BR convention into a float.

    In this convention '.' is the thousands separator and ',' is the
    decimal separator. A '%' sign is rewritten as the exponent 'e-2',
    so percentages come back as fractions ('3%' -> 0.03).

    Input:
        A number string, e.g. '1.234,56' or '45,56%'

    Output:
        The parsed float, or None when the value cannot be parsed
        (non-string input or malformed text). The failure is logged.
    """
    try:
        # Order matters: drop thousands separators before turning the
        # decimal comma into a dot. 'e-2' contains no '.' or ',', so the
        # percent rewrite is unaffected by the later replacements.
        normalized = no.replace('%', 'e-2').replace(".", "").replace(",", ".")
        return float(normalized)
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError: input is not a string (e.g. None, NaN).
        # ValueError: the text is not a valid number after normalisation.
        # Log the caller's original value, not the normalised string.
        logging.error("Error: Unable to parse the number. %s", no)
        return None

def perc_to_float(val):
def perc_to_float(val: Series):
"""
Percent to float
- replace string in pt-br to float
- from '45,56%' to 0.4556

Input:
(DataFrame, column_name)
Series
"""

res = val
res = res.replace( to_replace=r'[%]', value='' , regex=True )
res = res.replace( to_replace=r'[.]', value='' , regex=True )
res = res.replace( to_replace=r'[,]', value='.', regex=True )
res = res.astype(float) / 100

return res
return fmt_dec(val)


def print_csv(data):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_setor.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

###
@pytest.mark.parametrize(
'param',[ pytest.param(35, id='fin' )
'param',[ pytest.param(36, id='div' )
, pytest.param(38, id='seg' )
, pytest.param(38, id='prev')
])
Expand Down
4 changes: 2 additions & 2 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def test_fmt_dec():
'col3': [ 13,23]}
b = { 'data': [ '45,56%','1.045,56%' ]}
b.update(more_data)
a = { 'data': [ '45.56%','1045.56%' ]}
a = { 'data': [ 45.56e-2,1045.56e-2 ]}
a.update(more_data)

_before = pd.DataFrame(b)
Expand All @@ -68,7 +68,7 @@ def test_perc_to_float():
'col3': [ 13,23]}
b = { 'data': [ '45,56%','1.045,56%' ]}
b.update(more_data)
a = { 'data': [ 0.4556, 10.4556 ]}
a = { 'data': [45.56e-2, 1045.56e-2]}
a.update(more_data)

_before = pd.DataFrame(b)
Expand Down
15 changes: 15 additions & 0 deletions tests/test_utils_unittest.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,21 @@ def test_from_pt_br_02(self):

pd.testing.assert_frame_equal(_test.to_frame(), _after)

# Test table: each entry pairs a number string (using '.' for thousands,
# ',' for decimals and an optional trailing '%') with the numeric value
# that utils.parse_number_in_portuguese_locale is expected to return.
constants = [
("1.000.000", 1000000),
("1,23", 1.23),
("1.234,56", 1234.56),
("1,23%", 1.23e-2)
]

class Test_parse_numbers(unittest.TestCase):
    """Checks utils.parse_number_in_portuguese_locale against `constants`."""

    def test_numbers(self):
        """Every (text, expected) pair in `constants` parses to its value."""
        for text, expected in constants:
            # subTest reports the failing input and lets the remaining
            # cases run instead of stopping at the first mismatch.
            with self.subTest(text=text):
                self.assertAlmostEqual(
                    utils.parse_number_in_portuguese_locale(text),
                    expected)



if __name__ == '__main__':

Expand Down