From c687c8dbb7b74568802585d44e9f8a45d6e415bb Mon Sep 17 00:00:00 2001 From: Jon Ander Oribe Date: Tue, 21 Feb 2023 13:15:58 +0100 Subject: [PATCH] Update for cases --- models/case.py | 15 +++++++-------- scrapper/data_scrapper.py | 3 +++ scrapper/supreme_court_spain.py | 2 ++ setup.py | 2 +- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/models/case.py b/models/case.py index a062f01..af0c0c1 100644 --- a/models/case.py +++ b/models/case.py @@ -1,10 +1,9 @@ class Case: - def __init__(self, case_ref,case_data:str): - self.case_ref:str = case_ref + def __init__(self, case_data:str): self.resolution_type:str = case_data['TIPO_RESOLUCION'] - self.resolution_number:str = case_data['NUMERO_RESOLUCION'] - self.resolution_year:str = case_data['ANNO_RESOLUCION'] - self.resolution_BIS:str = case_data['BIS_RESOLUCION'] + self.resolution_number:int = case_data['NUMERO_RESOLUCION'] + self.resolution_year:int = case_data['ANNO_RESOLUCION'] + self.resolution_BIS:bool = case_data['BIS_RESOLUCION'] self.registration_date:str = case_data['FECHA_REGISTRO'] self.numeric_type:str = case_data['TIPO_NUMERADO'] @@ -13,15 +12,15 @@ def __init__(self, case_ref,case_data:str): self.descriptive_synthesis:str = case_data['SINTESIS_DESCRIPTIVA'] self.analytic_synthesis:str = case_data['SINTESIS_ANALITICA'] - self.boe_number:str = case_data['NUMERO_BOE'] + self.boe_number:int = case_data['NUMERO_BOE'] self.boe_date:str = case_data['FECHA_BOE'] - self.green_tome_number:str = case_data['NUMERO_TOMO_VERDE'] + self.green_tome_number:int = case_data['NUMERO_TOMO_VERDE'] self.signature_date:str = case_data['FECHA_FIRMA'] self.boe_reference:str = case_data['REFERENCIA_BOE'] self.xml_boe_corrections:str = case_data['XML_CORRECCIONES_BOE'] self.last_update:str = case_data['ULTIMA_ACTUALIZACION'] - self.content_irrelevant_for_internet:str = case_data['CONTENIDO_IRRELEVANTE_PARA_INTERNET'] + self.content_irrelevant_for_internet:bool = case_data['CONTENIDO_IRRELEVANTE_PARA_INTERNET'] self.cache_date:str = case_data['FECHA_CACHE'] diff --git a/scrapper/data_scrapper.py b/scrapper/data_scrapper.py index ddc2d4f..86a3449 100644 --- a/scrapper/data_scrapper.py +++ b/scrapper/data_scrapper.py @@ -5,6 +5,7 @@ from models.abstract import Abstract from models.article import Article from models.background import Background +from models.case import Case from models.dictum import Dictum from models.fundamentals import Fundamentals from models.header import Header @@ -17,6 +18,7 @@ class DataScrapper: def __init__(self, source) -> None: self.source:str = {section: dict(config.items(section)) for section in config.sections()}['URLS'][source.lower()] + self.cases:list[Case] = [] self.magistrates:list[Magistrate] = [] self.backgrounds:list[Background] = [] self.articles:list[Article] = [] @@ -35,6 +37,7 @@ def _save_data(self, file_name:str, data, format:str, output_path:str): json.dump(data, outfile) if format == 'graph': case_ref = data['REFERENCIA_BOE'] + self.cases.append(Case(data)) for elem in data['RESOLUCIONES_MAGISTRADOS']: self.magistrates.append(Magistrate(case_ref,elem)) for elem in data['RESOLUCIONES_ANTECEDENTES']: diff --git a/scrapper/supreme_court_spain.py b/scrapper/supreme_court_spain.py index ef8eb5c..2263f5b 100644 --- a/scrapper/supreme_court_spain.py +++ b/scrapper/supreme_court_spain.py @@ -10,6 +10,8 @@ def __init__(self,source:str, case_ids:List[str]): self.case_ids:List = case_ids def prepare_csv(self,output_path:str): + cases_json = json.loads(json.dumps([ob.__dict__ for ob in self.cases])) + pd.DataFrame.from_records(cases_json).to_csv(output_path+'cases.csv',index=False) magistrates_json = json.loads(json.dumps([ob.__dict__ for ob in self.magistrates])) pd.DataFrame.from_records(magistrates_json).to_csv(output_path+'magistrates.csv',index=False) backgrounds_json = json.loads(json.dumps([ob.__dict__ for ob in self.backgrounds])) diff --git a/setup.py b/setup.py index e46fb81..c8631e3 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,7 @@ # This call to setup() does all the work setup( name="lawLib", - version="0.5.1", + version="0.5.2", description="Library to extract legal information from official resources", long_description=long_description,