
Commit 69ab6f7

Tidy up
MariusWirtz committed Sep 22, 2021
1 parent af1727b commit 69ab6f7
Showing 1 changed file with 31 additions and 60 deletions.
91 changes: 31 additions & 60 deletions TM1py/Services/CellService.py
@@ -3,8 +3,6 @@
 import functools
 import itertools
 import json
-import sys
-import time
 import uuid
 import warnings
 from collections import OrderedDict
@@ -37,27 +35,6 @@
 _has_pandas = False
 
 
-def get_size(obj, seen=None):
-    """Recursively finds size of objects"""
-    size = sys.getsizeof(obj)
-    if seen is None:
-        seen = set()
-    obj_id = id(obj)
-    if obj_id in seen:
-        return 0
-    # Important mark as seen *before* entering recursion to gracefully handle
-    # self-referential objects
-    seen.add(obj_id)
-    if isinstance(obj, dict):
-        size += sum([get_size(v, seen) for v in obj.values()])
-        size += sum([get_size(k, seen) for k in obj.keys()])
-    elif hasattr(obj, '__dict__'):
-        size += get_size(obj.__dict__, seen)
-    elif hasattr(obj, '__iter__') and not isinstance(obj, (str, bytes, bytearray)):
-        size += sum([get_size(i, seen) for i in obj])
-    return size
-
-
 def tidy_cellset(func):
     """ Higher order function to tidy up cellset after usage
     """
@@ -1995,85 +1972,78 @@ def extract_cellset_csv_large(
         :param include_attributes: include attribute columns
         :return: Raw format from TM1.
         """
-        _, _, rows, columns = self.extract_cellset_composition(cellset_id, delete_cellset=False,
-                                                               sandbox_name=sandbox_name, **kwargs)
+        _, _, rows, columns = self.extract_cellset_composition(
+            cellset_id,
+            delete_cellset=False,
+            sandbox_name=sandbox_name,
+            **kwargs)
 
         # try a different approach, as JSON into dict blows up memory by 10x
         # first get cellset as JSON string
-        print('Retrieve cellset over web')
-        start_time = time.perf_counter()
-        cellset_response = self.extract_cellset_raw_response(cellset_id, cell_properties=["Value"], top=top, skip=skip,
-                                                             skip_contexts=True, skip_zeros=skip_zeros,
-                                                             skip_consolidated_cells=skip_consolidated_cells,
-                                                             skip_rule_derived_cells=skip_rule_derived_cells,
-                                                             delete_cellset=True, sandbox_name=sandbox_name,
-                                                             elem_properties=['Name'],
-                                                             member_properties=['Name',
-                                                                                'Attributes'] if include_attributes else None,
-                                                             **kwargs)
-        print("Get everything web retrieve took %s seconds" % (time.perf_counter() - start_time))
+        cellset_response = self.extract_cellset_raw_response(
+            cellset_id, cell_properties=["Value"], top=top, skip=skip,
+            skip_contexts=True, skip_zeros=skip_zeros,
+            skip_consolidated_cells=skip_consolidated_cells,
+            skip_rule_derived_cells=skip_rule_derived_cells,
+            delete_cellset=True,
+            sandbox_name=sandbox_name,
+            elem_properties=['Name'],
+            member_properties=['Name', 'Attributes'] if include_attributes else None,
+            **kwargs)
 
         # start parsing of JSON directly into CSV
-        print('Start parse JSON into CSV')
         dimension_list = []
         axes0_list = []
         axes1_list = []
         current_axes = 0
         current_tuple = 0
         current_cell_ordinal = 0
         csv_lines = []
         csv = ''
-        start_time = time.perf_counter()
 
         parser = ijson.parse(cellset_response.content)
         prefixes_of_interest = ['Cells.item.Value', 'Axes.item.Tuples.item.Members.item.Element.Name',
                                 'Cells.item.Ordinal', 'Axes.item.Tuples.item.Ordinal', 'Cube.Dimensions.item.Name',
                                 'Axes.item.Ordinal']
         gen = ((prefix, event, value) for prefix, event, value in parser if prefix in prefixes_of_interest)
-        start_time = time.perf_counter()
         for prefix, event, value in gen:
-            # print('Prefix:%s Event: %s Value:%s'%(prefix, event, value))
-            if (prefix) == ('Cells.item.Value'):
+            if prefix == 'Cells.item.Value':
                 q, r = divmod(current_cell_ordinal, len(axes0_list))
                 axes0_index = r
                 axes1_index = q
                 if len(axes0_list) == 1 and axes0_list[0] == '':
-                    csv_lines.append('~'.join([axes1_list[axes1_index], str(value)]))
+                    csv_lines.append(value_separator.join([axes1_list[axes1_index], str(value)]))
                 else:
-                    csv_lines.append('~'.join([axes1_list[axes1_index], axes0_list[axes0_index], str(value)]))
+                    csv_lines.append(value_separator.join([axes1_list[axes1_index], axes0_list[axes0_index], str(value)]))
 
             elif (prefix, event) == ('Axes.item.Tuples.item.Members.item.Element.Name', 'string'):
                 if current_axes == 0:
-                    axes0_list[current_tuple] += ('' if axes0_list[current_tuple] == '' else '~') + value
+                    axes0_list[current_tuple] += ('' if axes0_list[current_tuple] == '' else value_separator) + value
                 else:
-                    axes1_list[current_tuple] += ('' if axes1_list[current_tuple] == '' else '~') + value
+                    axes1_list[current_tuple] += ('' if axes1_list[current_tuple] == '' else value_separator) + value
 
             elif (prefix, event) == ('Cells.item.Ordinal', 'number'):
                 current_cell_ordinal = value
 
             elif (prefix, event) == ('Axes.item.Tuples.item.Ordinal', 'number'):
                 current_tuple = value
                 if current_axes == 0:
                     axes0_list.append('')
                 else:
                     axes1_list.append('')
 
             elif (prefix, event) == ('Cube.Dimensions.item.Name', 'string'):
                 dimension_list.append(value)
 
             elif (prefix, event) == ('Axes.item.Ordinal', 'number'):
                 # write out csv header if we haven't yet
                 if len(csv_lines) == 0:
                     dimension_list.append('Value')
-                    csv_lines.append('~'.join(dimension_list))
+                    csv_lines.append(value_separator.join(dimension_list))
                 current_axes = value
-        # remove last cr from csv
-        print("Parse JSON to CSV took %s seconds" % (time.perf_counter() - start_time))
-        start_time = time.perf_counter()
-        csv = '\r\n'.join(csv_lines)
-        print("CSV join took %s seconds" % (time.perf_counter() - start_time))
+
+        csv = line_separator.join(csv_lines)
 
         # close response
         cellset_response.close()
 
-        # f = open('c:/temp/temp.csv','w')
-        # f.write(csv)
-        # f.close()
-
         return csv
 
     @require_pandas
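Editor's note: the hunk above streams the raw HTTP body through ijson instead of materialising the whole JSON document as a Python dict. The parser yields (prefix, event, value) triples; axis-0 (column) and axis-1 (row) tuple labels are collected first, and each cell's Ordinal is then mapped to a (row, column) pair with divmod, since cell ordinals run row-major across the column axis. A minimal sketch of that mechanism against a simplified, hypothetical cellset payload (single-member tuples, hard-coded '~' separator; not TM1py code):

```python
import io

import ijson

# JSON shaped like a simplified TM1 cellset response (values are made up)
cellset_json = b'''{
  "Cube": {"Dimensions": [{"Name": "Region"}, {"Name": "Measure"}]},
  "Axes": [
    {"Ordinal": 0, "Tuples": [{"Ordinal": 0, "Members": [{"Element": {"Name": "Revenue"}}]}]},
    {"Ordinal": 1, "Tuples": [{"Ordinal": 0, "Members": [{"Element": {"Name": "UK"}}]},
                              {"Ordinal": 1, "Members": [{"Element": {"Name": "US"}}]}]}
  ],
  "Cells": [{"Ordinal": 0, "Value": 100}, {"Ordinal": 1, "Value": 200}]
}'''

columns, rows = [], []           # axis 0 = columns, axis 1 = rows
current_axes = ordinal = 0
for prefix, event, value in ijson.parse(io.BytesIO(cellset_json)):
    if prefix == 'Axes.item.Ordinal':
        current_axes = value     # axis headers arrive before cells
    elif prefix == 'Axes.item.Tuples.item.Members.item.Element.Name':
        (columns if current_axes == 0 else rows).append(value)
    elif prefix == 'Cells.item.Ordinal':
        ordinal = value
    elif prefix == 'Cells.item.Value':
        # ordinals run row-major over the column axis: divmod -> (row, column)
        row, col = divmod(ordinal, len(columns))
        print(f"{rows[row]}~{columns[col]}~{value}")
```

This prints `UK~Revenue~100` and `US~Revenue~200`: one CSV line per cell, emitted as soon as the cell's value event arrives, with only the axis labels held in memory.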
@@ -2087,7 +2057,8 @@ def extract_cellset_dataframe(
             skip_rule_derived_cells: bool = False,
             sandbox_name: str = None,
             include_attributes: bool = False,
-            stream_decode: bool = False,
+            # ToDo: set back to False. Only True to channel test cases against new function
+            stream_decode: bool = True,
             **kwargs) -> 'pd.DataFrame':
         """ Build pandas data frame from cellset_id
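Editor's note: the ToDo above temporarily flips the stream_decode default so that the extract_cellset_dataframe test cases exercise the new streaming CSV path. The wiring below is a guess at that intent, not TM1py code: once the streamed CSV string exists, building the frame is a plain read_csv over an in-memory buffer (the '~' separator and sample CSV content are assumptions):

```python
import io

import pandas as pd

# csv as it might come back from extract_cellset_csv_large (content assumed)
csv = "Region~Measure~Value\r\nUK~Revenue~100\r\nUS~Revenue~200"
df = pd.read_csv(io.StringIO(csv), sep='~')
print(df)
```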
