Skip to content

Commit

Permalink
[Enh] Logging and progress bar output (#14)
Browse files Browse the repository at this point in the history
* Changed logging output to debug

* Show tqdm progress bar during serialization

* Added parameter to control showing the progress bar

* Changed ProvenanceGraph print outputs to logging calls

* Added unit tests to check that the serialization progress bar is shown or hidden as requested

---------

Co-authored-by: Cristiano Köhler <c.koehler@fz-juelich.de>
  • Loading branch information
kohlerca and Cristiano Köhler authored Aug 7, 2023
1 parent f135c78 commit 5d5d617
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 17 deletions.
29 changes: 20 additions & 9 deletions alpaca/decorator.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ def _capture_code_and_function_provenance(self, lineno, function):
source_line = \
self._source_code.extract_multiline_statement(lineno)
ast_tree = ast.parse(source_line)
logger.info(f"Line {lineno} -> {source_line}")
logger.debug(f"Line {lineno} -> {source_line}")

# 2. Check if there is an assignment to one or more
# variables. This will be used to identify if there are
Expand Down Expand Up @@ -454,7 +454,7 @@ def wrapped(*args, **kwargs):

builtin_object_hash = _ALPACA_SETTINGS[
'use_builtin_hash_for_module']
logging.debug(f"Builtin object hash: {builtin_object_hash}")
logger.debug(f"Builtin object hash: {builtin_object_hash}")

lineno = None

Expand Down Expand Up @@ -579,11 +579,18 @@ def _set_calling_frame(cls, frame):
cls.script_info = _FileInformation(cls.source_file).info()

@classmethod
def get_prov_info(cls):
def get_prov_info(cls, show_progress=False):
"""
Returns the representation of the captured provenance information
Returns the RDF representation of the captured provenance information
according to the Alpaca ontology based on the W3C PROV-O.
Parameters
----------
show_progress : bool, optional
If True, show a bar with the progress of the conversion of the
captured provenance information to RDF.
Default: False
Returns
-------
serialization.AlpacaProvDocument
Expand All @@ -592,7 +599,8 @@ def get_prov_info(cls):
prov_document = AlpacaProvDocument()
prov_document.add_history(script_info=cls.script_info,
session_id=cls.session_id,
history=cls.history)
history=cls.history,
show_progress=show_progress)
return prov_document

@classmethod
Expand Down Expand Up @@ -646,10 +654,10 @@ def print_history():
pprint(Provenance.history)


def save_provenance(file_name=None, file_format='ttl'):
def save_provenance(file_name=None, file_format='ttl',show_progress=False):
"""
Serialize provenance information according to the Alpaca ontology based
on the W3C PROV Ontology (PROV-O).
Serialize provenance information to RDF according to the Alpaca ontology
based on the W3C PROV Ontology (PROV-O).
Parameters
----------
Expand All @@ -668,6 +676,9 @@ def save_provenance(file_name=None, file_format='ttl'):
* 'json': JSON-LD
Default: 'ttl'
show_progress : bool, optional
If True, show a bar with the progress of the serialization to RDF.
Default: False
Returns
-------
Expand All @@ -683,6 +694,6 @@ def save_provenance(file_name=None, file_format='ttl'):
if file_format in RDF_FILE_FORMAT_MAP:
file_format = RDF_FILE_FORMAT_MAP[file_format]

prov_document = Provenance.get_prov_info()
prov_document = Provenance.get_prov_info(show_progress=show_progress)
prov_data = prov_document.serialize(file_name, file_format=file_format)
return prov_data
14 changes: 12 additions & 2 deletions alpaca/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import re
from itertools import chain
from collections import defaultdict
import logging

import networkx as nx
from networkx.algorithms.summarization import (_snap_eligible_group,
Expand All @@ -33,6 +34,15 @@
from alpaca.utils.files import _get_file_format


# Module-level logger with a dedicated stream handler. Propagation is
# disabled, so records are emitted only through `log_handler` and are not
# passed on to ancestor loggers.
# NOTE(review): the logger is registered under the module file path
# (`__file__`), not the dotted module name -- confirm this is intended.
logger = logging.getLogger(__file__)
logger.propagate = False

log_handler = logging.StreamHandler()
log_handler.setFormatter(
    logging.Formatter(
        "[%(asctime)s] alpaca.graph -"
        " %(levelname)s: %(message)s"
    )
)
logger.addHandler(log_handler)


# String constants to use in the output
# These may be added to the names of `NameValuePair` information
PREFIX_ATTRIBUTE = "attribute"
Expand Down Expand Up @@ -307,7 +317,7 @@ def _transform_graph(graph, annotations=None, attributes=None,
transformed = nx.DiGraph()
none_nodes = []

print("Creating nodes")
logger.debug("Creating nodes")

# Copy all the Entity nodes, while adding the requested attributes and
# annotations as node data.
Expand All @@ -323,11 +333,11 @@ def _transform_graph(graph, annotations=None, attributes=None,
strip_namespace=strip_namespace)
transformed.add_node(node_id, **data)

print("Creating edges")
# Add all the edges.
# If usage/generation, create additional nodes for the function call,
# with the parameters as node data.
# If membership, membership flag is set to True, as this will be used.
logger.debug("Creating edges")

for s, func_execution in graph.subject_objects(PROV.wasGeneratedBy):

Expand Down
16 changes: 11 additions & 5 deletions alpaca/serialization/prov.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from alpaca.alpaca_types import DataObject, File, Container
from alpaca.settings import _ALPACA_SETTINGS

from tqdm import tqdm

def _add_name_value_pair(graph, uri, predicate, name, value):
# Add a relationship defined by `predicate` using a blank node as object.
Expand Down Expand Up @@ -286,10 +287,11 @@ def _is_membership(function_info):
# Associate the activity to the script
self._wasAssociatedWith(activity=cur_activity, agent=script_agent)

def add_history(self, script_info, session_id, history):
def add_history(self, script_info, session_id, history,
show_progress=False):
"""
Adds a history of `FunctionExecution` records captured by Alpaca to a
PROV document using the Alpaca PROV ontology. The script is added as
Adds a history of `FunctionExecution` records captured by Alpaca to an
RDF document using the Alpaca PROV ontology. The script is added as
a `ScriptAgent` agent.
Parameters
Expand All @@ -300,10 +302,14 @@ def add_history(self, script_info, session_id, history):
session_id : str
Unique identifier for this script execution.
history : list of FunctionExecution
Provenance history to be serialized as PROV.
Provenance history to be serialized as RDF using PROV.
show_progress : bool, optional
If True, show the progress of the provenance history serialization.
Default: False
"""
script_agent = self._add_ScriptAgent(script_info, session_id)
for execution in history:
for execution in tqdm(history, desc="Serializing provenance history",
disable=not show_progress):
self._add_function_execution(execution, script_agent, script_info,
session_id)

Expand Down
31 changes: 31 additions & 0 deletions alpaca/test/test_decorator.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,37 @@ def test_activate_deactivate(self):
self.assertEqual(Provenance.history[1].code_statement,
"simple_function(TEST_ARRAY, 3, 4)")

def test_save_provenance_show_progress(self):
    """
    Check that `save_provenance(..., show_progress=True)` writes the
    serialization progress bar to STDERR.
    """
    # Imported locally so this test does not depend on the module-level
    # import block.
    from contextlib import redirect_stderr

    activate(clear=True)
    res = simple_function(TEST_ARRAY, 1, 2)
    deactivate()

    # Capture STDERR only while serializing. The context manager puts back
    # whatever stream was installed before (not necessarily
    # `sys.__stderr__`), and does so even if `save_provenance` raises.
    captured = StringIO()
    with redirect_stderr(captured):
        save_provenance(file_name=None, show_progress=True)

    self.assertIn("Serializing provenance history: 100%",
                  captured.getvalue())

def test_save_provenance_no_progress(self):
    """
    Check that `save_provenance` called without `show_progress` writes
    nothing to STDERR.
    """
    # Imported locally so this test does not depend on the module-level
    # import block.
    from contextlib import redirect_stderr

    activate(clear=True)
    res = simple_function(TEST_ARRAY, 1, 2)
    deactivate()

    # Capture STDERR only while serializing. The context manager puts back
    # whatever stream was installed before (not necessarily
    # `sys.__stderr__`), and does so even if `save_provenance` raises.
    captured = StringIO()
    with redirect_stderr(captured):
        save_provenance(file_name=None)

    self.assertEqual(captured.getvalue(), "")

def test_save_provenance(self):
activate(clear=True)
res = simple_function(TEST_ARRAY, 1, 2)
Expand Down
3 changes: 2 additions & 1 deletion requirements/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ numpy
networkx>=2.6
dill==0.3.3
joblib>=1.2.0
rdflib>=6.0
rdflib>=6.0
tqdm

0 comments on commit 5d5d617

Please sign in to comment.