Pp parser memory fix #587

Merged
6 commits merged on Oct 15, 2020
Changes from 2 commits
41 changes: 19 additions & 22 deletions aiida_quantumespresso/parsers/pp.py
@@ -66,8 +66,6 @@ def parse(self, **kwargs):
except (IOError, OSError):
return self.exit_codes.ERROR_OUTPUT_STDOUT_READ

data_raw = []

# Currently all plot output files should start with the `filplot` as prefix. If only one file was produced the
# prefix is the entire filename, but in the case of multiple files, there will be pairs of two files where the
first has the format '{filename_prefix}.{some_random_suffix}' and the second has the same name but with the
@@ -88,20 +86,6 @@ def parse(self, **kwargs):
filenames = retrieved.list_object_names()
file_opener = retrieved.open

for filename in filenames:
if filename.endswith(filename_suffix):
try:
with file_opener(filename) as handle:
data_raw.append((filename, handle.read()))
except OSError:
return self.exit_codes.ERROR_OUTPUT_DATAFILE_READ.format(filename=filename)

# If we don't have any parsed files, we exit. Note that this will not catch the case where there should be more
# than one file, but the engine did not retrieve all of them. Since often we anyway don't know how many files
# should be retrieved there really is no way to check this explicitly.
if not data_raw:
return self.exit_codes.ERROR_OUTPUT_DATAFILE_MISSING.format(filename=filename_prefix)

try:
logs, self.output_parameters = self.parse_stdout(stdout_raw)
except Exception:
@@ -140,12 +124,25 @@ def get_key_from_filename(filename):
matches = re.search(pattern, filename)
return matches.group(1)

for filename, data in data_raw:
try:
key = get_key_from_filename(filename)
data_parsed.append((key, parsers[iflag](data)))
except Exception: # pylint: disable=broad-except
return self.exit_codes.ERROR_OUTPUT_DATAFILE_PARSE.format(filename=filename)
for filename in filenames:
if filename.endswith(filename_suffix):
try:
with file_opener(filename) as handle:
data_raw = handle.read()
except OSError:
return self.exit_codes.ERROR_OUTPUT_DATAFILE_READ.format(filename=filename)

try:
key = get_key_from_filename(filename)
data_parsed.append((key, parsers[iflag](data_raw)))
except Exception: # pylint: disable=broad-except
return self.exit_codes.ERROR_OUTPUT_DATAFILE_PARSE.format(filename=filename)

# If we don't have any parsed files, we exit. Note that this will not catch the case where there should be more
# than one file, but the engine did not retrieve all of them. Since often we anyway don't know how many files
# should be retrieved there really is no way to check this explicitly.
if not data_parsed:
return self.exit_codes.ERROR_OUTPUT_DATAFILE_MISSING.format(filename=filename_prefix)

# Create output nodes
if len(data_parsed) == 1:
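The core of the change is visible in the hunks above: the old code first read every retrieved data file into a `data_raw` list and only parsed them in a second loop, so all raw file contents sat in memory at once; the new code reads and parses each file inside a single loop, and the missing-file check now inspects `data_parsed` after the loop. The following is a minimal sketch of that single-pass pattern, not the actual aiida-quantumespresso parser: `parse_retrieved`, `open_file`, and `parse_data` are hypothetical stand-ins for the real parser machinery.

```python
# Sketch of the pattern adopted by this PR: read and parse each retrieved file in one
# pass, so only a single file's raw contents is held in memory at any time.
# All names here are illustrative, not the real aiida-quantumespresso API.

def parse_retrieved(filenames, open_file, suffix, parse_data):
    """Parse every retrieved file ending in ``suffix``, one file at a time."""
    data_parsed = []

    for filename in filenames:
        if not filename.endswith(suffix):
            continue

        # Read a single file; its raw contents goes out of scope before the next
        # file is opened, instead of being appended to a shared data_raw list.
        with open_file(filename) as handle:
            raw = handle.read()

        data_parsed.append((filename, parse_data(raw)))

    # The "no output files" check now looks at the parsed results, since raw file
    # contents are no longer accumulated separately.
    if not data_parsed:
        raise FileNotFoundError('no data files were retrieved or parsed')

    return data_parsed
```

With this arrangement the peak memory usage is bounded by the largest single retrieved file rather than the combined size of all of them, which is the point of the memory fix.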