From 9ef620c406943aecca9cf331e292ec9507efeab4 Mon Sep 17 00:00:00 2001
From: "Peter N. O. Gillespie" <peter.no.gillespie@gmail.com>
Date: Wed, 21 Sep 2022 11:22:18 +0200
Subject: [PATCH] `XspectraParser`: Fix bugs in parsing vector/coord system

Adds information on the coordinate system used in the XSpectra
calculation (either crystallographic or Cartesian). Also addresses a bug
spotted in the stdout of XSpectra itself, where the epsilon/k-vector is
reported by the code during the `read_save_file` subroutine as being in
`[Cartesian frame]` regardless of the coordinate system used by the
parent XSpectra calculation. Because of this, the parser cannot
meaningfully record the coordinate system used by the calculation in the
`output_parameters` dictionary for calculations where
`xonly_plot=.true.`.

This fix also addresses the fact that the epsilon/k-vector is
essentially reported twice in the event of a calculation restart: once
when the input file is read and a second time when the save file is
read.
---
 src/aiida_quantumespresso/parsers/xspectra.py | 34 ++++++++++++++-----
 .../test_xspectra/test_xspectra_default.yml   |  1 +
 .../test_xspectra/test_xspectra_spin.yml      |  1 +
 3 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/src/aiida_quantumespresso/parsers/xspectra.py b/src/aiida_quantumespresso/parsers/xspectra.py
index b513605cd..a97746974 100644
--- a/src/aiida_quantumespresso/parsers/xspectra.py
+++ b/src/aiida_quantumespresso/parsers/xspectra.py
@@ -157,22 +157,38 @@ def parse_stdout_xspectra(filecontent, codename=None, message_map=None):
 
     logs = get_logging_container()
     parsed_data = {}
+    # `plot_only` must be set before looping over the lines because of a bug spotted in the XSpectra
+    # output when `xonly_plot=.true.`: the epsilon vector is reported before the value of
+    # `xonly_plot`, so the check on that vector needs a starting value before the loop begins.
+    parsed_data['plot_only'] = False
 
     lines = filecontent if isinstance(filecontent, list) else filecontent.split('\n')
 
     # Parse the necessary information for data plotting: core level energy of the
     # absorbing atom and the energy zero of the spectrum (typically the Fermi level)
     for line in lines:
-        if 'xepsilon' in line:
-            eps_vector_string = line.split(':')[-1].strip()
-            eps_vector_list = [float(i) for i in eps_vector_string.split('   ')]
-            parsed_data['epsilon_vector'] = eps_vector_list
         if 'xonly_plot:' in line:
             is_only_plot_string = line.split(':')[-1].strip()
-            if is_only_plot_string == 'FALSE':
-                parsed_data['plot_only'] = False
-            elif is_only_plot_string == 'TRUE':
+            if is_only_plot_string == 'TRUE':
                 parsed_data['plot_only'] = True
+        if 'xepsilon  [crystallographic coordinates]' in line:
+            eps_vector_string = line.split(':')[-1].strip()
+            eps_vector_list = [float(i) for i in eps_vector_string.split('   ')]
+            parsed_data['epsilon_vector'] = eps_vector_list
+            parsed_data['eps_k_vector_coords'] = 'crystal'
+        elif 'xepsilon  [cartesian coordinates]' in line:
+            eps_vector_string = line.split(':')[-1].strip()
+            eps_vector_list = [float(i) for i in eps_vector_string.split('   ')]
+            parsed_data['epsilon_vector'] = eps_vector_list
+            parsed_data['eps_k_vector_coords'] = 'cartesian'
+        # There is an oversight in the XSpectra code where the value of xepsilon is reported
+        # as being in "Cartesian frame" when read from the save file, regardless of the
+        # coordinate system set in the parent calculation. We therefore do not record the
+        # coordinate system for a plot-only (`xonly_plot`) run.
+        if 'xepsilon [Cartesian frame]' in line and parsed_data['plot_only']:
+            eps_vector_string = line.split(':')[-1].strip()
+            eps_vector_list = [float(i) for i in eps_vector_string.split('   ')]
+            parsed_data['epsilon_vector'] = eps_vector_list
         if 'From SCF save directory' in line:
             if '(spin polarized work)' in line:
                 spin = True
@@ -204,7 +220,7 @@ def parse_stdout_xspectra(filecontent, codename=None, message_map=None):
             parsed_data['fermi_energy'] = ef_energy
             parsed_data['fermi_energy_units'] = ef_energy_units
 
-        # parse dynamical RAM estimates
+        # parse per-process dynamical RAM estimates
         if 'Estimated max dynamical RAM per process' in line:
             value = line.split('>')[-1]
             match = re.match(r'\s+([+-]?\d+(\.\d*)?|\.\d+([eE][+-]?\d+)?)\s*(Mb|MB|GB)', value)
@@ -215,7 +231,7 @@ def parse_stdout_xspectra(filecontent, codename=None, message_map=None):
                 except (IndexError, ValueError):
                     pass
 
-        # parse dynamical RAM estimates
+        # parse total dynamical RAM estimates
         if 'Estimated total dynamical RAM' in line:
             value = line.split('>')[-1]
             match = re.match(r'\s+([+-]?\d+(\.\d*)?|\.\d+([eE][+-]?\d+)?)\s*(Mb|MB|GB)', value)
diff --git a/tests/parsers/test_xspectra/test_xspectra_default.yml b/tests/parsers/test_xspectra/test_xspectra_default.yml
index 5e998a4be..f016246f7 100644
--- a/tests/parsers/test_xspectra/test_xspectra_default.yml
+++ b/tests/parsers/test_xspectra/test_xspectra_default.yml
@@ -4,6 +4,7 @@ parameters:
   core_level_energy_units: eV
   energy_zero: '4.1718'
   energy_zero_units: eV
+  eps_k_vector_coords: crystal
   epsilon_vector:
   - 1.0
   - 0.0
diff --git a/tests/parsers/test_xspectra/test_xspectra_spin.yml b/tests/parsers/test_xspectra/test_xspectra_spin.yml
index f1279e185..438422df3 100644
--- a/tests/parsers/test_xspectra/test_xspectra_spin.yml
+++ b/tests/parsers/test_xspectra/test_xspectra_spin.yml
@@ -4,6 +4,7 @@ parameters:
   core_level_energy_units: eV
   energy_zero: '6.8316'
   energy_zero_units: eV
+  eps_k_vector_coords: crystal
   epsilon_vector:
   - 1.0
   - 0.0