Skip to content

Commit 88c0bc0

Browse files
committed
Refactor YAML Configuration for CodeEntropy:
- Simplified the YAML input structure to make it more user-friendly and flexible.
- The YAML configuration now supports multiple job runs using a dictionary format, which makes it easier to scale to future additions such as multiprocessing.
- Changed the YAML format to store run configurations in a dictionary for better internal management.
- If no configuration file is provided, or if the config is empty, the logic defaults to a single run using the provided CLI arguments.
- Ensured that the necessary arguments (e.g., `top_traj_file`, `selection_string`) are validated, even when falling back to CLI arguments.
- Added tests to ensure the new behavior works as expected, including cases where no YAML file is provided and CLI arguments are used instead.
1 parent 3bc5250 commit 88c0bc0

File tree

4 files changed

+436
-124
lines changed

4 files changed

+436
-124
lines changed

.coveragerc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[run]
2+
omit =
3+
.vscode-server/*

CodeEntropy/main_mcc.py

Lines changed: 131 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import argparse
22
import math
3+
import os
34

45
import MDAnalysis as mda
56

@@ -13,118 +14,166 @@
1314

1415
# from datetime import datetime
1516

17+
# Declarative table of every CodeEntropy option.
#
# Each key is an option name; its value holds the keyword arguments describing
# that option ("type", "help", "default" and, where needed, "nargs"). The same
# table is read when building the command-line parser and when filling in
# defaults for values that neither the CLI nor the YAML file supplied.
arg_map = {
    "top_traj_file": {
        "type": str,
        "nargs": "+",
        "help": "Path to Structure/topology file followed by Trajectory file(s)",
        "default": [],
    },
    "selection_string": {
        "type": str,
        "help": "Selection string for CodeEntropy",
        "default": "all",
    },
    "start": {
        "type": int,
        "help": "Start analysing the trajectory from this frame index",
        "default": 0,
    },
    "end": {
        "type": int,
        "help": "Stop analysing the trajectory at this frame index",
        "default": -1,
    },
    "step": {
        "type": int,
        # Fixed garbled help text (a stray trailing "index" had been left in).
        "help": "Interval between two consecutive frames to be read",
        "default": 1,
    },
    "bin_width": {
        "type": int,
        "help": "Bin width in degrees for making the histogram",
        "default": 30,
    },
    "tempra": {
        "type": float,
        "help": "Temperature for entropy calculation (K)",
        "default": 298.0,
    },
    "verbose": {
        "type": bool,
        "help": "True/False flag for noisy or quiet output",
        "default": False,
    },
    # Fixed ungrammatical help text ("How many multiprocess to use").
    "thread": {"type": int, "help": "Number of processes to use", "default": 1},
    "outfile": {
        "type": str,
        "help": "Name of the file where the output will be written",
        "default": "outfile.out",
    },
    "resfile": {
        "type": str,
        "help": "Name of the file where the residue entropy output will be written",
        "default": "res_outfile.out",
    },
    "mout": {
        "type": str,
        "help": "Name of the file where certain matrices will be written",
        "default": None,
    },
    "force_partitioning": {"type": float, "help": "Force partitioning", "default": 0.5},
    "waterEntropy": {"type": bool, "help": "Calculate water entropy", "default": False},
}
78+
1679

1780
def load_config(file_path):
    """Load a YAML configuration file and return its top-level mapping.

    Args:
        file_path: Path to the YAML configuration file.

    Returns:
        dict: The parsed top-level mapping. An empty dictionary is returned
        when the file has no content (``yaml.safe_load`` yields ``None``).

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If the top-level YAML node is not a mapping.
    """
    # Open the file directly instead of testing os.path.exists() first: the
    # file could disappear between the check and the open, and one code path
    # for "missing file" is easier to reason about than two.
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            config = yaml.safe_load(file)
    except FileNotFoundError as err:
        raise FileNotFoundError(
            f"Configuration file '{file_path}' not found."
        ) from err

    # If YAML content is empty, return an empty dictionary
    if config is None:
        return {}

    # Callers iterate the result with .items(); reject lists and scalars here
    # with a clear message rather than failing later with AttributeError.
    if not isinstance(config, dict):
        raise ValueError(
            f"Configuration file '{file_path}' must contain a mapping of run "
            f"names to settings, got {type(config).__name__}."
        )

    return config
2193

2294

2395
def setup_argparse():
    """Build the command-line parser from the entries in ``arg_map``.

    Every key of ``arg_map`` becomes a ``--<key>`` option, and the items of
    its entry ("type", "help", "default", "nargs", ...) are forwarded to
    ``ArgumentParser.add_argument`` as keyword arguments.

    Returns:
        argparse.ArgumentParser: Parser exposing one option per ``arg_map``
        key.
    """

    def _str_to_bool(text):
        """Convert a command-line token such as "true" or "False" to a bool."""
        if isinstance(text, bool):
            return text
        token = str(text).strip().lower()
        if token in ("true", "t", "yes", "y", "1", "on"):
            return True
        if token in ("false", "f", "no", "n", "0", "off"):
            return False
        raise argparse.ArgumentTypeError(
            f"expected a boolean value (true/false), got {text!r}"
        )

    parser = argparse.ArgumentParser(
        description="CodeEntropy: Entropy calculation with MCC method."
    )

    for arg, properties in arg_map.items():
        # Copy so the shared arg_map entry is never modified.
        kwargs = dict(properties)

        # argparse applies ``type`` to the raw string, and bool("False") is
        # True because any non-empty string is truthy. Swap in a converter
        # that actually understands true/false spellings.
        if kwargs.get("type") is bool:
            kwargs["type"] = _str_to_bool

        parser.add_argument(f"--{arg}", **kwargs)

    return parser
73106

74107

75-
def merge_configs(args, run_config):
    """Merge one run's YAML settings with CLI arguments and built-in defaults.

    Precedence, highest first:

    1. a value given on the command line,
    2. the value for the same key in ``run_config`` (the YAML run section),
    3. the "default" stored for the key in ``arg_map``.

    The parser built by ``setup_argparse`` registers each ``arg_map`` default
    with argparse, so an option omitted on the command line arrives here
    holding its default rather than ``None``. A value that is ``None`` or
    equal to the ``arg_map`` default is therefore treated as "not given on the
    command line", which lets the YAML value take effect. The trade-off is
    that explicitly passing the default value on the CLI cannot override a
    different value in the YAML file.

    Args:
        args: ``argparse.Namespace`` (or similar attribute container) holding
            the parsed CLI values. It is updated in place.
        run_config: Mapping of option name to value for one run, or ``None``.
            When a dictionary is passed, the final merged values for every
            ``arg_map`` key are written back into it.

    Returns:
        The same ``args`` object, with every ``arg_map`` key populated.
        Because ``args`` is mutated, callers that merge several runs should
        pass a fresh namespace for each run.

    Raises:
        TypeError: If ``run_config`` is neither a dictionary nor ``None``.
    """
    if run_config is None:
        run_config = {}

    if not isinstance(run_config, dict):
        raise TypeError("run_config must be a dictionary or None.")

    def _not_given_on_cli(key):
        """Return True when args holds no user-supplied value for ``key``."""
        current = getattr(args, key, None)
        if current is None:
            return True
        return key in arg_map and current == arg_map[key].get("default")

    # Step 1: Merge YAML configuration into args. A YAML key left empty
    # carries None; it is stored as-is and replaced by the default in step 2.
    for key, value in run_config.items():
        if _not_given_on_cli(key):
            setattr(args, key, value)

    # Step 2: Set default values for any missing arguments from `arg_map`
    for key, params in arg_map.items():
        if getattr(args, key, None) is None:
            default = params.get("default")
            # Hand out a copy of mutable defaults so that later changes to
            # args cannot leak into the shared arg_map table.
            if isinstance(default, list):
                default = list(default)
            elif isinstance(default, dict):
                default = dict(default)
            setattr(args, key, default)

    # Step 3: Record the final merged values back into run_config
    for key in arg_map:
        merged_value = getattr(args, key, None)
        if merged_value is not None:
            run_config[key] = merged_value

    return args
92133

93134

94135
def main():
95136
"""
96137
Main function for calculating the entropy of a system using the multiscale cell
97138
correlation method.
98139
"""
99-
100140
try:
101141
config = load_config("config.yaml")
102-
parser = setup_argparse()
103-
args = parser.parse_args()
104-
config = merge_configs(args, config)
105142

106-
# Check for required arguments
107-
if not config.get("top_traj_file"):
143+
if config is None:
108144
raise ValueError(
109-
"The 'top_traj_file' argument is required but not provided."
110-
)
111-
if not config.get("selection_string"):
112-
raise ValueError(
113-
"The 'selection_string' argument is required but not provided."
145+
"No configuration file found, and no CLI arguments were provided."
114146
)
115147

116-
except argparse.ArgumentError:
117-
print("Command line arguments are ill-defined, please check the arguments")
118-
raise
148+
parser = setup_argparse()
149+
args, unknown = parser.parse_known_args()
150+
151+
# Process each run in the YAML configuration
152+
for run_name, run_config in config.items():
153+
if isinstance(run_config, dict):
154+
# Merging CLI arguments with YAML configuration
155+
args = merge_configs(args, run_config)
156+
157+
# Ensure necessary arguments are provided
158+
if not getattr(args, "top_traj_file"):
159+
raise ValueError(
160+
"The 'top_traj_file' argument is required but not provided."
161+
)
162+
if not getattr(args, "selection_string"):
163+
raise ValueError(
164+
"The 'selection_string' argument is required but not provided."
165+
)
166+
167+
# REPLACE INPUTS
168+
print(f"Printing all input for {run_name}")
169+
for arg in vars(args):
170+
print(f" {arg}: {getattr(args, arg) or ''}")
171+
else:
172+
print(f"Run configuration for {run_name} is not a dictionary.")
119173
except ValueError as e:
120174
print(e)
121175
raise
122176

123-
# REPLACE INPUTS
124-
print("Printing all input")
125-
for arg in vars(args):
126-
print(" {}: {}".format(arg, getattr(args, arg) or ""))
127-
128177
# startTime = datetime.now()
129178

130179
# Get topology and trajectory file names and make universe

config.yaml

Lines changed: 15 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,17 @@
11
---
22

3-
arguments:
4-
CodeEntropy:
5-
top_traj_file:
6-
required: True
7-
default: []
8-
selection_string:
9-
type: str
10-
default: "all"
11-
start:
12-
type: int
13-
default: 0
14-
end:
15-
type: int
16-
default: -1
17-
step:
18-
type: int
19-
default: 1
20-
bin_width:
21-
type: int
22-
default: 30
23-
tempra:
24-
type: float
25-
default: 298.0
26-
verbose:
27-
type: bool
28-
default: False
29-
thread:
30-
type: int
31-
default: 1
32-
outfile:
33-
type: str
34-
default: "outfile.out"
35-
resfile:
36-
default: "res_outfile.out"
37-
mout:
38-
default: null
39-
force_partitioning:
40-
type: float
41-
default: 0.5
42-
waterEntropy:
43-
type: bool
44-
default: False
3+
run1:
4+
top_traj_file:
5+
selection_string:
6+
start:
7+
end:
8+
step:
9+
bin_width:
10+
tempra:
11+
verbose:
12+
thread:
13+
outfile:
14+
resfile:
15+
mout:
16+
force_partitioning:
17+
waterEntropy:

0 commit comments

Comments
 (0)