Skip to content

Commit

Permalink
Changes based on reviews from Galaxy usegalaxy tools team (#18)
Browse files Browse the repository at this point in the history
* comments fixed

* Update ludwig_hyperopt.py

line added for lint

* fix_from_comments_Junhao

* fix_#2

* Change discover_datasets pattern in viz outputs

Noticed that there is a comment about this.

---------

Co-authored-by: JunhaoQiu <56094690+qchiujunhao@users.noreply.github.com>
  • Loading branch information
paulocilasjr and qchiujunhao authored Jan 6, 2025
1 parent dbefc31 commit 6274302
Show file tree
Hide file tree
Showing 11 changed files with 147 additions and 110 deletions.
10 changes: 6 additions & 4 deletions tools/ludwig_autogenconfig.xml
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,20 @@
<version_command>echo "@VERSION@"</version_command>
<command>
<![CDATA[
#import re
#if $input_file
ln -sf '$input_file' "./${input_file.element_identifier}";
#set $sanitized_input_file = re.sub('[^\w\-_\.]', '_', $input_file.element_identifier)
ln -sf '$input_file' "./${sanitized_input_file}";
#end if
python '$__tool_directory__/ludwig_autogenconfig.py'
#if $input_file
--dataset "./${input_file.element_identifier}"
#if $input_file
--dataset "./${sanitized_input_file}"
#end if
--output_feature '$output_feature'
--output '$output'
#if $renderconfig
'$renderconfig'
#end if
#end if
]]>
</command>
<inputs>
Expand Down
15 changes: 9 additions & 6 deletions tools/ludwig_evaluate.xml
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,26 @@
<version_command>echo "@VERSION@"</version_command>
<command>
<![CDATA[
#import re
mkdir -p outputs &&
#if $dataset
ln -sf '$dataset' "./${dataset.element_identifier}";
#set $sanitized_dataset = re.sub('[^\w\-_\.]', '_', $dataset.element_identifier)
ln -sf '$dataset' "./${sanitized_dataset}";
#end if
#if $raw_data
unzip -o -q '$raw_data' -d ./;
unzip -o -q '$raw_data' -d ./;
#end if
python '$__tool_directory__/ludwig_evaluate.py'
#if $model_path
--model_path '$model_path.extra_files_path'
#end if
#if $dataset
--dataset "./${dataset.element_identifier}"
--dataset "./${sanitized_dataset}"
#end if
#if $disable_parallel_threads
--disable_parallel_threads
#end if
--output_directory "./outputs"
--data_format '$data_format'
--split '$split'
Expand All @@ -35,7 +38,6 @@
cp outputs/*.json outputs/*.parquet '$output_report.extra_files_path' &&
echo "Evaluation is Done!"
]]>
</command>
<configfiles>
Expand All @@ -57,7 +59,7 @@
<option value="test">test</option>
<option value="full" selected="true">full</option>
</param>
<param name="batch_size" type="integer" value="128" optional="true" label="Batch size" />
<param name="batch_size" type="integer" value="128" optional="true" label="Batch size" min="1" max="4096"/>
<param name="disable_parallel_threads" type="boolean" checked="false" label="Whether to disable parallel threads for reproducibility?" />
<param name="raw_data" type="data" format="zip" optional="true" label="Raw data" help="Optional. Needed for images."/>
</inputs>
Expand All @@ -79,11 +81,12 @@
</param>
<param name="dataset" value="temperature_la.csv" ftype="csv" />
<param name="split" value="test" />
<output name="output_report" ftype="html" >
<output name="output_report" ftype="html">
<assert_contents>
<has_text text="Evaluate" />
</assert_contents>
</output>

<output_collection name="output_pred_csv">
<element name="predictions_parquet.csv">
<assert_contents>
Expand Down
2 changes: 1 addition & 1 deletion tools/ludwig_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ def render_report(
]

with open(os.path.join(output_directory, "report_config.yml"), 'w') as fh:
yaml.dump(report_config, fh)
yaml.safe_dump(report_config, fh)

report_path = os.path.join(output_directory, "smart_report.html")
generate_report.main(
Expand Down
38 changes: 26 additions & 12 deletions tools/ludwig_experiment.xml
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,38 @@
<version_command>echo "@VERSION@"</version_command>
<command>
<![CDATA[
#import re
#if $config
ln -sf '$config' "./config.yml";
#end if
#if $dataset
ln -sf '$dataset' "./${dataset.element_identifier}";
#set $sanitized_dataset = re.sub('[^\w\-_\.]', '_', $dataset.element_identifier)
ln -sf '$dataset' "./${sanitized_dataset}";
#end if
#if $training_set
ln -sf '$training_set' "./${training_set.element_identifier}";
#set $sanitized_training_set = re.sub('[^\w\-_\.]', '_', $training_set.element_identifier)
ln -sf '$training_set' "./${sanitized_training_set}";
#end if
#if $validation_set
ln -sf '$validation_set' "./${validation_set.element_identifier}";
#set $sanitized_validation_set = re.sub('[^\w\-_\.]', '_', $validation_set.element_identifier)
ln -sf '$validation_set' "./${sanitized_validation_set}";
#end if
#if $test_set
ln -sf '$test_set' "./${test_set.element_identifier}";
#set $sanitized_test_set = re.sub('[^\w\-_\.]', '_', $test_set.element_identifier)
ln -sf '$test_set' "./${sanitized_test_set}";
#end if
#if $raw_data
unzip -o -q '$raw_data' -d ./;
unzip -o -q '$raw_data' -d ./;
#end if
python '$__tool_directory__/ludwig_experiment.py'
#if $config
--config "./config.yml"
Expand All @@ -37,16 +51,16 @@
--model_resume_path '$model_resume_path.model_resume_path'
#end if
#if $dataset
--dataset "./${dataset.element_identifier}"
--dataset "./${sanitized_dataset}"
#end if
#if $training_set
--training_set "./${training_set.element_identifier}"
--training_set "./${sanitized_training_set}"
#end if
#if $validation_set
--validation_set "./${validation_set.element_identifier}"
--validation_set "./${sanitized_validation_set}"
#end if
#if $test_set
--test_set "./${test_set.element_identifier}"
--test_set "./${sanitized_test_set}"
#end if
#if $training_set_metadata
--training_set_metadata '$training_set_metadata'
Expand All @@ -68,7 +82,7 @@
mkdir -p '$output_model.extra_files_path' &&
cp -r experiment_run/model/*.json experiment_run/model/model_weights '$output_model.extra_files_path' &&
echo "Experiment is Done!"
echo "Experiment is Done!"
]]>
</command>
<configfiles>
Expand Down Expand Up @@ -96,8 +110,8 @@
<option value="test" selected="true">test</option>
<option value="full">full</option>
</param>
<param argument="k_fold" type="integer" value="" optional="true" label="number of folds for a k-fold cross validation run" />
<param argument="random_seed" type="integer" value="42" label="Randonness seed" />
<param argument="k_fold" type="integer" value="" optional="true" label="number of folds for a k-fold cross validation run" min="2" max="10"/>
<param argument="random_seed" type="integer" value="42" label="Randomness seed" min="0" max="999999"/>
<param argument="disable_parallel_threads" type="boolean" checked="false" label="Whether to disable parallel threads for reproducibility?" />
<!-- <param argument="skip_save_predictions" type="boolean" checked="false" label="Whether to skip saving predictions?" /> -->
<param name="raw_data" type="data" format="zip" optional="true" label="Raw data" help="Optional. Needed for images."/>
Expand Down
52 changes: 27 additions & 25 deletions tools/ludwig_hyperopt.xml
Original file line number Diff line number Diff line change
Expand Up @@ -8,28 +8,35 @@
<version_command>echo "@VERSION@"</version_command>
<command>
<![CDATA[
TMPDIR='/tmp';
TMP=\$TMPDIR;
TEMP=\$TMPDIR;
export TMPDIR TMP TEMP &&
#import re
#if $config
cp '$config' "./config.yml";
#end if
#if $dataset
ln -sf '$dataset' "./${dataset.element_identifier}";
#set $sanitized_dataset = re.sub('[^\w\-_\.]', '_', $dataset.element_identifier)
ln -sf '$dataset' "./${sanitized_dataset}";
#end if
#if $training_set
ln -sf '$training_set' "./${training_set.element_identifier}";
#set $sanitized_training_set = re.sub('[^\w\-_\.]', '_', $training_set.element_identifier)
ln -sf '$training_set' "./${sanitized_training_set}";
#end if
#if $validation_set
ln -sf '$validation_set' "./${validation_set.element_identifier}";
#set $sanitized_validation_set = re.sub('[^\w\-_\.]', '_', $validation_set.element_identifier)
ln -sf '$validation_set' "./${sanitized_validation_set}";
#end if
#if $test_set
ln -sf '$test_set' "./${test_set.element_identifier}";
#set $sanitized_test_set = re.sub('[^\w\-_\.]', '_', $test_set.element_identifier)
ln -sf '$test_set' "./${sanitized_test_set}";
#end if
#if $raw_data
unzip -o -q '$raw_data' -d ./;
unzip -o -q '$raw_data' -d ./;
#end if
python '$__tool_directory__/ludwig_hyperopt.py'
#if $config
--config "./config.yml"
Expand All @@ -38,16 +45,16 @@
--model_load_path '$model_load_path.extra_files_path'
#end if
#if $dataset
--dataset "./${dataset.element_identifier}"
--dataset "./${sanitized_dataset}"
#end if
#if $training_set
--training_set "./${training_set.element_identifier}"
--training_set "./${sanitized_training_set}"
#end if
#if $validation_set
--validation_set "./${validation_set.element_identifier}"
--validation_set "./${sanitized_validation_set}"
#end if
#if $test_set
--test_set "./${test_set.element_identifier}"
--test_set "./${sanitized_test_set}"
#end if
#if $training_set_metadata
--training_set_metadata '$training_set_metadata'
Expand All @@ -68,7 +75,6 @@
best_trial=\$(cat ../outputs/tool_stdout | grep "^Current best trial:" | tail -1 | cut -d" " -f 4) &&
mkdir -p '$output_model.extra_files_path' &&
cp hyperopt/trial_\$best_trial/hyperopt_run/model/*.json hyperopt/trial_\$best_trial/hyperopt_run/model/model_weights '$output_model.extra_files_path'
]]>
</command>
<configfiles>
Expand All @@ -89,7 +95,7 @@
<option value="h5">h5</option>
<option value="json">json</option>
</param>
<param name="random_seed" type="integer" value="42" label="Randonness seed" />
<param name="random_seed" type="integer" value="42" label="Randomness seed" min="0" max="999999" />
<!-- <param name="save_model" type="boolean" checked="true" label="Whether to save model?" /> -->
<param name="raw_data" type="data" format="zip" optional="true" label="Raw data" help="Optional. Needed for images."/>
</inputs>
Expand All @@ -112,19 +118,15 @@
</tests>
<help>
<![CDATA[
**What it does**
Hyperparameter tuning.
**Input**
**What it does**
Hyperparameter tuning.
**Output**
**Input**
- Hyperopt report
- The best Ludwig model
**Output**
- Hyperopt report
- The best Ludwig model
]]>
</help>
Expand Down
4 changes: 2 additions & 2 deletions tools/ludwig_macros.xml
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@
</xml>

<xml name="visualize_labels_limit">
<param argument="labels_limit" type="integer" value="" optional="true" label="Set the upper limit on the numeric encoded label value" help="Encoded numeric label values in dataset that are higher than `label_limit` are considered to be 'rare' labels." />
<param argument="labels_limit" type="integer" value="" optional="true" label="Set the upper limit on the numeric encoded label value" help="Encoded numeric label values in dataset that are higher than `labels_limit` are considered to be 'rare' labels." min="1" max="1000"/>
</xml>

<xml name="visualize_metrics">
Expand All @@ -130,7 +130,7 @@
</xml>

<xml name="visualize_positive_label">
<param argument="positive_label" type="integer" value="1" label="Numeric encoded value for the positive class" />
<param argument="positive_label" type="integer" value="1" label="Numeric encoded value for the positive class" min="1" max="1000" />
</xml>

<xml name="visualize_ground_truth_apply_idx">
Expand Down
12 changes: 8 additions & 4 deletions tools/ludwig_predict.xml
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,23 @@
<version_command>echo "@VERSION@"</version_command>
<command>
<![CDATA[
#import re
mkdir -p outputs &&
#if $dataset
ln -sf '$dataset' "./${dataset.element_identifier}";
#set $sanitized_dataset = re.sub('[^\w\-_\.]', '_', $dataset.element_identifier)
ln -sf '$dataset' "./${sanitized_dataset}";
#end if
#if $raw_data
unzip -o -q '$raw_data' -d ./;
unzip -o -q '$raw_data' -d ./;
#end if
python '$__tool_directory__/ludwig_predict.py'
#if $model_path
--model_path '$model_path.extra_files_path'
#end if
#if $dataset
--dataset "./${dataset.element_identifier}"
--dataset "./${sanitized_dataset}"
#end if
#if $disable_parallel_threads
--disable_parallel_threads
Expand Down Expand Up @@ -53,7 +57,7 @@
<option value="test">test</option>
<option value="full" selected="true">full</option>
</param>
<param name="batch_size" type="integer" value="128" optional="true" label="Batch size" />
<param name="batch_size" type="integer" value="128" optional="true" label="Batch size" min="1" max="4096" />
<param name="disable_parallel_threads" type="boolean" checked="false" label="Whether to disable parallel threads for reproducibility?" />
<param name="raw_data" type="data" format="zip" optional="true" label="Raw data" help="Optional. Needed for images."/>
</inputs>
Expand Down
2 changes: 1 addition & 1 deletion tools/ludwig_render_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
config[HYPEROPT] = params[HYPEROPT][HYPEROPT]

with open('./pre_config.yml', 'w') as f:
yaml.dump(config, f, allow_unicode=True, default_flow_style=False)
yaml.safe_dump(config, f, allow_unicode=True, default_flow_style=False)

output = sys.argv[2]
output_config = merge_with_defaults(config)
Expand Down
Loading

0 comments on commit 6274302

Please sign in to comment.