-
Notifications
You must be signed in to change notification settings - Fork 1
/
ludwig_evaluate.xml
120 lines (111 loc) · 5.21 KB
/
ludwig_evaluate.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
<tool id="ludwig_evaluate" name="Ludwig Evaluate" version="@VERSION@" profile="@PROFILE@">
    <description>loads a pretrained model and evaluates its performance by comparing its predictions with ground truth</description>
    <macros>
        <import>ludwig_macros.xml</import>
    </macros>
    <expand macro="python_requirements" />
    <expand macro="macro_stdio" />
    <version_command>echo "@VERSION@"</version_command>
    <!-- The input dataset is symlinked into the working directory under its
         collection element identifier so the evaluation script sees a
         meaningful file name; an optional zip of raw data (e.g. images) is
         unpacked alongside it before evaluation runs. Report artifacts
         (*.json, *.parquet) are copied into the HTML report's
         extra_files_path so they travel with the report dataset. -->
    <command>
        <![CDATA[
        mkdir -p outputs &&
        #if $dataset
        ln -sf '$dataset' "`pwd`/${dataset.element_identifier}";
        #end if
        #if $raw_data
        unzip -o -q '$raw_data' -d ./;
        #end if
        python '$__tool_directory__/ludwig_evaluate.py'
        #if $model_path
        --model_path '$model_path.extra_files_path'
        #end if
        #if $dataset
        --dataset "`pwd`/${dataset.element_identifier}"
        #end if
        #if $disable_parallel_threads
        --disable_parallel_threads
        #end if
        #if $skip_collect_predictions
        --skip_collect_predictions
        #end if
        --output_directory "`pwd`/outputs"
        --data_format '$data_format'
        --split '$split'
        --backend local
        --skip_save_unprocessed_output &&
        mkdir -p '$output_report.extra_files_path' &&
        cp outputs/*.json outputs/*.parquet '$output_report.extra_files_path' &&
        echo "Evaluation is Done!"
        ]]>
    </command>
    <!-- Galaxy serializes all parameter values into a JSON file exposed to
         the job as "inputs". NOTE(review): batch_size is declared below but
         is never forwarded on the command line, so the script presumably
         reads it from this config file - confirm against
         ludwig_evaluate.py before relying on it. -->
    <configfiles>
        <inputs name="inputs" />
    </configfiles>
    <inputs>
        <!-- Pretrained model: a composite datatype whose constituent files
             (weights, hyperparameters, training metadata) live in the
             dataset's extra_files_path, which is what the command passes. -->
        <param name="model_path" type="data" format="ludwig_model" label="Load the pretrained model" />
        <param name="dataset" type="data" format="tabular,csv,h5,json,txt" label="Input dataset" />
        <param name="data_format" type="select" label="Data format">
            <option value="auto" selected="true">auto</option>
            <option value="tsv">tsv</option>
            <option value="csv">csv</option>
            <option value="h5">h5</option>
            <option value="json">json</option>
        </param>
        <param name="split" type="select" label="Select the split portion to test the model on">
            <option value="training">training</option>
            <option value="validation">validation</option>
            <option value="test">test</option>
            <option value="full" selected="true">full</option>
        </param>
        <param name="batch_size" type="integer" value="128" optional="true" label="Batch size" />
        <param name="disable_parallel_threads" type="boolean" checked="false" label="Whether to disable parallel threads for reproducibility?" />
        <param name="skip_collect_predictions" type="boolean" checked="false" label="Whether to skip collecting predictions?" />
        <param name="raw_data" type="data" format="zip" optional="true" label="Raw data" help="Optional. Needed for images."/>
    </inputs>
    <outputs>
        <!-- One CSV element per prediction file discovered under outputs/;
             the collection is suppressed when predictions were skipped. -->
        <collection type="list" name="output_pred_csv" label="${tool.name} predictions CSV on ${on_string}" >
            <!-- Regex is (?P<designation>.+)\.csv; '<'/'>' must be
                 XML-escaped inside the attribute value or the file fails
                 to parse as XML. -->
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.csv" format="csv" directory="outputs" />
            <filter>not skip_collect_predictions</filter>
        </collection>
        <data format="html" name="output_report" from_work_dir="outputs/smart_report.html" label="${tool.name} report on ${on_string}" />
    </outputs>
    <tests>
        <test>
            <!-- Composite test input: the model's individual files are
                 supplied via composite_data, so "value" stays empty. -->
            <param name="model_path" value="" ftype="ludwig_model">
                <composite_data value="temp_model01/model_hyperparameters.json" />
                <composite_data value="temp_model01/model_weights" />
                <composite_data value="temp_model01/training_set_metadata.json" />
                <composite_data value="temp_model01/training_progress.json" />
            </param>
            <param name="dataset" value="temperature_la.csv" ftype="csv" />
            <param name="split" value="test" />
            <output name="output_report" ftype="html">
                <extra_files type="file" name="predictions.parquet" value="temp_predictions.parquet" compare="sim_size" delta="50" />
                <extra_files type="file" name="predictions.shapes.json" value="temp_predictions.shapes.json" lines_diff="5" />
                <extra_files type="file" name="test_statistics.json" value="temp_test_statistics.json" lines_diff="5" />
            </output>
            <output_collection name="output_pred_csv">
                <element name="temperature_predictions">
                    <assert_contents>
                        <has_n_lines n="9051" />
                        <has_n_columns n="1" />
                        <has_size value="186816" delta="50" />
                    </assert_contents>
                </element>
            </output_collection>
        </test>
    </tests>
    <help>
        <![CDATA[
**What it does**

This tool conducts `ludwig evaluate`.

**Input**

- a trained ludwig model.
- dataset to be evaluated.

**Output**

- report in html.
- a collection of prediction results.
        ]]>
    </help>
    <expand macro="macro_citations" />
</tool>