diff --git a/howso/analysis.amlg b/howso/analysis.amlg index f5582e734..2dec3ca3d 100644 --- a/howso/analysis.amlg +++ b/howso/analysis.amlg @@ -225,7 +225,7 @@ (accum (assoc analyze_warnings (associate (concat - "It is recomended to use a \"targetless\" analysis of the data for a time-series Trainee. " + "It is recommended to use a \"targetless\" analysis of the data for a time-series Trainee. " "Please analyze the data once more with no action features specified and the value \"targetless\" " "specified for the \"targeted_model\" parameter." )) diff --git a/howso/get_cases.amlg b/howso/get_cases.amlg index 9ee1552b5..051ead783 100644 --- a/howso/get_cases.amlg +++ b/howso/get_cases.amlg @@ -92,9 +92,10 @@ #!GetCaseGivenReplaySession (get (retrieve_from_entity session ".replay_steps") session_index) - ;returns assoc with features and cases - a list of lists of all feature values. Retrieves all feature values for cases for - ;all (unordered) sessions in the order they were trained within each session. If a session is specified, only that session's - ;cases wil be output. + ;returns assoc with features and cases - a list of lists of all feature values. Retrieves all feature values for cases in + ;all sessions. If a session is specified, only that session's cases will be output. Session and case order is not guaranteed; + ;however, the features ".session" and ".session_training_index" may be requested to get the session id and session training order + ;for each case, respectively. ;{read_only (true) idempotent (true)} #get_cases (declare @@ -114,7 +115,7 @@ ; } (assoc ;{type "list" values "string"} - ;list of features to retrieve. + ;list of features to retrieve. Case values will be output in this feature order. features (list) ;{type "number"} ;set flag to skip decoding feature values into their nominal values for output. diff --git a/howso/get_sessions.amlg b/howso/get_sessions.amlg index e08d49a78..17e841f4b 100644 --- a/howso/get_sessions.amlg +++ b/howso/get_sessions.amlg @@ -141,11 +141,11 @@ (call !ValidateParameters) (call !Return (assoc payload - (map + (sort (map (lambda (retrieve_from_entity (current_value) !internalLabelSessionTrainingIndex)) ;list of all cases trained for specified session (retrieve_from_entity session ".replay_steps") - ) + )) )) ) diff --git a/howso/return_types.amlg b/howso/return_types.amlg index b26a4f507..d842c7917 100644 --- a/howso/return_types.amlg +++ b/howso/return_types.amlg @@ -782,7 +782,7 @@ "ablated_indices" { type "list" values "number" - description "The session training indices for the ablated cases." + description "The indices of the ablated input cases."
} "status" { type ["string" "null"] diff --git a/howso/train.amlg b/howso/train.amlg index 4fb36b5d2..1749aa7e4 100644 --- a/howso/train.amlg +++ b/howso/train.amlg @@ -72,7 +72,10 @@ (null ##.replay_steps (list) ##.indices_map (assoc) + ;total count of cases trained ##.trained_instance_count 0 + ;total count of cases observed (incl ablated, trained as weights, etc) + ##.total_instance_count 0 ##.metadata (assoc) ) ))) @@ -81,6 +84,7 @@ (declare (assoc trained_instance_count (retrieve_from_entity session ".trained_instance_count") + total_instance_count (retrieve_from_entity session ".total_instance_count") series_cases (if (!= (null) series) (get !seriesStore series)) status_output (null) message (null) @@ -152,6 +156,11 @@ cases cases )) + ;capture these cases into the total observed count + (accum_to_entities session (assoc + ".total_instance_count" (size cases) + )) + (accum_to_entities (assoc !revision 1)) (conclude @@ -327,14 +336,7 @@ (if accumulate_weight_feature cases ;else only accumulate for cases that were actually trained and not ablated - (unzip - cases - (remove - (indices cases) - ;change ablated_indices_list into a 0-based list to match indices of cases - (map (lambda (- (current_value) trained_instance_count)) ablated_indices_list) - ) - ) + (unzip cases (remove (indices cases) ablated_indices_list)) ) )) )) @@ -503,6 +505,11 @@ (assign_to_entities (assoc !inactiveFeaturesNeedCaching (true) )) ) + ;capture these cases into the total observed count + (accum_to_entities session (assoc + ".total_instance_count" (size cases) + )) + (accum_to_entities (assoc !revision 1)) ;return response @@ -511,7 +518,7 @@ payload (assoc "num_trained" (size new_case_ids) - "ablated_indices" ablated_indices_list + "ablated_indices" (sort ablated_indices_list) "status" status_output ) )) @@ -562,7 +569,12 @@ )) ) - (if (!= (size features) (size (first cases)) ) + ;verify all row sizes match the number of features + (if + (size (filter + (lambda (!= (size features) (size (current_value)))) + cases + )) (conclude (conclude (call !Return (assoc errors (list "The number of feature names specified does not match the number of feature values given.") @@ -711,7 +723,7 @@ (current_value 1) ) session (get_value session) - session_training_index (+ trained_instance_count (current_index 1)) + session_training_index (+ total_instance_count (current_index 1)) )) ) cases @@ -823,7 +835,7 @@ (call !AblateCases (assoc cases (unzip cases (range input_case_index (+ input_case_index batch_size -1)) ) ;ensure that starting training index value is updated for each batch - session_training_index (+ trained_instance_count input_case_index) + session_training_index (+ total_instance_count input_case_index) )) )) @@ -945,8 +957,8 @@ (map (lambda (if (size ts_ablated_indices_map) - (+ session_training_index (get ts_ablated_indices_map (current_value))) - (+ session_training_index (current_value)) + (get ts_ablated_indices_map (+ input_case_index (current_value))) + (+ input_case_index (current_value)) ) ) (remove (indices cases) indices_to_train) @@ -996,7 +1008,11 @@ feature_values ) session (get_value session) - session_training_index (+ session_training_index (current_value 1)) + session_training_index + (if (size ts_ablated_indices_map) + (+ total_instance_count (get ts_ablated_indices_map (+ input_case_index (current_value 1)))) + (+ session_training_index (current_value 1)) + ) )) )) indices_to_train diff --git a/howso/train_ts_ablation.amlg b/howso/train_ts_ablation.amlg index 2e257fc79..e6ae6d312 100644 --- 
a/howso/train_ts_ablation.amlg +++ b/howso/train_ts_ablation.amlg @@ -16,8 +16,8 @@ )) ;This flow creates a set of all unique ids, then iterates over all cases, looking at each case's id - ;if a different id is encontered it is removed from that unique set - ;if an id that has already been removed from the set is encontered again, + ;if a different id is encountered it is removed from that unique set + ;if an id that has already been removed from the set is encountered again, ;that means the case is out of order and the data needs to be grouped (if one_id (let @@ -125,7 +125,7 @@ ;helper method ;outputs a sorted list of cases where any case for a specific series that was among cases of a different series, - ;will be moved to be together among other cases of its own series, while mantaining the original order of different series + ;will be moved to be together among other cases of its own series, while maintaining the original order of different series ;e.g., if these are series IDS of cases: A A A B A A B B C B C C C the result would be: A A A A A B B B B C C C C #!GroupDataByIds (seq @@ -233,8 +233,8 @@ output_case_ids [] ;map of sorted case index -> original case index used to lookup which original rows were ablated after data is sorted ts_ablated_indices_map (null) - ;number of cases to be trained from each series, accumulated to keep track of session training index - num_previously_trained_cases 0 + ;series which received new cases that occurred before the last trained case of the series + out_of_sequence_series_ids [] )) (while (< (current_index) num_rows) @@ -257,6 +257,7 @@ (- (current_index 1) 1) ) features original_features + features_indices (indices original_features) ) ;train one entire series at a time @@ -270,12 +271,26 @@ (assign (assoc previous_ids (unzip (get cases (current_index 1)) id_indices) start_index (current_index 1) - num_previously_trained_cases (+ num_previously_trained_cases 1 (- end_index start_index)) )) ) ) ) + ;remove the temporary features + (assign (assoc features original_features)) + + ;warn when there are out of sequence new cases + (if (size out_of_sequence_series_ids) + (accum (assoc + warnings + (associate (concat + "Training time series cases out of sequence with ablation enabled is not supported and may " + "cause unexpected results. 
The following series received out of sequence cases: " + (apply "concat" (trunc (weave out_of_sequence_series_ids ", ")) ) + )) + )) + ) + ;return trained case_ids output_case_ids ) @@ -283,35 +298,11 @@ ;derive then train with ablation #!TrainSingleSeriesWithAblation (seq - (if (size series_ordered_by_features) - (assign (assoc - data - (call !MultiSortList (assoc - data data - column_order_indices (unzip feature_index_map series_ordered_by_features) - )) - )) - ) - ;map of index -> original index - (assign (assoc - ts_ablated_indices_map - (zip - (indices data) - (map (lambda (last (current_value))) data) - ) - )) - - ;drop that last 'index' column - (assign (assoc - data (map (lambda (trunc (current_value))) data) - )) (declare (assoc ;series_index of each row, will be set to be non-zero if some series cases were already trained previously continue_series_index 0 - trained_cases_reversed_order (false) id_values (unzip (first data) id_indices) - previous_range (null) untrained_data_size (size data) )) @@ -334,22 +325,36 @@ ) ) ) + trained_series_cases_features (append features [".series_index" ".case_id"]) + ;cases with the same series id that are already trained trained_series_cases [] + ;indices of trained cases in the sorted combined data + trained_case_indices [] + ;indices of nulls for previously ablated cases in the sorted combined data + trained_ablated_indices [] + last_trained_time_value (null) )) ;if previously trained series cases exist for this series, prepend them to data (if (size trained_series_case_ids) (let (assoc - features_indices (indices features) - prev_row_index 0 + ;last seen series index (for finding ablated cases to insert nulls) + prev_series_index (null) + ;row index for existing cases + next_series_row 0 + ;series_index index in trained_series_cases + series_index_feature_index (- (size trained_series_cases_features) 2) + ;case_id index in trained_series_cases + case_id_feature_index (- (size trained_series_cases_features) 1) ) - ;overwrite trained_series_cases to contain each cases's feature values and all the progress_features values + ;set trained_series_cases to contain each cases's feature values, series index, and case id (assign (assoc trained_series_cases (map (lambda + ;NOTE: columns here must match trained_series_cases_features (append (retrieve_from_entity (current_value) (append features ".series_index") ) (current_value) @@ -357,79 +362,173 @@ ) trained_series_case_ids ) - series_progress_index_feature_index (size features) )) + ;combine and sort the new cases with existing cases (assign (assoc - trained_series_cases + data (call !MultiSortList (assoc - data trained_series_cases - column_order_indices [ time_feature_index ] + data (append trained_series_cases data) + column_order_indices + (if (size series_ordered_by_features) + (unzip feature_index_map series_ordered_by_features) + [ time_feature_index ] + ) )) )) (assign (assoc - ;flag set to true if previously trained cases were trained in reverse order, - ;i.e., they come later in the series than the cases being trained now - trained_cases_reversed_order - (> - (get (first trained_series_cases) time_feature_index) - (get (first data) time_feature_index) + ;determine the last time value so we can verify new cases occur after this + last_trained_time_value + (apply "max" (map + (lambda (get (current_value) time_feature_index)) + trained_series_cases + )) + ;set continue_series_index to the would-be next index value + continue_series_index + (+ + 1 + (apply "max" (map + (lambda (get 
(current_value) series_index_feature_index)) + trained_series_cases + )) + ) + ;using sorted data we need to map index -> original_index where the index is the + ;index in a list of *only* the new cases + ts_ablated_indices_map + (zip + (range 0 (- (size data) (size trained_series_cases) 1) 1) + (filter (map + (lambda + (if (!= (size (current_value)) (size trained_series_cases_features)) + (last (current_value)) + ) + ) + data + )) ) - trained_series_case_ids (map (lambda (last (current_value))) trained_series_cases) - )) - - ;set continue_series_index to the would-be next index value - (assign (assoc - continue_series_index (+ 1 (get (last trained_series_cases) series_progress_index_feature_index) ) )) ;previously trained series was ablated because the number of cases is less than the continue series index (if (< (size trained_series_cases) continue_series_index) (assign (assoc - trained_series_cases + data ;fill previously ablated cases with nulls (range - (lambda - (if (= (current_index) (get trained_series_cases [prev_row_index series_progress_index_feature_index])) + (lambda (let + (assoc + series_index (get data [next_series_row series_index_feature_index]) + current_row (get data next_series_row) + ) + (if (!= (size current_row) (size trained_series_cases_features)) + ;output the new case (seq - (accum (assoc prev_row_index 1)) - (get trained_series_cases [(- prev_row_index 1) series_progress_index_feature_index]) + (accum (assoc next_series_row 1)) + ;return the new case + current_row ) - ;else output (null) - ) + ;else if the first series index we encounter is > 0 we need to add nulls to the start + (and (= (null) prev_series_index) (> series_index 0)) + (seq + (assign (assoc prev_series_index 0)) + ;return null + (null) + ) - ) - 0 series_continuation_index 1 + ;else if there is a gap since the last series index, output null + (> (- series_index prev_series_index) 1) + (seq + (accum (assoc prev_series_index 1)) + ;return null + (null) + ) + + ;else output the existing case + (seq + (assign (assoc + next_series_row (+ next_series_row 1) + prev_series_index series_index + )) + ;return the existing case + current_row + ) + ) + )) + 0 (+ continue_series_index untrained_data_size -1) 1 ) )) ) - ;combine previously trained data with this new data (assign (assoc - data - (if trained_cases_reversed_order - (append - data - (map - (lambda (unzip (current_value) features_indices)) - trained_series_cases + ;get updated order of case ids + trained_series_case_ids + (filter (map + (lambda + (if (= (size (current_value)) (size trained_series_cases_features)) + (get (current_value) case_id_feature_index) ) ) + data + )) + ;get the indices of the trained cases so they can be removed later + trained_case_indices + (filter + (lambda (= + (size (get data (current_value))) + (size trained_series_cases_features) + )) + (indices data) + ) + ;get the indices of the ablated cases so they can be removed later + trained_ablated_indices + (filter + (lambda (= (null) (get data (current_value)))) + (indices data) + ) + )) + ) - (append - (map - (lambda (unzip (current_value) features_indices)) - trained_series_cases + ;else if no existing trained cases + (seq + ;sort the data by time + (assign (assoc + data + (call !MultiSortList (assoc + data data + column_order_indices + (if (size series_ordered_by_features) + (unzip feature_index_map series_ordered_by_features) + [ time_feature_index ] ) - data - ) + )) + )) + + ;map of index -> original index + (assign (assoc + ts_ablated_indices_map + (zip + (indices 
data) + (map (lambda (last (current_value))) data) ) )) ) ) + ;drop extra temporary columns from data + (assign (assoc + data + (map + (lambda + (if (= (null) (current_value)) + (null) ;ablated case + (unzip (current_value) features_indices) + ) + ) + data + ) + )) + ;now that the length of each new series is known, ensure that ts_series_length_limit is e*(longest series) (if (> (* 2.718281828459 (size data)) ts_series_length_limit) (assign (assoc ts_series_length_limit (* 2.718281828459 (size data)) )) @@ -450,56 +549,55 @@ derived_progress_values_lists (call !DeriveProgressFeaturesForData) )) - ;there were existing cases, update their progress values - (if (size trained_series_case_ids) - (map - (lambda - (assign_to_entities - (current_value) - (zip - progress_features - (get derived_progress_values_lists (+ (current_index) (if trained_cases_reversed_order untrained_data_size 0)) ) - ) - ) - ) - trained_series_case_ids - ) - ) - - ;append all the progress values to data (assign (assoc features (append features progress_features ) data (map - (lambda (let - (assoc - row_index - (if trained_cases_reversed_order - (current_index 1) - (+ continue_series_index (current_index 1)) - ) - ) - (append - (current_value) - ;for each of the three progress features, grab the tuple of progress values - (get derived_progress_values_lists row_index) - ) + (lambda (append + (current_value) + (get derived_progress_values_lists (current_index)) )) + data + ) + )) + + (if (size trained_series_case_ids) + (seq + ;there were existing cases, update their progress values + (map + (lambda + (assign_to_entities + (get trained_series_case_ids (current_index)) + (zip + progress_features + (get derived_progress_values_lists (current_value)) + ) + ) + ) + trained_case_indices + ) + ;filter out the already trained cases so we only train the new ones + (assign (assoc + data (unzip data (remove (indices data) (append trained_case_indices trained_ablated_indices))) + )) - ;since data is combined with all the previously trained cases, - ;only use the non-trained data indices - (if trained_cases_reversed_order - (trunc data (- continue_series_index)) - (tail data (- continue_series_index)) + ;Check if we need to warn about out of sequence cases + (if + (and + (!= (null) last_trained_time_value) + (< (get (first data) time_feature_index) last_trained_time_value) ) + (accum (assoc + out_of_sequence_series_ids (unzip (first data) id_indices) + )) ) - )) + ) + ) ;train and ablate cases and output created case ids (call !TrainCasesWithAblation (assoc cases data - trained_instance_count (+ trained_instance_count num_previously_trained_cases) ;features have already been encoded encode_features_on_train (false) )) diff --git a/migrations/migrations.amlg b/migrations/migrations.amlg index 31cd73944..88c968416 100644 --- a/migrations/migrations.amlg +++ b/migrations/migrations.amlg @@ -431,5 +431,25 @@ !hyperparameterMetadataMap new_hp_params )) ) - +"95.0.0" + (let + ;sessions must include the field ".total_instance_count", default sessions without this field to the + ;existing ".trained_instance_count" value + (assoc + session_entities (contained_entities [(query_exists ".replay_steps")]) + ) + (map + (lambda + (if (= (null) (retrieve_from_entity (current_value) ".total_instance_count")) + (accum_entity_roots (current_value) (list + (set_labels + (retrieve_from_entity (current_value 1) ".trained_instance_count") + [".total_instance_count"] + ) + )) + ) + ) + session_entities + ) + ) )) \ No newline at end of file diff --git 
a/unit_tests/unit_test_data/example_timeseries.features.json b/unit_tests/unit_test_data/example_timeseries.features.json new file mode 100644 index 000000000..76b81e2d2 --- /dev/null +++ b/unit_tests/unit_test_data/example_timeseries.features.json @@ -0,0 +1,107 @@ +{ + "ID": { + "type": "nominal", + "data_type": "number", + "decimal_places": 0, + "original_type": { + "data_type": "integer", + "size": 8 + }, + "id_feature": true, + "bounds": { + "allow_null": false + } + }, + "f1": { + "type": "continuous", + "data_type": "number", + "decimal_places": 4, + "original_type": { + "data_type": "numeric", + "size": 8 + }, + "bounds": { + "min": 20.0855, + "max": 8103.0839, + "allow_null": true + }, + "time_series": { + "type": "rate", + "rate_max": [ + 0.0012430732030380078 + ], + "rate_min": [ + -0.0007645089495998057 + ] + } + }, + "f2": { + "type": "continuous", + "data_type": "number", + "decimal_places": 4, + "original_type": { + "data_type": "numeric", + "size": 8 + }, + "bounds": { + "min": 20.0855, + "max": 8103.0839, + "allow_null": true + }, + "time_series": { + "type": "rate", + "rate_max": [ + 0.001388600638608105 + ], + "rate_min": [ + -0.0006637496387383802 + ] + } + }, + "f3": { + "type": "continuous", + "data_type": "number", + "decimal_places": 4, + "original_type": { + "data_type": "numeric", + "size": 8 + }, + "bounds": { + "min": 20.0855, + "max": 8103.0839, + "allow_null": true + }, + "time_series": { + "type": "rate", + "rate_max": [ + 0.0011805197298037438 + ], + "rate_min": [ + -0.0005973067076699289 + ] + } + }, + "date": { + "type": "continuous", + "data_type": "formatted_date_time", + "date_time_format": "%Y-%m-%d", + "original_type": { + "data_type": "string" + }, + "bounds": { + "min": "1985-05-17", + "max": "2083-08-08", + "allow_null": false + }, + "time_series": { + "type": "delta", + "time_feature": true, + "delta_max": [ + 7280646.049344706 + ], + "delta_min": [ + 889973.9440819533 + ] + } + } +} \ No newline at end of file diff --git a/unit_tests/ut_h_ablate.amlg b/unit_tests/ut_h_ablate.amlg index e06e45a2b..c74176899 100644 --- a/unit_tests/ut_h_ablate.amlg +++ b/unit_tests/ut_h_ablate.amlg @@ -80,11 +80,6 @@ abs_threshold_map {accuracy {B 1.0}} )) - (print "threshold train call does not ablate: ") - (call assert_null (assoc - obs (get ablate_train_payload (list 1 "payload" "ablated_indices")) - )) - (call_entity "howso" "set_auto_ablation_params" (assoc auto_ablation_enabled (true) min_num_cases 8 @@ -106,10 +101,16 @@ )) )) + (print "no warnings raised: ") + (call assert_true (assoc + obs (= (null) (get ablate_train_payload [1 "warnings"])) + )) + (call exit_if_failures (assoc msg "No warnings raised") ) + (print "second train call ablates: ") (call assert_same (assoc obs (get ablate_train_payload (list 1 "payload" "ablated_indices")) - exp (list 8) + exp (list 0) )) (call exit_if_failures (assoc msg "Ablation train call") ) @@ -152,15 +153,33 @@ )) )) + (print "no warnings raised: ") + (call assert_true (assoc + obs (= (null) (get ablate_train_payload [1 "warnings"])) + )) + (call exit_if_failures (assoc msg "No warnings raised") ) + (print "third train call ablates all duplicates:") (call assert_same (assoc obs (get ablate_train_payload (list 1 "payload" "ablated_indices")) - ;there are 9 cases in the model, therefore these are training indices 9 and 10 - exp (list 9 10) + exp (list 0 1) )) (call exit_if_failures (assoc msg "Duplicate cases ablation train call") ) + (declare (assoc + session_training_indices + (get + (call_entity "howso" 
"get_session_training_indices" (assoc session "unit_test")) + [1 "payload"] + ) + )) + (call assert_same (assoc + obs session_training_indices + exp [0 1 2 3 4 5 6 7 9] + )) + (call exit_if_failures (assoc msg "Session indices matched") ) + (declare (assoc point_five_cases (call_entity "howso" "get_cases" (assoc @@ -223,6 +242,12 @@ )) )) + (print "fourth train call ablates all: ") + (call assert_same (assoc + obs (get result (list 1 "payload" "ablated_indices")) + exp (list 0 1) + )) + (assign (assoc pre_reduction_size (size (get (call_entity "howso" "get_cases") @@ -307,6 +332,12 @@ )) )) + (print "no warnings raised: ") + (call assert_true (assoc + obs (= (null) (get ablate_train_payload [1 "warnings"])) + )) + (call exit_if_failures (assoc msg "No warnings raised") ) + (declare (assoc post_train_total_weight (apply "+" (map diff --git a/unit_tests/ut_h_ablate_ts.amlg b/unit_tests/ut_h_ablate_ts.amlg new file mode 100644 index 000000000..46af71975 --- /dev/null +++ b/unit_tests/ut_h_ablate_ts.amlg @@ -0,0 +1,339 @@ +(seq + #unit_test (direct_assign_to_entities (assoc unit_test (load "unit_test.amlg"))) + (call (load "unit_test_howso.amlg") (assoc name "ut_h_ablate_ts.amlg")) + + (declare (assoc + ;201 cases for training 20 batches of 20 and 1 batch of 1 + dataset (trunc (load "unit_test_data/example_timeseries.csv") 202) + feature_attributes + (append + (load "unit_test_data/example_timeseries.features.json") + ;add feature attributes for the index column appended to the dataset + { + "index" + { + "type" "continuous" + "data_type" "number" + "decimal_places" 0 + "bounds" {"allow_null" (false)} + } + } + ) + )) + (declare (assoc + features (append (first dataset) "index") + expected_warnings [] + )) + + (null + #train_and_ablate + (let + (assoc + session "unit_test" + trained_cases (null) + ablated_indices [] + warnings [] + train_statuses [] + ) + + ;create a fresh trainee + (destroy_entities "howso") + (call (load "unit_test_howso.amlg") (assoc name "ut_h_ablate_ts.amlg" skip_init (true)) ) + + (call_entity "howso" "set_feature_attributes" (assoc feature_attributes feature_attributes)) + + (call_entity "howso" "set_auto_ablation_params" (assoc + auto_ablation_enabled (true) + min_num_cases 10 + batch_size 1 + )) + + (call_entity "howso" "set_auto_analyze_params" (assoc + auto_analyze_enabled (true) + analyze_threshold 10 + analyze_growth_factor 1.5 + )) + + ;train the cases in batches of up to 20 + (range + (lambda (let + (assoc + response + (call_entity "howso" "train" (assoc + session session + features features + cases + ;batch up to 20 (filter out nulls if batch exceeds remaining cases) + (filter (unzip + cases + (range (current_index 2) (+ (current_index 2) 19)) + )) + )) + ) + (accum (assoc + train_statuses (get response 0) + warnings (or (get response [1 "warnings"]) []) + ablated_indices + (map + ;offset index by the batch position + (lambda (+ (current_value) (current_index 2))) + (get response [1 "payload" "ablated_indices"]) + ) + )) + + )) + 0 (size cases) 20 + ) + + ;verify all trains were successful + (print "All train batches returned success status: ") + (call assert_same (assoc + obs (size (filter (lambda (= 0 (current_value))) train_statuses)) + exp 0 + )) + (call exit_if_failures (assoc msg "Trains completed succssfully")) + + (if (size expected_warnings) + ;match that at least one of the expected warnings is raised, and no others + (seq + (declare (assoc + matched_warnings + (filter + (lambda (size + ;at least one of the expected warnings should be matched + 
(filter + (lambda (contains_value (current_value 1) (current_value)) ) + expected_warnings + ) + )) + warnings + ) + )) + (print "Expected warnings were raised: ") + (call assert_true (assoc + obs (> (size matched_warnings) 0) + )) + (print "No unexpected warnings were raised: ") + (call assert_same (assoc + obs (size matched_warnings) + exp (size warnings) + )) + ) + + ;else expect no warnings + (seq + (print "No warnings were raised: ") + (call assert_same (assoc + obs (size warnings) + exp 0 + )) + ) + ) + (call exit_if_failures (assoc msg "Only expected warnings raised")) + + ;retrieve the trained case index values + (assign (assoc + trained_cases + (get + (call_entity "howso" "get_cases" (assoc + session session + features ["ID" "date" "index" ".session_training_index" ".series_index" ".series_progress"] + )) + [1 "payload" "cases"] + ) + id_feature_index 0 + time_feature_index 1 + original_index 2 + session_train_index 3 + series_index 4 + series_progress_index 5 + )) + (declare (assoc + original_indices (map (lambda (get (current_value) original_index)) trained_cases) + session_training_indices (map (lambda (get (current_value) session_train_index)) trained_cases) + series_indices (map (lambda (get (current_value) series_index)) trained_cases) + id_feature_values (values (map (lambda (get (current_value) id_feature_index)) trained_cases) (true)) + )) + + (print "All cases were trained: ") + (call assert_same (assoc + obs (+ (size trained_cases) (size ablated_indices)) + exp (size cases) + )) + + (print "Ablated indices do not contain nulls: ") + (call assert_false (assoc + obs (contains_value ablated_indices (null)) + )) + + (print "Ablated indices are unique: ") + (call assert_same (assoc + obs (size (values ablated_indices (true))) + exp (size ablated_indices) + )) + + (print "Session training indices match original indices: ") + (call assert_same (assoc + obs original_indices + exp session_training_indices + )) + + (print "Session training indices do not contain nulls: ") + (call assert_false (assoc + obs (contains_value session_training_indices (null)) + )) + + (print "Session training indices are unique: ") + (call assert_same (assoc + obs (size (values session_training_indices (true))) + exp (size trained_cases) + )) + + (print "Ablated indices are not found in the session training indices: ") + (call assert_same (assoc + obs + (size + (filter + (lambda (contains_value session_training_indices (current_value))) + ablated_indices + ) + ) + exp 0 + )) + + (print "Series indices do not contain nulls: ") + (call assert_false (assoc + obs (contains_value series_indices (null)) + )) + + ;sort trained cases by the date column + (assign (assoc + trained_cases + (sort + (lambda (let + (assoc + a (current_value 1) + b (current_value 2) + ) + + (if (~ 0 (get a time_feature_index)) + (- (get a time_feature_index) (get b time_feature_index) ) + (> (get a time_feature_index) (get b time_feature_index) ) + ) + )) + trained_cases + ) + )) + + ;per series checks + (map + (lambda (let + (assoc + series_cases + (filter + (lambda (= (current_value 2) (get (current_value) id_feature_index))) + trained_cases + ) + ) + + (print (concat "Series " (current_value) " indices are unique: ")) + (call assert_same (assoc + obs + (size (values + (map (lambda (get (current_value) series_index)) series_cases) + (true) + )) + exp (size series_cases) + )) + + (print (concat "Series " (current_value) " indices are in order: ")) + (call assert_same (assoc + obs + (size (filter + (lambda + (if (!= 0 (current_index)) + ;match when previous value is >= the current value + (>= + (get series_cases [(- (current_index 1) 1) series_index]) + (get (current_value) series_index) + ) + ) + ) + series_cases + )) + ;if cases are in order, there should be none returned by the filter + exp 0 + )) + + (print (concat "Series " (current_value) " progress values are in order: ")) + (call assert_same (assoc + obs + (size (filter + (lambda + (if (!= 0 (current_index)) + ;match when previous value is >= the current value + (>= + (get series_cases [(- (current_index 1) 1) series_progress_index]) + (get (current_value) series_progress_index) + ) + ) + ) + series_cases + )) + ;if cases are in order, there should be none returned by the filter + exp 0 + )) + )) + id_feature_values + ) + + (call exit_if_failures (assoc msg "Train with time series ablation")) + ) + ) + + (print "Test sequential time series ablation\n") + (call train_and_ablate (assoc + cases + (map + ;map in the expected session training index + (lambda (append (current_value) (current_index)) ) + (tail dataset) + ) + )) + + (print "Test sequential but mixed time series ablation\n") + (call train_and_ablate (assoc + cases + (map + ;map in the expected session training index + (lambda (append (current_value) (current_index)) ) + ;mixed indices such that each series is still sequential but most train batches include both series + (unzip + (tail dataset) + [ + 0 1 2 3 4 5 6 7 8 9 10 116 117 118 119 120 121 122 123 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 + 26 27 28 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 29 30 31 32 33 34 35 36 37 38 39 + 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 139 140 141 142 143 144 145 146 147 + 148 149 150 151 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 152 153 154 155 156 157 + 158 159 160 161 162 163 164 165 166 167 82 83 84 85 86 87 88 89 90 91 168 169 170 171 172 92 93 94 + 95 96 97 173 174 175 176 177 178 179 180 181 182 183 98 99 100 101 102 103 104 105 106 107 108 109 + 110 111 112 113 114 115 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 + ] + ) + ) + )) + + (print "Test out of order time series ablation\n") + (call train_and_ablate (assoc + ;sort the cases randomly so they are out of order + cases + (map + ;map in the expected session training index + (lambda (append (current_value) (current_index)) ) + (sort (lambda (- (rand) (rand))) (tail dataset)) + ) + expected_warnings ["Training time series cases out of sequence with ablation enabled.*"] + )) + + (call exit_if_failures (assoc msg unit_test_name) ) +) \ No newline at end of file diff --git a/unit_tests/ut_h_migration.amlg b/unit_tests/ut_h_migration.amlg index 1ad7a50db..54b3707a9 100644 --- a/unit_tests/ut_h_migration.amlg +++ b/unit_tests/ut_h_migration.amlg @@ -204,6 +204,13 @@ (call exit_if_failures (assoc msg "Persisted cases." )) + (print "Updated sessions to add total_instance_count: ") + (call assert_same (assoc + obs (retrieve_from_entity ["howso" "session"] ".total_instance_count") + exp 21 + )) + (call exit_if_failures (assoc msg "Updated session labels." )) + ;verify exporting post-api removal versions: (assign_to_entities "howso" (assoc major_version 79))
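Note on the revised ablation payload (not part of the patch): after this change, the "ablated_indices" returned by a train call are sorted, 0-based positions of the ablated cases within the batch passed to that call, rather than session training indices, and each session now tracks ".total_instance_count" alongside ".trained_instance_count". A caller that trains in batches and wants dataset-level positions therefore has to offset each batch's indices by the batch start, which is exactly what the new ut_h_ablate_ts.amlg test does. The sketch below illustrates that bookkeeping client-side; it is an illustration only, and "train_batch" is a hypothetical stand-in for whatever client call submits one batch and returns the train payload as a Python dict.

# Illustrative only: recover dataset-level positions of ablated cases when
# training in fixed-size batches. "train_batch" is a hypothetical callable that
# submits one batch and returns the train payload (with the sorted, 0-based
# "ablated_indices" field described in return_types.amlg) as a dict.
from typing import Callable, Sequence

def collect_ablated_positions(
    cases: Sequence[Sequence[object]],
    batch_size: int,
    train_batch: Callable[[Sequence[Sequence[object]]], dict],
) -> list[int]:
    ablated_positions: list[int] = []
    for start in range(0, len(cases), batch_size):
        payload = train_batch(cases[start:start + batch_size])
        # Indices are relative to this batch, so adding the batch start offset
        # yields the position of each ablated case in the full dataset.
        ablated_positions.extend(start + i for i in payload["ablated_indices"])
    return ablated_positions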