diff --git a/test/pred-sets/ref/inv_hash.cmp b/test/pred-sets/ref/inv_hash.cmp index 0cea003874c..cfc607842ea 100644 --- a/test/pred-sets/ref/inv_hash.cmp +++ b/test/pred-sets/ref/inv_hash.cmp @@ -9,81 +9,102 @@ lda:0 0 skip: options: --oaa 3 :0 -Constant:202096:-0.078247 -Constant[1]:202097:0.021453 -Constant[2]:202098:-0.022712 -e^x:125152:-0.078247 -e^x*e^y:128176:-0.041096 -e^x*e^y[1]:128177:-0.041096 -e^x*e^y[2]:128178:0.041096 -e^x*e^z:158836:-0.041096 -e^x*e^z[1]:158837:-0.041096 -e^x*e^z[2]:158838:0.041096 -e^x*f^a:96492:-0.078247 -e^x*f^a[1]:96493:0.021453 -e^x*f^a[2]:96494:-0.022712 -e^x*f^b:93488:-0.041096 -e^x*f^b[1]:93489:-0.041096 -e^x*f^b[2]:93490:0.041096 -e^x*f^c:189984:-0.078247 -e^x*f^c[1]:189985:0.021453 -e^x*f^c[2]:189986:-0.022712 -e^x*f^y:246988:-0.064023 -e^x*f^y[1]:246989:0.079204 -e^x*f^y[2]:246990:-0.080799 -e^x[1]:125153:0.021453 -e^x[2]:125154:-0.022712 -e^y:27664:-0.041096 -e^y*e^z:125412:-0.041096 -e^y*e^z[1]:125413:-0.041096 -e^y*e^z[2]:125414:0.041096 -e^y*f^a:195964:-0.041096 -e^y*f^a[1]:195965:-0.041096 -e^y*f^a[2]:195966:0.041096 -e^y*f^b:190624:-0.041096 -e^y*f^b[1]:190625:-0.041096 -e^y*f^b[2]:190626:0.041096 -e^y*f^c:91056:-0.041096 -e^y*f^c[1]:91057:-0.041096 -e^y*f^c[2]:91058:0.041096 -e^y[1]:27665:-0.041096 -e^y[2]:27666:0.041096 -e^z:259284:-0.041096 -e^z*f^a:166384:-0.041096 -e^z*f^a[1]:166385:-0.041096 -e^z*f^a[2]:166386:0.041096 -e^z*f^b:171052:-0.041096 -e^z*f^b[1]:171053:-0.041096 -e^z*f^b[2]:171054:0.041096 -e^z*f^c:71484:-0.041096 -e^z*f^c[1]:71485:-0.041096 -e^z*f^c[2]:71486:0.041096 -e^z[1]:259285:-0.041096 -e^z[2]:259286:0.041096 -f^a:57420:-0.078247 -f^a*f^b:123444:-0.041096 -f^a*f^b[1]:123445:-0.041096 -f^a*f^b[2]:123446:0.041096 -f^a*f^c:157988:-0.078247 -f^a*f^c[1]:157989:0.021453 -f^a*f^c[2]:157990:-0.022712 -f^a*f^y:216008:-0.064023 -f^a*f^y[1]:216009:0.079204 -f^a*f^y[2]:216010:-0.080799 -f^a[1]:57421:0.021453 -f^a[2]:57422:-0.022712 -f^b:62864:-0.041096 -f^b*f^c:126768:-0.041096 
-f^b*f^c[1]:126769:-0.041096 -f^b*f^c[2]:126770:0.041096 -f^b[1]:62865:-0.041096 -f^b[2]:62866:0.041096 -f^c:228992:-0.078247 -f^c[1]:228993:0.021453 -f^c[2]:228994:-0.022712 -f^y:154732:-0.064023 -f^y*f^c:132:-0.064023 -f^y*f^c[1]:133:0.079204 -f^y*f^c[2]:134:-0.080799 -f^y[1]:154733:0.079204 -f^y[2]:154734:-0.080799 +Constant:202096:-0.061159 +Constant[1]:202097:0.019265 +Constant[2]:202098:-0.020149 +e^x:125152:-0.061159 +e^x*e^x:28736:-0.061159 +e^x*e^x[1]:28737:0.019265 +e^x*e^x[2]:28738:-0.020149 +e^x*e^y:128176:-0.033179 +e^x*e^y[1]:128177:-0.033179 +e^x*e^y[2]:128178:0.033179 +e^x*e^z:158836:-0.033179 +e^x*e^z[1]:158837:-0.033179 +e^x*e^z[2]:158838:0.033179 +e^x*f^a:96492:-0.061159 +e^x*f^a[1]:96493:0.019265 +e^x*f^a[2]:96494:-0.020149 +e^x*f^b:93488:-0.033179 +e^x*f^b[1]:93489:-0.033179 +e^x*f^b[2]:93490:0.033179 +e^x*f^c:189984:-0.061159 +e^x*f^c[1]:189985:0.019265 +e^x*f^c[2]:189986:-0.020149 +e^x*f^y:246988:-0.050365 +e^x*f^y[1]:246989:0.065592 +e^x*f^y[2]:246990:-0.066698 +e^x[1]:125153:0.019265 +e^x[2]:125154:-0.020149 +e^y:27664:-0.033179 +e^y*e^y:160032:-0.033179 +e^y*e^y[1]:160033:-0.033179 +e^y*e^y[2]:160034:0.033179 +e^y*e^z:125412:-0.033179 +e^y*e^z[1]:125413:-0.033179 +e^y*e^z[2]:125414:0.033179 +e^y*f^a:195964:-0.033179 +e^y*f^a[1]:195965:-0.033179 +e^y*f^a[2]:195966:0.033179 +e^y*f^b:190624:-0.033179 +e^y*f^b[1]:190625:-0.033179 +e^y*f^b[2]:190626:0.033179 +e^y*f^c:91056:-0.033179 +e^y*f^c[1]:91057:-0.033179 +e^y*f^c[2]:91058:0.033179 +e^y[1]:27665:-0.033179 +e^y[2]:27666:0.033179 +e^z:259284:-0.033179 +e^z*e^z:105832:-0.033179 +e^z*e^z[1]:105833:-0.033179 +e^z*e^z[2]:105834:0.033179 +e^z*f^a:166384:-0.033179 +e^z*f^a[1]:166385:-0.033179 +e^z*f^a[2]:166386:0.033179 +e^z*f^b:171052:-0.033179 +e^z*f^b[1]:171053:-0.033179 +e^z*f^b[2]:171054:0.033179 +e^z*f^c:71484:-0.033179 +e^z*f^c[1]:71485:-0.033179 +e^z*f^c[2]:71486:0.033179 +e^z[1]:259285:-0.033179 +e^z[2]:259286:0.033179 +f^a:57420:-0.061159 +f^a*f^a:129000:-0.061159 
+f^a*f^a[1]:129001:0.019265 +f^a*f^a[2]:129002:-0.020149 +f^a*f^b:123444:-0.033179 +f^a*f^b[1]:123445:-0.033179 +f^a*f^b[2]:123446:0.033179 +f^a*f^c:157988:-0.061159 +f^a*f^c[1]:157989:0.019265 +f^a*f^c[2]:157990:-0.020149 +f^a*f^y:216008:-0.050365 +f^a*f^y[1]:216009:0.065592 +f^a*f^y[2]:216010:-0.066698 +f^a[1]:57421:0.019265 +f^a[2]:57422:-0.020149 +f^b:62864:-0.033179 +f^b*f^b:156704:-0.033179 +f^b*f^b[1]:156705:-0.033179 +f^b*f^b[2]:156706:0.033179 +f^b*f^c:126768:-0.033179 +f^b*f^c[1]:126769:-0.033179 +f^b*f^c[2]:126770:0.033179 +f^b[1]:62865:-0.033179 +f^b[2]:62866:0.033179 +f^c:228992:-0.061159 +f^c*f^c:220416:-0.061159 +f^c*f^c[1]:220417:0.019265 +f^c*f^c[2]:220418:-0.020149 +f^c[1]:228993:0.019265 +f^c[2]:228994:-0.020149 +f^y:154732:-0.050365 +f^y*f^c:132:-0.050365 +f^y*f^c[1]:133:0.065592 +f^y*f^c[2]:134:-0.066698 +f^y*f^y:74344:-0.050365 +f^y*f^y[1]:74345:0.065592 +f^y*f^y[2]:74346:-0.066698 +f^y[1]:154733:0.065592 +f^y[2]:154734:-0.066698 diff --git a/test/train-sets/ref/inv_hash.stderr b/test/train-sets/ref/inv_hash.stderr index 105a7b2a93e..d8f233f8924 100644 --- a/test/train-sets/ref/inv_hash.stderr +++ b/test/train-sets/ref/inv_hash.stderr @@ -10,8 +10,8 @@ Reading datafile = num sources = 1 average since example example current current current loss last counter weight label predict features -1.000000 1.000000 1 1.0 3 1 22 -1.000000 1.000000 2 2.0 2 3 11 +1.000000 1.000000 1 1.0 3 1 28 +1.000000 1.000000 2 2.0 2 3 15 finished run number of examples per pass = 2 @@ -19,4 +19,4 @@ passes used = 1 weighted example sum = 2.000000 weighted label sum = 0.000000 average loss = 1.000000 -total feature number = 33 +total feature number = 43 diff --git a/test/train-sets/ref/redefine.stderr b/test/train-sets/ref/redefine.stderr index 2b6d19b2286..0ce60c45463 100644 --- a/test/train-sets/ref/redefine.stderr +++ b/test/train-sets/ref/redefine.stderr @@ -9,16 +9,16 @@ Reading datafile = train-sets/0080.dat num sources = 1 average since example example current 
current current loss last counter weight label predict features -1.000000 1.000000 1 1.0 1.0000 0.0000 4 -2.196468 3.392936 2 2.0 2.0000 0.1580 4 -1.483462 0.770456 4 4.0 2.0000 0.9558 4 +1.000000 1.000000 1 1.0 1.0000 0.0000 6 +2.271564 3.543129 2 2.0 2.0000 0.1177 6 +1.505925 0.740287 4 4.0 2.0000 1.0411 6 finished run number of examples per pass = 4 passes used = 1 weighted example sum = 4.000000 weighted label sum = 6.000000 -average loss = 1.483462 +average loss = 1.505925 best constant = 1.500000 best constant's loss = 0.250000 -total feature number = 16 +total feature number = 24 diff --git a/test/train-sets/ref/search_dep_parser.stderr b/test/train-sets/ref/search_dep_parser.stderr index 25c7658c5f6..e7da1d97f61 100644 --- a/test/train-sets/ref/search_dep_parser.stderr +++ b/test/train-sets/ref/search_dep_parser.stderr @@ -10,14 +10,14 @@ average since instance current true current predicted loss last counter output prefix output prefix pass pol made hits gener beta 88.000000 88.000000 1 [43:1 5:2 5:2 5:2 1..] [0:8 1:1 2:1 3:1 4:..] 0 0 144 0 144 0.014199 47.500000 7.000000 2 [2:2 3:5 0:8 3:7 3:4 ] [2:2 0:8 2:4 2:4 2:4 ] 0 0 157 0 156 0.015381 -38.500000 29.500000 4 [4:2 4:2 4:2 7:5 6:..] [2:2 9:2 2:4 2:1 4:..] 0 0 248 0 246 0.024204 -27.125000 15.750000 8 [4:2 4:2 4:2 5:5 0:..] [4:2 3:2 4:2 5:5 0:..] 1 0 551 0 543 0.052760 -16.000000 4.875000 16 [43:1 5:2 5:2 5:2 1..] [43:1 5:2 5:2 5:2 1..] 3 0 1187 0 1134 0.107122 +37.500000 27.500000 4 [4:2 4:2 4:2 7:5 6:..] [2:2 7:5 2:4 2:4 2:..] 0 0 248 0 246 0.024204 +28.000000 18.500000 8 [4:2 4:2 4:2 5:5 0:..] [3:2 3:2 0:8 5:2 3:..] 1 0 551 0 543 0.052760 +17.250000 6.500000 16 [43:1 5:2 5:2 5:2 1..] [43:1 5:2 5:2 5:2 1..] 
3 0 1187 0 1134 0.107122 finished run number of examples per pass = 5 passes used = 6 weighted example sum = 30 weighted label sum = 0 -average loss = 8.86667 -total feature number = 696025 +average loss = 9.53333 +total feature number = 593481 diff --git a/vowpalwabbit/interactions.cc b/vowpalwabbit/interactions.cc index 87bc0368996..8450876a8d7 100644 --- a/vowpalwabbit/interactions.cc +++ b/vowpalwabbit/interactions.cc @@ -276,7 +276,7 @@ void sort_and_filter_duplicate_interactions(v_array& vec, bool filter_ */ -// thecode under DEBUG_EVAL_COUNT_OF_GEN_FT below is an alternative way of implementation of eval_count_of_generated_ft() +// the code under DEBUG_EVAL_COUNT_OF_GEN_FT below is an alternative way of implementation of eval_count_of_generated_ft() // it just calls generate_interactions() with small function which counts generated features and sums their squared weights // it's replaced with more fast (?) analytic solution but keeps just in case and for doublecheck. @@ -403,7 +403,7 @@ void eval_count_of_generated_ft(vw& all, example& ec, size_t& new_features_cnt, { const float x = ft->x*ft->x; - if ( ft->x == 1.0 || !feature_self_interactions_for_weight_other_than_1) // must compare ft->x + if ( !PROCESS_SELF_INTERACTIONS(ft->x) ) { for (size_t i = order_of_inter-1; i > 0; --i) results[i] += results[i-1]*x; diff --git a/vowpalwabbit/interactions.h b/vowpalwabbit/interactions.h index ff7a4ec6fd3..8f5702abb33 100644 --- a/vowpalwabbit/interactions.h +++ b/vowpalwabbit/interactions.h @@ -35,12 +35,16 @@ void sort_and_filter_duplicate_interactions(v_array &vec, bool filter_ /* -* By default include interactions of feature with itself if its weight != weight^2. -* For ex. interaction 'aa' with 3 features in namespace a: {1,2,3} generates -* simple combinations of features {12,13,23}. But if weight of feature 2 != 1.0 then -* features {12, 13, 22, 23} will be generated. 
For 'aaa' it will be {123, 222, 223} +* By default include interactions of feature with itself. +* This approach produces slightly more interactions but it's safer +* for some cases, as discussed in issues/698 +* Previous behaviour was: include interactions of feature with itself only if its weight != weight^2. +* */ -const bool feature_self_interactions_for_weight_other_than_1 = true; +const bool feature_self_interactions = true; +// must return logical expression +/*old: ft_weight != 1.0 && feature_self_interactions_for_weight_other_than_1*/ +#define PROCESS_SELF_INTERACTIONS(ft_weight) feature_self_interactions @@ -159,7 +163,7 @@ inline void generate_interactions(vw& all, example& ec, R& dat, v_array(dat, fst); // next index differs for permutations and simple combinations const feature_class* snd = (!same_namespace) ? features_data[snd_ns].begin : - (fst->x != 1. && feature_self_interactions_for_weight_other_than_1) ? fst : fst+1; + (PROCESS_SELF_INTERACTIONS(fst->x)) ? fst : fst+1; const float& ft_weight = fst->x; for (; snd < snd_end; ++snd) { @@ -204,7 +208,7 @@ inline void generate_interactions(vw& all, example& ec, R& dat, v_arrayx != 1. && feature_self_interactions_for_weight_other_than_1) ? fst : fst+1; + (PROCESS_SELF_INTERACTIONS(fst->x)) ? fst : fst+1; const uint32_t halfhash1 = FNV_prime * fst->weight_index; const float& ft_weight = fst->x; @@ -219,7 +223,7 @@ inline void generate_interactions(vw& all, example& ec, R& dat, v_arrayx != 1. && feature_self_interactions_for_weight_other_than_1) ? snd : snd+1; + (PROCESS_SELF_INTERACTIONS(snd->x)) ? snd : snd+1; for (; thr < thr_end; ++thr) { @@ -293,8 +297,7 @@ inline void generate_interactions(vw& all, example& ec, R& dat, v_arrayloop_end; - if ((*fgd2->ft_arr)[loop_end-margin].x == 1. 
|| // if special case at end of array then we can't exclude more than existing margin - !feature_self_interactions_for_weight_other_than_1) // and we have to + if (!PROCESS_SELF_INTERACTIONS((*fgd2->ft_arr)[loop_end-margin].x)) { ++margin; // otherwise margin can 't be increased if ( (must_skip_interaction = (loop_end < margin)) ) break; @@ -349,7 +352,7 @@ inline void generate_interactions(vw& all, example& ec, R& dat, v_arrayloop_idx = ((cur_feature->x != 1.) && feature_self_interactions_for_weight_other_than_1) ? cur_data->loop_idx : cur_data->loop_idx + 1; + next_data->loop_idx = (PROCESS_SELF_INTERACTIONS(cur_feature->x)) ? cur_data->loop_idx : cur_data->loop_idx + 1; } else next_data->loop_idx = 0; diff --git a/vowpalwabbit/search_dep_parser.cc b/vowpalwabbit/search_dep_parser.cc index a41fbaea15d..77ae2da04c3 100644 --- a/vowpalwabbit/search_dep_parser.cc +++ b/vowpalwabbit/search_dep_parser.cc @@ -29,7 +29,7 @@ namespace DepParserTask { const action REDUCE_RIGHT = 2; const action REDUCE_LEFT = 3; - void initialize(Search::search& srn, size_t& num_actions, po::variables_map& vm) { + void initialize(Search::search& srn, size_t& /*num_actions*/, po::variables_map& vm) { task_data *data = new task_data(); data->action_loss.resize(4,true); data->ex = NULL; @@ -215,17 +215,18 @@ namespace DepParserTask { add_feature(ex, temp[j]+ additional_offset , val_namespace, mask, multiplier); } - size_t count=0; + size_t count=0; for (unsigned char* ns = data->ex->indices.begin; ns != data->ex->indices.end; ns++) { data->ex->sum_feat_sq[(int)*ns] = (float) data->ex->atomics[(int)*ns].size(); count+= data->ex->atomics[(int)*ns].size(); } - for (vector::iterator i = all.pairs.begin(); i != all.pairs.end();i++) - count += data->ex->atomics[(int)(*i)[0]].size()* data->ex->atomics[(int)(*i)[1]].size(); - for (vector::iterator i = all.triples.begin(); i != all.triples.end();i++) - count += 
data->ex->atomics[(int)(*i)[0]].size()*data->ex->atomics[(int)(*i)[1]].size()*data->ex->atomics[(int)(*i)[2]].size(); - data->ex->num_features = count; - data->ex->total_sum_feat_sq = (float) count; + + size_t new_count; + float new_weight; + INTERACTIONS::eval_count_of_generated_ft(all, *data->ex, new_count, new_weight); + + data->ex->num_features = count + new_count; + data->ex->total_sum_feat_sq = (float) count + new_weight; } void get_valid_actions(v_array & valid_action, uint32_t idx, uint32_t n, uint32_t stack_depth, uint32_t state) {