Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

self-interacting features regardless of their weight #707

Merged
merged 2 commits into from
Jun 26, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
177 changes: 99 additions & 78 deletions test/pred-sets/ref/inv_hash.cmp
Original file line number Diff line number Diff line change
Expand Up @@ -9,81 +9,102 @@ lda:0
0 skip:
options: --oaa 3
:0
Constant:202096:-0.078247
Constant[1]:202097:0.021453
Constant[2]:202098:-0.022712
e^x:125152:-0.078247
e^x*e^y:128176:-0.041096
e^x*e^y[1]:128177:-0.041096
e^x*e^y[2]:128178:0.041096
e^x*e^z:158836:-0.041096
e^x*e^z[1]:158837:-0.041096
e^x*e^z[2]:158838:0.041096
e^x*f^a:96492:-0.078247
e^x*f^a[1]:96493:0.021453
e^x*f^a[2]:96494:-0.022712
e^x*f^b:93488:-0.041096
e^x*f^b[1]:93489:-0.041096
e^x*f^b[2]:93490:0.041096
e^x*f^c:189984:-0.078247
e^x*f^c[1]:189985:0.021453
e^x*f^c[2]:189986:-0.022712
e^x*f^y:246988:-0.064023
e^x*f^y[1]:246989:0.079204
e^x*f^y[2]:246990:-0.080799
e^x[1]:125153:0.021453
e^x[2]:125154:-0.022712
e^y:27664:-0.041096
e^y*e^z:125412:-0.041096
e^y*e^z[1]:125413:-0.041096
e^y*e^z[2]:125414:0.041096
e^y*f^a:195964:-0.041096
e^y*f^a[1]:195965:-0.041096
e^y*f^a[2]:195966:0.041096
e^y*f^b:190624:-0.041096
e^y*f^b[1]:190625:-0.041096
e^y*f^b[2]:190626:0.041096
e^y*f^c:91056:-0.041096
e^y*f^c[1]:91057:-0.041096
e^y*f^c[2]:91058:0.041096
e^y[1]:27665:-0.041096
e^y[2]:27666:0.041096
e^z:259284:-0.041096
e^z*f^a:166384:-0.041096
e^z*f^a[1]:166385:-0.041096
e^z*f^a[2]:166386:0.041096
e^z*f^b:171052:-0.041096
e^z*f^b[1]:171053:-0.041096
e^z*f^b[2]:171054:0.041096
e^z*f^c:71484:-0.041096
e^z*f^c[1]:71485:-0.041096
e^z*f^c[2]:71486:0.041096
e^z[1]:259285:-0.041096
e^z[2]:259286:0.041096
f^a:57420:-0.078247
f^a*f^b:123444:-0.041096
f^a*f^b[1]:123445:-0.041096
f^a*f^b[2]:123446:0.041096
f^a*f^c:157988:-0.078247
f^a*f^c[1]:157989:0.021453
f^a*f^c[2]:157990:-0.022712
f^a*f^y:216008:-0.064023
f^a*f^y[1]:216009:0.079204
f^a*f^y[2]:216010:-0.080799
f^a[1]:57421:0.021453
f^a[2]:57422:-0.022712
f^b:62864:-0.041096
f^b*f^c:126768:-0.041096
f^b*f^c[1]:126769:-0.041096
f^b*f^c[2]:126770:0.041096
f^b[1]:62865:-0.041096
f^b[2]:62866:0.041096
f^c:228992:-0.078247
f^c[1]:228993:0.021453
f^c[2]:228994:-0.022712
f^y:154732:-0.064023
f^y*f^c:132:-0.064023
f^y*f^c[1]:133:0.079204
f^y*f^c[2]:134:-0.080799
f^y[1]:154733:0.079204
f^y[2]:154734:-0.080799
Constant:202096:-0.061159
Constant[1]:202097:0.019265
Constant[2]:202098:-0.020149
e^x:125152:-0.061159
e^x*e^x:28736:-0.061159
e^x*e^x[1]:28737:0.019265
e^x*e^x[2]:28738:-0.020149
e^x*e^y:128176:-0.033179
e^x*e^y[1]:128177:-0.033179
e^x*e^y[2]:128178:0.033179
e^x*e^z:158836:-0.033179
e^x*e^z[1]:158837:-0.033179
e^x*e^z[2]:158838:0.033179
e^x*f^a:96492:-0.061159
e^x*f^a[1]:96493:0.019265
e^x*f^a[2]:96494:-0.020149
e^x*f^b:93488:-0.033179
e^x*f^b[1]:93489:-0.033179
e^x*f^b[2]:93490:0.033179
e^x*f^c:189984:-0.061159
e^x*f^c[1]:189985:0.019265
e^x*f^c[2]:189986:-0.020149
e^x*f^y:246988:-0.050365
e^x*f^y[1]:246989:0.065592
e^x*f^y[2]:246990:-0.066698
e^x[1]:125153:0.019265
e^x[2]:125154:-0.020149
e^y:27664:-0.033179
e^y*e^y:160032:-0.033179
e^y*e^y[1]:160033:-0.033179
e^y*e^y[2]:160034:0.033179
e^y*e^z:125412:-0.033179
e^y*e^z[1]:125413:-0.033179
e^y*e^z[2]:125414:0.033179
e^y*f^a:195964:-0.033179
e^y*f^a[1]:195965:-0.033179
e^y*f^a[2]:195966:0.033179
e^y*f^b:190624:-0.033179
e^y*f^b[1]:190625:-0.033179
e^y*f^b[2]:190626:0.033179
e^y*f^c:91056:-0.033179
e^y*f^c[1]:91057:-0.033179
e^y*f^c[2]:91058:0.033179
e^y[1]:27665:-0.033179
e^y[2]:27666:0.033179
e^z:259284:-0.033179
e^z*e^z:105832:-0.033179
e^z*e^z[1]:105833:-0.033179
e^z*e^z[2]:105834:0.033179
e^z*f^a:166384:-0.033179
e^z*f^a[1]:166385:-0.033179
e^z*f^a[2]:166386:0.033179
e^z*f^b:171052:-0.033179
e^z*f^b[1]:171053:-0.033179
e^z*f^b[2]:171054:0.033179
e^z*f^c:71484:-0.033179
e^z*f^c[1]:71485:-0.033179
e^z*f^c[2]:71486:0.033179
e^z[1]:259285:-0.033179
e^z[2]:259286:0.033179
f^a:57420:-0.061159
f^a*f^a:129000:-0.061159
f^a*f^a[1]:129001:0.019265
f^a*f^a[2]:129002:-0.020149
f^a*f^b:123444:-0.033179
f^a*f^b[1]:123445:-0.033179
f^a*f^b[2]:123446:0.033179
f^a*f^c:157988:-0.061159
f^a*f^c[1]:157989:0.019265
f^a*f^c[2]:157990:-0.020149
f^a*f^y:216008:-0.050365
f^a*f^y[1]:216009:0.065592
f^a*f^y[2]:216010:-0.066698
f^a[1]:57421:0.019265
f^a[2]:57422:-0.020149
f^b:62864:-0.033179
f^b*f^b:156704:-0.033179
f^b*f^b[1]:156705:-0.033179
f^b*f^b[2]:156706:0.033179
f^b*f^c:126768:-0.033179
f^b*f^c[1]:126769:-0.033179
f^b*f^c[2]:126770:0.033179
f^b[1]:62865:-0.033179
f^b[2]:62866:0.033179
f^c:228992:-0.061159
f^c*f^c:220416:-0.061159
f^c*f^c[1]:220417:0.019265
f^c*f^c[2]:220418:-0.020149
f^c[1]:228993:0.019265
f^c[2]:228994:-0.020149
f^y:154732:-0.050365
f^y*f^c:132:-0.050365
f^y*f^c[1]:133:0.065592
f^y*f^c[2]:134:-0.066698
f^y*f^y:74344:-0.050365
f^y*f^y[1]:74345:0.065592
f^y*f^y[2]:74346:-0.066698
f^y[1]:154733:0.065592
f^y[2]:154734:-0.066698
6 changes: 3 additions & 3 deletions test/train-sets/ref/inv_hash.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@ Reading datafile =
num sources = 1
average since example example current current current
loss last counter weight label predict features
1.000000 1.000000 1 1.0 3 1 22
1.000000 1.000000 2 2.0 2 3 11
1.000000 1.000000 1 1.0 3 1 28
1.000000 1.000000 2 2.0 2 3 15

finished run
number of examples per pass = 2
passes used = 1
weighted example sum = 2.000000
weighted label sum = 0.000000
average loss = 1.000000
total feature number = 33
total feature number = 43
10 changes: 5 additions & 5 deletions test/train-sets/ref/redefine.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,16 @@ Reading datafile = train-sets/0080.dat
num sources = 1
average since example example current current current
loss last counter weight label predict features
1.000000 1.000000 1 1.0 1.0000 0.0000 4
2.196468 3.392936 2 2.0 2.0000 0.1580 4
1.483462 0.770456 4 4.0 2.0000 0.9558 4
1.000000 1.000000 1 1.0 1.0000 0.0000 6
2.271564 3.543129 2 2.0 2.0000 0.1177 6
1.505925 0.740287 4 4.0 2.0000 1.0411 6

finished run
number of examples per pass = 4
passes used = 1
weighted example sum = 4.000000
weighted label sum = 6.000000
average loss = 1.483462
average loss = 1.505925
best constant = 1.500000
best constant's loss = 0.250000
total feature number = 16
total feature number = 24
10 changes: 5 additions & 5 deletions test/train-sets/ref/search_dep_parser.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@ average since instance current true current predicted
loss last counter output prefix output prefix pass pol made hits gener beta
88.000000 88.000000 1 [43:1 5:2 5:2 5:2 1..] [0:8 1:1 2:1 3:1 4:..] 0 0 144 0 144 0.014199
47.500000 7.000000 2 [2:2 3:5 0:8 3:7 3:4 ] [2:2 0:8 2:4 2:4 2:4 ] 0 0 157 0 156 0.015381
38.500000 29.500000 4 [4:2 4:2 4:2 7:5 6:..] [2:2 9:2 2:4 2:1 4:..] 0 0 248 0 246 0.024204
27.125000 15.750000 8 [4:2 4:2 4:2 5:5 0:..] [4:2 3:2 4:2 5:5 0:..] 1 0 551 0 543 0.052760
16.000000 4.875000 16 [43:1 5:2 5:2 5:2 1..] [43:1 5:2 5:2 5:2 1..] 3 0 1187 0 1134 0.107122
37.500000 27.500000 4 [4:2 4:2 4:2 7:5 6:..] [2:2 7:5 2:4 2:4 2:..] 0 0 248 0 246 0.024204
28.000000 18.500000 8 [4:2 4:2 4:2 5:5 0:..] [3:2 3:2 0:8 5:2 3:..] 1 0 551 0 543 0.052760
17.250000 6.500000 16 [43:1 5:2 5:2 5:2 1..] [43:1 5:2 5:2 5:2 1..] 3 0 1187 0 1134 0.107122

finished run
number of examples per pass = 5
passes used = 6
weighted example sum = 30
weighted label sum = 0
average loss = 8.86667
total feature number = 696025
average loss = 9.53333
total feature number = 593481
4 changes: 2 additions & 2 deletions vowpalwabbit/interactions.cc
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ void sort_and_filter_duplicate_interactions(v_array<v_string>& vec, bool filter_
*/


// thecode under DEBUG_EVAL_COUNT_OF_GEN_FT below is an alternative way of implementation of eval_count_of_generated_ft()
// the code under DEBUG_EVAL_COUNT_OF_GEN_FT below is an alternative way of implementation of eval_count_of_generated_ft()
// it just calls generate_interactions() with a small function which counts generated features and sums their squared weights
// it's replaced with a faster (?) analytic solution, but is kept just in case and for double-checking.

Expand Down Expand Up @@ -403,7 +403,7 @@ void eval_count_of_generated_ft(vw& all, example& ec, size_t& new_features_cnt,
{
const float x = ft->x*ft->x;

if ( ft->x == 1.0 || !feature_self_interactions_for_weight_other_than_1) // must compare ft->x
if ( !PROCESS_SELF_INTERACTIONS(ft->x) )
{
for (size_t i = order_of_inter-1; i > 0; --i)
results[i] += results[i-1]*x;
Expand Down
25 changes: 14 additions & 11 deletions vowpalwabbit/interactions.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,16 @@ void sort_and_filter_duplicate_interactions(v_array<v_string> &vec, bool filter_


/*
* By default include interactions of feature with itself if its weight != weight^2.
* For ex. interaction 'aa' with 3 features in namespace a: {1,2,3} generates
* simple combinations of features {12,13,23}. But if weight of feature 2 != 1.0 then
* features {12, 13, 22, 23} will be generated. For 'aaa' it will be {123, 222, 223}
* By default include interactions of a feature with itself.
* This approach produces slightly more interactions, but it's safer
* for some cases, as discussed in issues/698.
* Previous behaviour was: include interactions of a feature with itself only if its weight != weight^2.
*
*/
const bool feature_self_interactions_for_weight_other_than_1 = true;
const bool feature_self_interactions = true;
// must return logical expression
/*old: ft_weight != 1.0 && feature_self_interactions_for_weight_other_than_1*/
#define PROCESS_SELF_INTERACTIONS(ft_weight) feature_self_interactions



Expand Down Expand Up @@ -159,7 +163,7 @@ inline void generate_interactions(vw& all, example& ec, R& dat, v_array<feature_
call_audit<R ,audit_func>(dat, fst);
// next index differs for permutations and simple combinations
const feature_class* snd = (!same_namespace) ? features_data[snd_ns].begin :
(fst->x != 1. && feature_self_interactions_for_weight_other_than_1) ? fst : fst+1;
(PROCESS_SELF_INTERACTIONS(fst->x)) ? fst : fst+1;
const float& ft_weight = fst->x;
for (; snd < snd_end; ++snd)
{
Expand Down Expand Up @@ -204,7 +208,7 @@ inline void generate_interactions(vw& all, example& ec, R& dat, v_array<feature_

// next index differs for permutations and simple combinations
const feature_class* snd = (!same_namespace1) ? features_data[snd_ns].begin :
(fst->x != 1. && feature_self_interactions_for_weight_other_than_1) ? fst : fst+1;
(PROCESS_SELF_INTERACTIONS(fst->x)) ? fst : fst+1;

const uint32_t halfhash1 = FNV_prime * fst->weight_index;
const float& ft_weight = fst->x;
Expand All @@ -219,7 +223,7 @@ inline void generate_interactions(vw& all, example& ec, R& dat, v_array<feature_

// next index differs for permutations and simple combinations
const feature_class* thr = (!same_namespace2) ? features_data[thr_ns].begin :
(snd->x != 1. && feature_self_interactions_for_weight_other_than_1) ? snd : snd+1;
(PROCESS_SELF_INTERACTIONS(snd->x)) ? snd : snd+1;

for (; thr < thr_end; ++thr)
{
Expand Down Expand Up @@ -293,8 +297,7 @@ inline void generate_interactions(vw& all, example& ec, R& dat, v_array<feature_
{
size_t& loop_end = fgd2->loop_end;

if ((*fgd2->ft_arr)[loop_end-margin].x == 1. || // if special case at end of array then we can't exclude more than existing margin
!feature_self_interactions_for_weight_other_than_1) // and we have to
if (!PROCESS_SELF_INTERACTIONS((*fgd2->ft_arr)[loop_end-margin].x))
{
++margin; // otherwise margin can't be increased
if ( (must_skip_interaction = (loop_end < margin)) ) break;
Expand Down Expand Up @@ -349,7 +352,7 @@ inline void generate_interactions(vw& all, example& ec, R& dat, v_array<feature_
// unless feature has weight w and w != w*w. E.g. w != 0 and w != 1. Features with w == 0 are already
// filtered out in parse_args.cc::maybeFeature().

next_data->loop_idx = ((cur_feature->x != 1.) && feature_self_interactions_for_weight_other_than_1) ? cur_data->loop_idx : cur_data->loop_idx + 1;
next_data->loop_idx = (PROCESS_SELF_INTERACTIONS(cur_feature->x)) ? cur_data->loop_idx : cur_data->loop_idx + 1;
}
else
next_data->loop_idx = 0;
Expand Down
17 changes: 9 additions & 8 deletions vowpalwabbit/search_dep_parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ namespace DepParserTask {
const action REDUCE_RIGHT = 2;
const action REDUCE_LEFT = 3;

void initialize(Search::search& srn, size_t& num_actions, po::variables_map& vm) {
void initialize(Search::search& srn, size_t& /*num_actions*/, po::variables_map& vm) {
task_data *data = new task_data();
data->action_loss.resize(4,true);
data->ex = NULL;
Expand Down Expand Up @@ -215,17 +215,18 @@ namespace DepParserTask {
add_feature(ex, temp[j]+ additional_offset , val_namespace, mask, multiplier);
}

size_t count=0;
size_t count=0;
for (unsigned char* ns = data->ex->indices.begin; ns != data->ex->indices.end; ns++) {
data->ex->sum_feat_sq[(int)*ns] = (float) data->ex->atomics[(int)*ns].size();
count+= data->ex->atomics[(int)*ns].size();
}
for (vector<string>::iterator i = all.pairs.begin(); i != all.pairs.end();i++)
count += data->ex->atomics[(int)(*i)[0]].size()* data->ex->atomics[(int)(*i)[1]].size();
for (vector<string>::iterator i = all.triples.begin(); i != all.triples.end();i++)
count += data->ex->atomics[(int)(*i)[0]].size()*data->ex->atomics[(int)(*i)[1]].size()*data->ex->atomics[(int)(*i)[2]].size();
data->ex->num_features = count;
data->ex->total_sum_feat_sq = (float) count;

size_t new_count;
float new_weight;
INTERACTIONS::eval_count_of_generated_ft(all, *data->ex, new_count, new_weight);

data->ex->num_features = count + new_count;
data->ex->total_sum_feat_sq = (float) count + new_weight;
}

void get_valid_actions(v_array<uint32_t> & valid_action, uint32_t idx, uint32_t n, uint32_t stack_depth, uint32_t state) {
Expand Down