Skip to content

Commit

Permalink
fix(stats): fix numeric median calculation
Browse files (browse the repository at this point in the history)
  • Loading branch information
b5 committed Nov 14, 2019
1 parent 937d145 commit 24d29b3
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 5 deletions.
Binary file modified api/testdata/api.snapshot
Binary file not shown.
4 changes: 2 additions & 2 deletions lib/datasets_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -985,8 +985,8 @@ func TestDatasetRequestsStats(t *testing.T) {
ref string
expected []byte
}{
{"csv: me/cities", "me/cities", []byte(`[{"count":5,"maxLength":8,"minLength":7,"type":"string","unique":5},{"count":5,"histogram":{"bins":[35000,4031500.1,8028000.2,12024500.3,16021000.4,20017500.5,24014000.6,28010500.7,32007000.8,36003500.9,40000001],"frequencies":[3,0,1,0,0,0,0,0,0,1]},"max":40000000,"mean":9817000,"median":300000,"min":35000,"type":"numeric"},{"count":5,"histogram":{"bins":[44.4,46.585,48.769999999999996,50.955,53.14,55.325,57.51,59.695,61.879999999999995,64.065,66.25],"frequencies":[2,0,1,0,0,1,0,0,0,1]},"max":65.25,"mean":52.04,"median":44.4,"min":44.4,"type":"numeric"},{"count":5,"falseCount":1,"trueCount":4,"type":"boolean"}]`)},
{"json: me/sitemap", "me/sitemap", []byte(`[{"count":10,"histogram":{"bins":[24515,26071.5,27628,29184.5,30741,32297.5,33854,35410.5,36967,38523.5,40080],"frequencies":[4,0,3,1,0,0,1,0,0,1]},"key":"contentLength","max":40079,"mean":28825.8,"median":40079,"min":24515,"type":"numeric"},{"count":10,"frequencies":{"text/html; charset=utf-8":10},"key":"contentSniff","maxLength":24,"minLength":24,"type":"string"},{"count":10,"frequencies":{"text/html; charset=utf-8":10},"key":"contentType","maxLength":24,"minLength":24,"type":"string"},{"count":10,"histogram":{"bins":[74291866,475020463.6,875749061.2,1276477658.8000002,1677206256.4,2077934854,2478663451.6000004,2879392049.2000003,3280120646.8,3680849244.4,4081577842],"frequencies":[2,0,0,0,0,0,0,0,0,8]},"key":"duration","max":4081577841,"mean":3276899953.4,"median":89911449,"min":74291866,"type":"numeric"},{"count":10,"key":"hash","maxLength":68,"minLength":68,"type":"string","unique":10},{"key":"links","type":"array","values":[{"count":10,"maxLength":58,"minLength":14,"unique":10},{"count":10,"maxLength":115,"minLength":19,"unique":10},{"count":10,"maxLength":68,"minLength":22,"unique":10},{"count":10,"maxLength":115,"minLength":14,"unique":10},{"count":9,"maxLength":70,"minLength":15,"unique":9},{"count":9,"maxLength":115,"minLength":37,"unique":9},{"count":9,"maxLength":52,"minLength":15,"unique":9},{"count":9,"maxLength":75,"minLength":19,"unique":9},{"count":9,"maxLength":66,"minLength":15,"unique":9},{"count":7,"maxLength":75,"minLength":19,"unique":7},{"count":7,"maxLength":66,"minLength":22,"unique":7},{"count":6,"maxLength":43,"minLength":19,"unique":6},{"count":6,"maxLength":77,"minLength":14,"unique":6},{"count":6,"maxLength":77,"minLength":21,"unique":6},{"count":4,"maxLength":43,"minLength":14,"unique":4},{"count":3,"maxLength":32,"minLength":21,"unique":3},{"count":3,"maxLength":42,"minLength":19,"unique":3},{"count":3,"maxLength":66,"minLength":32,"unique":3},{"count":3,"maxLength":46,"minLength":19,"unique
":3},{"count":2,"maxLength":66,"minLength":22,"unique":2},{"count":2,"maxLength":32,"minLength":23,"unique":2},{"count":2,"maxLength":33,"minLength":22,"unique":2},{"count":2,"maxLength":32,"minLength":27,"unique":2},{"count":1,"maxLength":33,"minLength":33,"unique":1},{"count":1,"maxLength":27,"minLength":27,"unique":1}]},{"count":1,"key":"redirectTo","maxLength":18,"minLength":18,"type":"string","unique":1},{"count":11,"histogram":{"bins":[200,210.2,220.4,230.6,240.8,251,261.2,271.4,281.6,291.8,302],"frequencies":[10,0,0,0,0,0,0,0,0,1]},"key":"status","max":301,"mean":209.1818181818182,"median":200,"min":200,"type":"numeric"},{"count":11,"key":"timestamp","maxLength":35,"minLength":35,"type":"string","unique":11},{"count":10,"key":"title","maxLength":88,"minLength":53,"type":"string","unique":10},{"count":11,"key":"url","maxLength":78,"minLength":18,"type":"string","unique":11}]`)},
{"csv: me/cities", "me/cities", []byte(`[{"count":5,"maxLength":8,"minLength":7,"type":"string","unique":5},{"count":5,"histogram":{"bins":[35000,4031500.1,8028000.2,12024500.3,16021000.4,20017500.5,24014000.6,28010500.7,32007000.8,36003500.9,40000001],"frequencies":[3,0,1,0,0,0,0,0,0,1]},"max":40000000,"mean":9817000,"median":300000,"min":35000,"type":"numeric"},{"count":5,"histogram":{"bins":[44.4,46.585,48.769999999999996,50.955,53.14,55.325,57.51,59.695,61.879999999999995,64.065,66.25],"frequencies":[2,0,1,0,0,1,0,0,0,1]},"max":65.25,"mean":52.04,"median":50.65,"min":44.4,"type":"numeric"},{"count":5,"falseCount":1,"trueCount":4,"type":"boolean"}]`)},
{"json: me/sitemap", "me/sitemap", []byte(`[{"count":10,"histogram":{"bins":[24515,26071.5,27628,29184.5,30741,32297.5,33854,35410.5,36967,38523.5,40080],"frequencies":[4,0,3,1,0,0,1,0,0,1]},"key":"contentLength","max":40079,"mean":28825.8,"median":28059,"min":24515,"type":"numeric"},{"count":10,"frequencies":{"text/html; charset=utf-8":10},"key":"contentSniff","maxLength":24,"minLength":24,"type":"string"},{"count":10,"frequencies":{"text/html; charset=utf-8":10},"key":"contentType","maxLength":24,"minLength":24,"type":"string"},{"count":10,"histogram":{"bins":[74291866,475020463.6,875749061.2,1276477658.8000002,1677206256.4,2077934854,2478663451.6000004,2879392049.2000003,3280120646.8,3680849244.4,4081577842],"frequencies":[2,0,0,0,0,0,0,0,0,8]},"key":"duration","max":4081577841,"mean":3276899953.4,"median":4077230086,"min":74291866,"type":"numeric"},{"count":10,"key":"hash","maxLength":68,"minLength":68,"type":"string","unique":10},{"key":"links","type":"array","values":[{"count":10,"maxLength":58,"minLength":14,"unique":10},{"count":10,"maxLength":115,"minLength":19,"unique":10},{"count":10,"maxLength":68,"minLength":22,"unique":10},{"count":10,"maxLength":115,"minLength":14,"unique":10},{"count":9,"maxLength":70,"minLength":15,"unique":9},{"count":9,"maxLength":115,"minLength":37,"unique":9},{"count":9,"maxLength":52,"minLength":15,"unique":9},{"count":9,"maxLength":75,"minLength":19,"unique":9},{"count":9,"maxLength":66,"minLength":15,"unique":9},{"count":7,"maxLength":75,"minLength":19,"unique":7},{"count":7,"maxLength":66,"minLength":22,"unique":7},{"count":6,"maxLength":43,"minLength":19,"unique":6},{"count":6,"maxLength":77,"minLength":14,"unique":6},{"count":6,"maxLength":77,"minLength":21,"unique":6},{"count":4,"maxLength":43,"minLength":14,"unique":4},{"count":3,"maxLength":32,"minLength":21,"unique":3},{"count":3,"maxLength":42,"minLength":19,"unique":3},{"count":3,"maxLength":66,"minLength":32,"unique":3},{"count":3,"maxLength":46,"minLength":19,"uniq
ue":3},{"count":2,"maxLength":66,"minLength":22,"unique":2},{"count":2,"maxLength":32,"minLength":23,"unique":2},{"count":2,"maxLength":33,"minLength":22,"unique":2},{"count":2,"maxLength":32,"minLength":27,"unique":2},{"count":1,"maxLength":33,"minLength":33,"unique":1},{"count":1,"maxLength":27,"minLength":27,"unique":1}]},{"count":1,"key":"redirectTo","maxLength":18,"minLength":18,"type":"string","unique":1},{"count":11,"histogram":{"bins":[200,210.2,220.4,230.6,240.8,251,261.2,271.4,281.6,291.8,302],"frequencies":[10,0,0,0,0,0,0,0,0,1]},"key":"status","max":301,"mean":209.1818181818182,"median":200,"min":200,"type":"numeric"},{"count":11,"key":"timestamp","maxLength":35,"minLength":35,"type":"string","unique":11},{"count":10,"key":"title","maxLength":88,"minLength":53,"type":"string","unique":10},{"count":11,"key":"url","maxLength":78,"minLength":18,"type":"string","unique":11}]`)},
}
for i, c := range goodCases {
res := &StatsResponse{}
Expand Down
10 changes: 8 additions & 2 deletions stats/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -418,9 +418,15 @@ func (acc *numericAcc) Close() {
acc.mean = acc.mean / float64(acc.count)

if len(acc.histogram) > 0 {
acc.median = acc.histogram[len(acc.histogram)/2]

sort.Float64Slice(acc.histogram).Sort()

if len(acc.histogram)%2 == 0 && len(acc.histogram) > 1 {
acc.median = (acc.histogram[len(acc.histogram)/2-1] + acc.histogram[len(acc.histogram)/2]) / float64(2)
// acc.median = (acc.histogram[len(acc.histogram)/2] + acc.histogram[len(acc.histogram)/2+1]) / float64(2)
} else {
acc.median = acc.histogram[len(acc.histogram)/2]
}

// turn values into a histogram
nBins := 10
acc.dividers = make([]float64, nBins+1)
Expand Down
25 changes: 24 additions & 1 deletion stats/stats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,30 @@ func TestAllTypesIdentitySchemaArray(t *testing.T) {
},
}

runTestCases(t, allTypesIdentitySchemaArray)
medianTestCase := TestCase{
"median test cast",
`{"type":"array"}`,
`[
[2],
[1],
]`,
[]map[string]interface{}{
{
"count": 2,
"min": float64(1),
"max": float64(2),
"mean": float64(1.5),
"median": float64(1.5),
"type": "numeric",
"histogram": map[string][]float64{
"bins": {1, 1.2, 1.4, 1.6, 1.8, 2, 2.2, 2.4000000000000004, 2.6, 2.8, 3},
"frequencies": {1, 0, 0, 0, 0, 1, 0, 0, 0, 0},
},
},
},
}

runTestCases(t, allTypesIdentitySchemaArray, medianTestCase)
}

func TestAllTypesIdentitySchemaObject(t *testing.T) {
Expand Down

0 comments on commit 24d29b3

Please sign in to comment.