-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathBaseline.fs
63 lines (58 loc) · 2.12 KB
/
Baseline.fs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
// 1 Billion Row Challenge in F#
// Baseline implementation: reads the measurements file line by line and aggregates per station in a Dictionary.
module Baseline
open System
open System.Collections.Generic
open System.IO
/// Running aggregate of the measurements seen for a single station.
/// Fields are mutable so that an existing entry can be updated in place.
type StationDataObject =
    {
        /// Smallest measurement recorded so far.
        mutable Min : float
        /// Largest measurement recorded so far.
        mutable Max : float
        /// Sum of all measurements recorded so far.
        mutable Sum : float
        /// Number of measurements recorded so far.
        mutable Count : int
    }
/// Reads the measurements file at `measurementsPath`, aggregates the minimum,
/// mean and maximum value per station, and prints the result to stdout as
/// `{name=min/mean/max, ...}` with stations sorted by name.
///
/// Each non-blank line is split on `;`: the first field is the station name and
/// the second is parsed as a floating-point measurement. Blank lines are skipped.
/// A non-blank line without a `;` separator, or with a second field that cannot
/// be parsed as a number, raises an exception.
///
/// Every 50 million processed lines a progress message with an estimated total
/// run time (extrapolated to one billion lines) is printed.
let run (measurementsPath : string) =
    let bufferSize = 64 * 1024
    use measurementsStream =
        new FileStream(
            measurementsPath,
            FileMode.Open,
            FileAccess.Read,
            FileShare.Read,
            bufferSize,
            FileOptions.SequentialScan)
    use measurements =
        new StreamReader(measurementsStream, System.Text.Encoding.UTF8, true, bufferSize)
    let stations = Dictionary<string, StationDataObject>()
    let mutable count = 0
    let mutable line = measurements.ReadLine()
    let stopwatch = System.Diagnostics.Stopwatch.StartNew()
    while not (isNull line) do
        // Skip blank lines (e.g. a trailing empty line) instead of failing on parts.[1].
        if line.Length > 0 then
            let parts = line.Split(';')
            let station = parts.[0]
            let temp = double (parts.[1])
            match stations.TryGetValue station with
            | true, entry ->
                // The record is a reference type, so mutating it updates the stored entry.
                entry.Min <- min entry.Min temp
                entry.Max <- max entry.Max temp
                entry.Sum <- entry.Sum + temp
                entry.Count <- entry.Count + 1
            | false, _ ->
                stations.[station] <- {
                    Min = temp
                    Max = temp
                    Sum = temp
                    Count = 1
                }
            count <- count + 1
            if count % 50_000_000 = 0 then
                let entriesPerSecond = (float count) / stopwatch.Elapsed.TotalSeconds
                let estimatedTotalTime = TimeSpan.FromSeconds (1.0e9 / entriesPerSecond)
                printfn $"Processed %d{count} lines (est {estimatedTotalTime})"
        line <- measurements.ReadLine()
    let formattedStations =
        stations
        |> Seq.sortBy (fun (kv : KeyValuePair<string, StationDataObject>) -> kv.Key)
        |> Seq.map (fun kv ->
            let e = kv.Value
            sprintf "%s=%.1f/%.1f/%.1f" kv.Key e.Min (e.Sum / double e.Count) e.Max)
    // Always emit both braces so that an empty input prints "{}" rather than a lone "}".
    printfn "{%s}" (String.concat ", " formattedStations)