nim-lang · timotheecour · Jun 2, 2021 · Jun 2, 2021 · Jun 2, 2021 · Jun 2, 2021
diff --git a/changelog.md b/changelog.md
@@ -121,6 +121,9 @@
 
 - Fixed buffer overflow bugs in `net`
 
+- Added `std/cputicks` containing APIs for nanosecond resolution CPU counters, providing strictly monotonic
+  counters with highest available resolution.
+
 - Exported `sslHandle` from `net` and `asyncnet`.
 
 - Added `sections` iterator in `parsecfg`.

diff --git a/compiler/vmops.nim b/compiler/vmops.nim
@@ -33,6 +33,7 @@ from sighashes import symBodyDigest
 
 # There are some useful procs in vmconv.
 import vmconv
+from std/cputicks import getCpuTicks
 
 template mathop(op) {.dirty.} =
   registerCallback(c, "stdlib.math." & astToStr(op), `op Wrapper`)
@@ -340,3 +341,6 @@ proc registerAdditionalOps*(c: PCtx) =
     let p = a.getVar(0)
     let x = a.getFloat(1)
     addFloatSprintf(p.strVal, x)
+
+  registerCallback c, "stdlib.cputicks.getCpuTicksImpl", proc(a: VmArgs) =
+    setResult(a, getCpuTicks())
diff --git a/lib/pure/random.nim b/lib/pure/random.nim
@@ -618,10 +618,10 @@ proc shuffle*[T](x: var openArray[T]) =
   shuffle(state, x)
 
 when not defined(nimscript) and not defined(standalone):
-  import times
+  import std/cputicks
 
   proc initRand(): Rand =
-    ## Initializes a new Rand state with a seed based on the current time.
+    ## Initializes a new Rand state with a seed based on the current cpu tick.
     ##
     ## The resulting state is independent of the default RNG's state.
     ##
@@ -631,12 +631,7 @@ when not defined(nimscript) and not defined(standalone):
     ## * `initRand proc<#initRand,int64>`_ that accepts a seed for a new Rand state
     ## * `randomize proc<#randomize>`_ that initializes the default RNG using the current time
     ## * `randomize proc<#randomize,int64>`_ that accepts a seed for the default RNG
-    when defined(js):
-      let time = int64(times.epochTime() * 1000) and 0x7fff_ffff
-      result = initRand(time)
-    else:
-      let now = times.getTime()
-      result = initRand(convert(Seconds, Nanoseconds, now.toUnix) + now.nanosecond)
+    result = initRand(getCpuTicks())
 
   since (1, 5, 1):
     export initRand

diff --git a/lib/std/cputicks.nim b/lib/std/cputicks.nim
@@ -0,0 +1,90 @@
+##[
+Experimental API, subject to change
+]##
+
+#[
+Future work:
+* convert ticks to time; see some approaches here: https://quick-bench.com/q/WcbqUWBCoNBJvCP4n8h3kYfZDXU
+* provide feature detection to test whether the CPU supports it (on linux, via /proc/cpuinfo)
+
+## further links
+* https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/ia-32-ia-64-benchmark-code-execution-paper.pdf
+* https://gist.github.com/savanovich/f07eda9dba9300eb9ccf
+* https://developers.redhat.com/blog/2016/03/11/practical-micro-benchmarking-with-ltrace-and-sched#
+]#
+
+when defined(js):
+  proc getCpuTicksImpl(): int64 =
+    ## Returns ticks in nanoseconds.
+    # xxx consider returning JsBigInt instead of float
+    when defined(nodejs):
+      {.emit: """
+      let process = require('process');
+      `result` = Number(process.hrtime.bigint());
+      """.}
+    else:
+      proc jsNow(): float {.importjs: "window.performance.now()".}
+      result = int64(jsNow() * 1e6) # fractional milliseconds -> nanoseconds
+else:
+  const header =
+    when defined(posix): "<x86intrin.h>"
+    else: "<intrin.h>"
+  proc getCpuTicksImpl(): uint64 {.importc: "__rdtsc", header: header.}
+
+template getCpuTicks*(): int64 =
+  ## Returns the CPU tick count read via `RDTSC` (JS backend: nanoseconds).
+  ## Unlike `std/monotimes.ticks`, this gives a strictly monotonic counter
+  ## with higher resolution and lower overhead, making it possible to measure
+  ## individual instructions (time offsets in the nanosecond range).
+  ##
+  ## Note that the CPU may reorder instructions around the read; see
+  ## `getCpuTicksStart` / `getCpuTicksEnd` for variants that avoid this noise.
+  runnableExamples:
+    for i in 0..<100:
+      let t1 = getCpuTicks()
+      # code to benchmark can go here
+      let t2 = getCpuTicks()
+      assert t2 > t1
+  cast[int64](getCpuTicksImpl())
+
+template toInt64(lo, hi): untyped =
+  # `lo`/`hi` are the low/high 32-bit halves of the counter (`EAX`/`EDX`).
+  cast[int64](uint64(lo) or (uint64(hi) shl 32))
+
+proc getCpuTicksStart*(): int64 {.inline.} =
+  ## Variant of `getCpuTicks` using `CPUID`+`RDTSC` (and `RDTSCP`+`CPUID` in
+  ## `getCpuTicksEnd`). Compared to `getCpuTicks`, this avoids measurement noise
+  ## caused by CPU instruction reordering and can give more deterministic
+  ## results, at the expense of extra overhead and asymmetric start/stop APIs.
+  runnableExamples:
+    for i in 0..<100:
+      let t1 = getCpuTicksStart()
+      # code to benchmark can go here
+      let t2 = getCpuTicksEnd()
+      assert t2 > t1, $(t1, t2)
+  when nimvm: result = getCpuTicks()
+  else:
+    when defined(js): result = getCpuTicks()
+    else:
+      var a {.noinit.}: cuint
+      var d {.noinit.}: cuint
+      # See https://developers.redhat.com/blog/2016/03/11/practical-micro-benchmarking-with-ltrace-and-sched
+      {.emit:"""
+      asm volatile("cpuid" ::: "%rax", "%rbx", "%rcx", "%rdx");
+      asm volatile("rdtsc" : "=a" (`a`), "=d" (`d`));
+      """.}
+      result = toInt64(a, d)
+
+proc getCpuTicksEnd*(): int64 {.inline.} =
+  ## See `getCpuTicksStart`. `RDTSCP` also writes `ECX`, hence the `%rcx` clobber.
+  when nimvm: result = getCpuTicks()
+  else:
+    when defined(js): result = getCpuTicks()
+    else:
+      var a {.noinit.}: cuint
+      var d {.noinit.}: cuint
+      {.emit:"""
+      asm volatile("rdtscp" : "=a" (`a`), "=d" (`d`) :: "%rcx");
+      asm volatile("cpuid" ::: "%rax", "%rbx", "%rcx", "%rdx");
+      """.}
+      result = toInt64(a, d)
diff --git a/lib/std/monotimes.nim b/lib/std/monotimes.nim
@@ -34,6 +34,7 @@ that the actual supported time resolution differs for different systems.
 See also
 ========
 * `times module <times.html>`_
+* `cputicks module <cputicks.html>`_ which provides strictly monotonic cpu counter
 ]##
 
 import times
@@ -54,16 +55,18 @@ when defined(macosx):
 
 when defined(js):
   proc getJsTicks: float =
-    ## Returns ticks in the unit seconds.
+    ## Returns ticks in nanoseconds.
+    # xxx instead, use JsBigInt throughout the API
+    # to avoid `overflowChecks: off` and provide higher precision, but this
+    # requires some care, e.g. because of `proc low*(typ: typedesc[MonoTime]): MonoTime =`
     when defined(nodejs):
       {.emit: """
       let process = require('process');
-      let time = process.hrtime();
-      `result` = time[0] + time[1] / 1000000000;
+      `result` = Number(process.hrtime.bigint());
       """.}
     else:
       proc jsNow(): float {.importjs: "window.performance.now()".}
-      result = jsNow() / 1000
+      result = jsNow() * 1e6
 
   # Workaround for #6752.
   {.push overflowChecks: off.}
@@ -85,13 +88,13 @@ elif defined(windows):
 proc getMonoTime*(): MonoTime {.tags: [TimeEffect].} =
   ## Returns the current `MonoTime` timestamp.
   ##
-  ## When compiled with the JS backend and executed in a browser,
-  ## this proc calls `window.performance.now()`.
+  ## With `js` in browser, this calls `window.performance.now()`, with `-d:nodejs`
+  ## this calls `process.hrtime.bigint();`
+  ##
   ## See [MDN](https://developer.mozilla.org/en-US/docs/Web/API/Performance/now)
   ## for more information.
   when defined(js):
-    let ticks = getJsTicks()
-    result = MonoTime(ticks: (ticks * 1_000_000_000).int64)
+    result = MonoTime(ticks: getJsTicks().int64)
   elif defined(macosx):
     let ticks = mach_absolute_time()
     var machAbsoluteTimeFreq: MachTimebaseInfoData
@@ -112,7 +115,7 @@ proc getMonoTime*(): MonoTime {.tags: [TimeEffect].} =
     let queryPerformanceCounterFreq = 1_000_000_000'u64 div freq
     result = MonoTime(ticks: (ticks * queryPerformanceCounterFreq).int64)
 
-proc ticks*(t: MonoTime): int64 =
+proc ticks*(t: MonoTime): int64 {.inline.} =
   ## Returns the raw ticks value from a `MonoTime`. This value always uses
   ## nanosecond time resolution.
   t.ticks

diff --git a/tests/stdlib/tcputicks.nim b/tests/stdlib/tcputicks.nim
@@ -0,0 +1,22 @@
+discard """
+  targets: "c cpp js"
+  matrix: "; -d:danger"
+"""
+
+import std/cputicks
+
+template main =
+  const iterations = 100
+  for _ in 0..<iterations:
+    let before = getCpuTicks()
+    let after = getCpuTicks()
+    doAssert after > before
+
+  for _ in 0..<iterations:
+    let started = getCpuTicksStart()
+    # code to benchmark can go here
+    let stopped = getCpuTicksEnd()
+    doAssert stopped > started
+
+static: main()
+main()
diff --git a/tests/stdlib/tmonotimes.nim b/tests/stdlib/tmonotimes.nim
@@ -4,17 +4,27 @@ discard """
 
 import std/[monotimes, times]
 
-let d = initDuration(nanoseconds = 10)
-let t1 = getMonoTime()
-let t2 = t1 + d
+template main =
+  block:
+    let d = initDuration(nanoseconds = 10)
+    let t1 = getMonoTime()
+    let t2 = t1 + d
 
-doAssert t2 - t1 == d
-doAssert t1 == t1
-doAssert t1 != t2
-doAssert t2 - d == t1
-doAssert t1 < t2
-doAssert t1 <= t2
-doAssert t1 <= t1
-doAssert not(t2 < t1)
-doAssert t1 < high(MonoTime)
-doAssert low(MonoTime) < t1
+    doAssert t2 - t1 == d
+    doAssert t1 == t1
+    doAssert t1 != t2
+    doAssert t2 - d == t1
+    doAssert t1 < t2
+    doAssert t1 <= t2
+    doAssert t1 <= t1
+    doAssert not(t2 < t1)
+    doAssert t1 < high(MonoTime)
+    doAssert low(MonoTime) < t1
+
+  block: # getMonoTime is non-decreasing
+    let a = getMonoTime()
+    let b = getMonoTime()
+    doAssert b >= a
+
+main()
+# static: main() # xxx support
diff --git a/tests/stdlib/trandom.nim b/tests/stdlib/trandom.nim
@@ -1,6 +1,7 @@
 discard """
   joinable: false # to avoid messing with global rand state
   targets: "c js"
+  matrix: "; -d:danger" # this matters because of the `#17898` test
 """
 
 import std/[random, math, stats, sets, tables]
@@ -9,7 +10,7 @@ when not defined(js):
 
 randomize(233)
 
-proc main() =
+proc main1() =
   var occur: array[1000, int]
 
   for i in 0..100_000:
@@ -35,7 +36,7 @@ proc main() =
   # don't use causes integer overflow
   doAssert compiles(rand[int](low(int) .. high(int)))
 
-main()
+main1()
 
 block:
   when not defined(js):
@@ -248,3 +249,33 @@ block: # bug #17670
     type UInt48 = range[0'u64..2'u64^48-1]
     let x = rand(UInt48)
     doAssert x is UInt48
+
+block: # bug #17898
+  const count = 1000
+  var states = newSeq[Rand](count)
+  for idx in 0..<count:
+    states[idx] = initRand()
+    # keep this loop body minimal: anything besides the `initRand` call
+    # would make the test less meaningful
+  template isUnique[T](a: iterable[T]): bool =
+    ## Returns true iff no element of `a` occurs more than once.
+    # xxx move to std/iterutils, refs https://github.com/timotheecour/Nim/issues/746
+    var seen: HashSet[T]
+    var allDistinct = true
+    for elem in a:
+      if seen.containsOrIncl(elem):
+        allDistinct = false
+        break
+    allDistinct
+
+  doAssert isUnique(items(states))
+
+
+template main =
+  # xxx move all tests here to test also in VM
+  var rng = initRand()
+  let drawn = rng.rand(2)
+  doAssert drawn in 0..2
+
+static: main()
+main()