From 5933aece9b5f7320e8feb2ef610b59d347c74d27 Mon Sep 17 00:00:00 2001 From: hlaaftana <10591326+hlaaftana@users.noreply.github.com> Date: Tue, 23 Nov 2021 18:30:17 +0300 Subject: [PATCH] `caseStmtMacros` no longer experimental, experimental manual refactor (#19173) * `caseStmtMacros` no longer experimental, experimental manual refactor * Update doc/manual.rst * apply review suggestions * apply review Co-authored-by: Andreas Rumpf --- changelog.md | 2 + compiler/options.nim | 2 +- compiler/semstmts.nim | 8 +- doc/manual.rst | 413 ++++-- doc/manual_experimental.rst | 2400 ++++++++++++++----------------- lib/core/macros.nim | 7 +- tests/macros/tcasestmtmacro.nim | 2 - 7 files changed, 1426 insertions(+), 1408 deletions(-) diff --git a/changelog.md b/changelog.md index 8e70513e01e80..b645bc5c7935b 100644 --- a/changelog.md +++ b/changelog.md @@ -43,6 +43,8 @@ x, y, z: int Baz = object ``` +- [Case statement macros](manual.html#macros-case-statement-macros) are no longer experimental, + meaning you no longer need to enable the experimental switch `caseStmtMacros` to use them. 
## Compiler changes diff --git a/compiler/options.nim b/compiler/options.nim index 2feaa2d436ae2..d8e5057f9adf1 100644 --- a/compiler/options.nim +++ b/compiler/options.nim @@ -195,7 +195,7 @@ type notnil, dynamicBindSym, forLoopMacros, # not experimental anymore; remains here for backwards compatibility - caseStmtMacros, + caseStmtMacros, # ditto codeReordering, compiletimeFFI, ## This requires building nim with `-d:nimHasLibFFI` diff --git a/compiler/semstmts.nim b/compiler/semstmts.nim index 36d76608e39f7..f302dd4c3e197 100644 --- a/compiler/semstmts.nim +++ b/compiler/semstmts.nim @@ -987,10 +987,10 @@ proc semCase(c: PContext, n: PNode; flags: TExprFlags): PNode = else: popCaseContext(c) closeScope(c) - if caseStmtMacros in c.features: - result = handleCaseStmtMacro(c, n, flags) - if result != nil: - return result + #if caseStmtMacros in c.features: + result = handleCaseStmtMacro(c, n, flags) + if result != nil: + return result localError(c.config, n[0].info, errSelectorMustBeOfCertainTypes) return for i in 1..`_. +See also `custom numeric literals <#custom-numeric-literals>`_. -Numeric Literals +Numeric literals ---------------- Numeric literals have the form:: @@ -625,7 +625,7 @@ Hence: 0b10000000'u8 == 0x80'u8 == 128, but, 0b10000000'i8 == 0x80'i8 == -1 instead of causing an overflow error. -Custom Numeric Literals +Custom numeric literals ~~~~~~~~~~~~~~~~~~~~~~~ If the suffix is not predefined, then the suffix is assumed to be a call @@ -700,26 +700,6 @@ contain a dot: `{..}` are the three tokens `{`:tok:, `..`:tok:, `}`:tok: and not the two tokens `{.`:tok:, `.}`:tok:. -Unicode Operators ------------------ - -Under the `--experimental:unicodeOperators` switch these Unicode operators are -also parsed as operators:: - - ∙ ∘ × ★ ⊗ ⊘ ⊙ ⊛ ⊠ ⊡ ∩ ∧ ⊓ # same priority as * (multiplication) - ± ⊕ ⊖ ⊞ ⊟ ∪ ∨ ⊔ # same priority as + (addition) - - -If enabled, Unicode operators can be combined with non-Unicode operator -symbols. 
The usual precedence extensions then apply, for example, `⊠=` is an -assignment like operator just like `*=` is. - -No Unicode normalization step is performed. - -**Note**: Due to parser limitations one **cannot** enable this feature via a -pragma `{.experimental: "unicodeOperators".}` reliably. - - Syntax ====== @@ -1323,46 +1303,6 @@ as `MyEnum.value`: To implement bit fields with enums see `Bit fields <#set-type-bit-fields>`_ -Overloadable enum field names ------------------------------ - -To be enabled via `{.experimental: "overloadableEnums".}`. - -Enum field names are overloadable much like routines. When an overloaded -enum field is used, it produces a closed sym choice construct, here -written as `(E|E)`. -During overload resolution the right `E` is picked, if possible. -For (array/object...) constructors the right `E` is picked, comparable to -how `[byte(1), 2, 3]` works, one needs to use `[T.E, E2, E3]`. Ambiguous -enum fields produce a static error: - -.. code-block:: nim - :test: "nim c $1" - - {.experimental: "overloadableEnums".} - - type - E1 = enum - value1, - value2 - E2 = enum - value1, - value2 = 4 - - const - Lookuptable = [ - E1.value1: "1", - value2: "2" - ] - - proc p(e: E1) = - # disambiguation in 'case' statements: - case e - of value1: echo "A" - of value2: echo "B" - - p value2 - String type ----------- @@ -1684,11 +1624,6 @@ must match the order of the tuple's definition. Different tuple-types are *equivalent* if they specify the same fields of the same type in the same order. The *names* of the fields also have to be the same. -The assignment operator for tuples copies each component. -The default assignment operator for objects copies each component. Overloading -of the assignment operator is described `here -`_. - .. code-block:: nim type @@ -1765,6 +1700,10 @@ introduce new object roots apart from `system.RootObj`. 
Student = ref object of Person # Error: inheritance only works with non-final objects id: int +The assignment operator for tuples and objects copies each component. +The methods to override this copying behavior are described `here +`_. + Object construction ------------------- @@ -2685,6 +2624,7 @@ Varargs matching See `Varargs <#types-varargs>`_. + iterable -------- @@ -2725,6 +2665,24 @@ available. Let `p` be an overloaded symbol. These contexts are: As usual, ambiguous matches produce a compile-time error. +Named argument overloading +-------------------------- + +Routines with the same type signature can be called individually if +a parameter has different names between them. + +.. code-block:: Nim + proc foo(x: int) = + echo "Using x: ", x + proc foo(y: int) = + echo "Using y: ", y + + foo(x = 2) # Using x: 2 + foo(y = 2) # Using y: 2 + +Not supplying the parameter name in such cases results in an +ambiguity error. + Statements and expressions ========================== @@ -3200,7 +3158,7 @@ Return statement Example: .. code-block:: nim - return 40+2 + return 40 + 2 The `return` statement ends the execution of the current procedure. It is only allowed in procedures. If there is an `expr`, this is syntactic @@ -3836,8 +3794,8 @@ behavior inside loop bodies. See `closureScope `_ and `capture `_ for details on how to change this behavior. -Anonymous Procs ---------------- +Anonymous procedures +-------------------- Unnamed procedures can be used as lambda expressions to pass into other procedures: @@ -3845,8 +3803,8 @@ procedures: .. code-block:: nim var cities = @["Frankfurt", "Tokyo", "New York", "Kyiv"] - cities.sort(proc (x,y: string): int = - cmp(x.len, y.len)) + cities.sort(proc (x, y: string): int = + cmp(x.len, y.len)) Procs as expressions can appear both as nested procs and inside top-level @@ -3854,6 +3812,42 @@ executable code. 
The `sugar `_ module contains the `=>` macro which enables a more succinct syntax for anonymous procedures resembling lambdas as they are in languages like JavaScript, C#, etc. +Do notation +----------- + +As a special convenience notation that keeps most elements of a +regular proc expression, the `do` keyword can be used to pass +anonymous procedures to routines: + +.. code-block:: nim + var cities = @["Frankfurt", "Tokyo", "New York", "Kyiv"] + + sort(cities) do (x, y: string) -> int: + cmp(x.len, y.len) + + # Less parentheses using the method plus command syntax: + cities = cities.map do (x: string) -> string: + "City of " & x + +`do` is written after the parentheses enclosing the regular proc params. +The proc expression represented by the `do` block is appended to the routine +call as the last argument. In calls using the command syntax, the `do` block +will bind to the immediately preceding expression rather than the command call. + +`do` with a parameter list corresponds to an anonymous `proc`, however +`do` without parameters is treated as a normal statement list. This allows +macros to receive both indented statement lists as an argument in inline +calls, as well as a direct mirror of Nim's routine syntax. + +.. code-block:: nim + # Passing a statement list to an inline macro: + macroResults.add quote do: + if not `ex`: + echo `info`, ": Check failed: ", `expString` + + # Processing a routine definition in a macro: + rpc(router, "add") do (a, b: int) -> int: + result = a + b Func ---- @@ -5133,7 +5127,7 @@ code: deletedKeys: seq[bool] -Type Classes +Type classes ------------ A type class is a special pseudo-type that can be used to match against @@ -5863,7 +5857,15 @@ twice: While macros enable advanced compile-time code transformations, they cannot change Nim's syntax. -Debug Example +**Style note:** For code readability, it is best to use the least powerful +programming construct that remains expressive. 
So the "check list" is: + +(1) Use an ordinary proc/iterator, if possible. +(2) Else: Use a generic proc/iterator, if possible. +(3) Else: Use a template, if possible. +(4) Else: Use a macro. + +Debug example ------------- The following example implements a powerful `debug` command that accepts a @@ -5921,7 +5923,7 @@ constructor expression. This is why `debug` iterates over all of `args`'s children. -BindSym +bindSym ------- The above `debug` macro relies on the fact that `write`, `writeLine` and @@ -5970,43 +5972,38 @@ However, the symbols `write`, `writeLine` and `stdout` are already bound and are not looked up again. As the example shows, `bindSym` does work with overloaded symbols implicitly. -Case-Of Macro -------------- +Note that the symbol names passed to `bindSym` have to be constant. The +experimental feature `dynamicBindSym` (`experimental manual +`_) +allows this value to be computed dynamically. -In Nim, it is possible to have a macro with the syntax of a *case-of* -expression just with the difference that all *of-branches* are passed to -and processed by the macro implementation. It is then up the macro -implementation to transform the *of-branches* into a valid Nim -statement. The following example should show how this feature could be -used for a lexical analyzer. - -.. code-block:: nim - import std/macros - - macro case_token(args: varargs[untyped]): untyped = - echo args.treeRepr - # creates a lexical analyzer from regular expressions - # ... (implementation is an exercise for the reader ;-) - discard +Post-statement blocks +--------------------- - case_token: # this colon tells the parser it is a macro statement - of r"[A-Za-z_]+[A-Za-z_0-9]*": - return tkIdentifier - of r"0-9+": - return tkInteger - of r"[\+\-\*\?]+": - return tkOperator +Macros can receive `of`, `elif`, `else`, `except`, `finally` and `do` +blocks (including their different forms such as `do` with routine parameters) +as arguments if called in statement form. + +.. 
code-block:: nim + macro performWithUndo(task, undo: untyped) = ... + + performWithUndo do: + # multiple-line block of code + # to perform the task + do: + # code to undo it + + let num = 12 + # a single colon may be used if there is no initial block + match (num mod 3, num mod 5): + of (0, 0): + echo "FizzBuzz" + of (0, _): + echo "Fizz" + of (_, 0): + echo "Buzz" else: - return tkUnknown - - -**Style note**: For code readability, it is best to use the least powerful -programming construct that still suffices. So the "check list" is: - -(1) Use an ordinary proc/iterator, if possible. -(2) Else: Use a generic proc/iterator, if possible. -(3) Else: Use a template, if possible. -(4) Else: Use a macro. + echo num For loop macro @@ -6072,6 +6069,48 @@ Another example: echo a, " ", b +Case statement macros +--------------------- + +Macros named `` `case` `` can provide implementations of `case` statements +for certain types. The following is an example of such an implementation +for tuples, leveraging the existing equality operator for tuples +(as provided in `system.==`): + +.. code-block:: nim + :test: "nim c $1" + import std/macros + + macro `case`(n: tuple): untyped = + result = newTree(nnkIfStmt) + let selector = n[0] + for i in 1 ..< n.len: + let it = n[i] + case it.kind + of nnkElse, nnkElifBranch, nnkElifExpr, nnkElseExpr: + result.add it + of nnkOfBranch: + for j in 0..it.len-2: + let cond = newCall("==", selector, it[j]) + result.add newTree(nnkElifBranch, cond, it[^1]) + else: + error "custom 'case' for tuple cannot handle this node", it + + case ("foo", 78) + of ("foo", 78): echo "yes" + of ("bar", 88): echo "no" + else: discard + +`case` macros are subject to overload resolution. The type of the +`case` statement's selector expression is matched against the type +of the first argument of the `case` macro. Then the complete `case` +statement is passed in place of the argument and the macro is evaluated. 
+ +In other words, the macro needs to transform the full `case` statement +but only the statement's selector expression is used to determine which +macro to call. + + Special Types ============= @@ -6959,7 +6998,8 @@ experimental pragma The `experimental` pragma enables experimental language features. Depending on the concrete feature, this means that the feature is either considered too unstable for an otherwise stable release or that the future of the feature -is uncertain (it may be removed at any time). +is uncertain (it may be removed at any time). See the +`experimental manual <manual_experimental.html>`_ for more details. Example: @@ -7060,6 +7100,21 @@ alignment requirement of the type are ignored. This pragma has no effect on the JS backend. +Noalias pragma +-------------- + +Since version 1.4 of the Nim compiler, there is a `.noalias` annotation for variables +and parameters. It is mapped directly to C/C++'s `restrict`:c: keyword and means that +the underlying pointer is pointing to a unique location in memory, no other aliases to +this location exist. It is *unchecked* that this alias restriction is followed. If the +restriction is violated, the backend optimizer is free to miscompile the code. +This is an **unsafe** language feature. + +Ideally in later versions of the language, the restriction will be enforced at +compile time. (This is also why the name `noalias` was chosen instead of a more +verbose name like `unsafeAssumeNoAlias`.) + + Volatile pragma --------------- The `volatile` pragma is for variables only.
It declares the variable as @@ -7641,7 +7696,7 @@ Example: {.pragma: rtl, importc, dynlib: "client.dll", cdecl.} proc p*(a, b: int): int {.rtl.} = - result = a+b + result = a + b In the example, a new pragma named `rtl` is introduced that either imports a symbol from a dynamic library or exports the symbol for dynamic library @@ -8032,3 +8087,137 @@ Threads and exceptions The interaction between threads and exceptions is simple: A *handled* exception in one thread cannot affect any other thread. However, an *unhandled* exception in one thread terminates the whole *process*. + + +Guards and locks +================ + +Nim provides common low level concurrency mechanisms like locks, atomic +intrinsics or condition variables. + +Nim significantly improves on the safety of these features via additional +pragmas: + +1) A `guard`:idx: annotation is introduced to prevent data races. +2) Every access of a guarded memory location needs to happen in an + appropriate `locks`:idx: statement. + + +Guards and locks sections +------------------------- + +Protecting global variables +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Object fields and global variables can be annotated via a `guard` pragma: + +.. code-block:: nim + + var glock: TLock + var gdata {.guard: glock.}: int + +The compiler then ensures that every access of `gdata` is within a `locks` +section: + +.. code-block:: nim + + proc invalid = + # invalid: unguarded access: + echo gdata + + proc valid = + # valid access: + {.locks: [glock].}: + echo gdata + +Top level accesses to `gdata` are always allowed so that it can be initialized +conveniently. It is *assumed* (but not enforced) that every top level statement +is executed before any concurrent action happens. + +The `locks` section deliberately looks ugly because it has no runtime +semantics and should not be used directly! It should only be used in templates +that also implement some form of locking at runtime: + +.. 
code-block:: nim + + template lock(a: TLock; body: untyped) = + pthread_mutex_lock(a) + {.locks: [a].}: + try: + body + finally: + pthread_mutex_unlock(a) + + +The guard does not need to be of any particular type. It is flexible enough to +model low level lockfree mechanisms: + +.. code-block:: nim + + var dummyLock {.compileTime.}: int + var atomicCounter {.guard: dummyLock.}: int + + template atomicRead(x): untyped = + {.locks: [dummyLock].}: + memoryReadBarrier() + x + + echo atomicRead(atomicCounter) + + +The `locks` pragma takes a list of lock expressions `locks: [a, b, ...]` +in order to support *multi lock* statements. Why these are essential is +explained in the `lock levels <#guards-and-locks-lock-levels>`_ section. + + +Protecting general locations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The `guard` annotation can also be used to protect fields within an object. +The guard then needs to be another field within the same object or a +global variable. + +Since objects can reside on the heap or on the stack, this greatly enhances +the expressivity of the language: + +.. code-block:: nim + + type + ProtectedCounter = object + v {.guard: L.}: int + L: TLock + + proc incCounters(counters: var openArray[ProtectedCounter]) = + for i in 0..counters.high: + lock counters[i].L: + inc counters[i].v + +The access to field `x.v` is allowed since its guard `x.L` is active. +After template expansion, this amounts to: + +.. code-block:: nim + + proc incCounters(counters: var openArray[ProtectedCounter]) = + for i in 0..counters.high: + pthread_mutex_lock(counters[i].L) + {.locks: [counters[i].L].}: + try: + inc counters[i].v + finally: + pthread_mutex_unlock(counters[i].L) + +There is an analysis that checks that `counters[i].L` is the lock that +corresponds to the protected location `counters[i].v`. This analysis is called +`path analysis`:idx: because it deals with paths to locations +like `obj.field[i].fieldB[j]`. 
+ +The path analysis is **currently unsound**, but that doesn't make it useless. +Two paths are considered equivalent if they are syntactically the same. + +This means the following compiles (for now) even though it really should not: + +.. code-block:: nim + + {.locks: [a[i].L].}: + inc i + access a[i].v diff --git a/doc/manual_experimental.rst b/doc/manual_experimental.rst index 458512292f3e5..4ed8439dffe63 100644 --- a/doc/manual_experimental.rst +++ b/doc/manual_experimental.rst @@ -19,47 +19,8 @@ Some of these are not covered by the `.experimental` pragma or one may want to use Nim libraries using these features without using them oneself. -**Note**: Unless otherwise indicated, these features are not to be removed, -but refined and overhauled. - - -Package level objects -===================== - -Every Nim module resides in a (nimble) package. An object type can be attached -to the package it resides in. If that is done, the type can be referenced from -other modules as an `incomplete`:idx: object type. This feature allows to -break up recursive type dependencies across module boundaries. Incomplete -object types are always passed `byref` and can only be used in pointer like -contexts (`var/ref/ptr IncompleteObject`) in general since the compiler does -not yet know the size of the object. To complete an incomplete object -the `package` pragma has to be used. `package` implies `byref`. - -As long as a type `T` is incomplete, neither `sizeof(T)` nor runtime -type information for `T` is available. - - -Example: - -.. code-block:: nim - - # module A (in an arbitrary package) - type - Pack.SomeObject = object ## declare as incomplete object of package 'Pack' - Triple = object - a, b, c: ref SomeObject ## pointers to incomplete objects are allowed - - ## Incomplete objects can be used as parameters: - proc myproc(x: SomeObject) = discard - - -.. 
code-block:: nim - - # module B (in package "Pack") - type - SomeObject* {.package.} = object ## Use 'package' to complete the object - s, t: string - x, y: int +.. note:: Unless otherwise indicated, these features are not to be removed, + but refined and overhauled. Void type @@ -105,23 +66,106 @@ The `void` type is only valid for parameters and return types; other symbols cannot have the type `void`. -Automatic dereferencing -======================= +Unicode Operators +================= -Automatic dereferencing is performed for the first argument of a routine call. -This feature has to be enabled via `{.experimental: "implicitDeref".}`: +Under the `--experimental:unicodeOperators`:option: switch, +these Unicode operators are also parsed as operators:: + + ∙ ∘ × ★ ⊗ ⊘ ⊙ ⊛ ⊠ ⊡ ∩ ∧ ⊓ # same priority as * (multiplication) + ± ⊕ ⊖ ⊞ ⊟ ∪ ∨ ⊔ # same priority as + (addition) + + +If enabled, Unicode operators can be combined with non-Unicode operator +symbols. The usual precedence extensions then apply, for example, `⊠=` is an +assignment like operator just like `*=` is. + +No Unicode normalization step is performed. + +.. note:: Due to parser limitations one **cannot** enable this feature via a + pragma `{.experimental: "unicodeOperators".}` reliably. + + +Overloadable enum value names +============================= + +Enabled via `{.experimental: "overloadableEnums".}`. + +Enum value names are overloadable, much like routines. If both of the enums +`T` and `U` have a member named `foo`, then the identifier `foo` corresponds +to a choice between `T.foo` and `U.foo`. During overload resolution, +the correct type of `foo` is decided from the context. If the type of `foo` is +ambiguous, a static error will be produced. .. code-block:: nim + :test: "nim c $1" - {.experimental: "implicitDeref".} + {.experimental: "overloadableEnums".} - proc depth(x: NodeObj): int = ... 
+ type + E1 = enum + value1, + value2 + E2 = enum + value1, + value2 = 4 - var - n: Node - new(n) - echo n.depth - # no need to write n[].depth either + const + Lookuptable = [ + E1.value1: "1", + # no need to qualify value2, known to be E1.value2 + value2: "2" + ] + + proc p(e: E1) = + # disambiguation in 'case' statements: + case e + of value1: echo "A" + of value2: echo "B" + + p value2 + + +Package level objects +===================== + +Every Nim module resides in a (nimble) package. An object type can be attached +to the package it resides in. If that is done, the type can be referenced from +other modules as an `incomplete`:idx: object type. This feature allows to +break up recursive type dependencies across module boundaries. Incomplete +object types are always passed `byref` and can only be used in pointer like +contexts (`var/ref/ptr IncompleteObject`) in general, since the compiler does +not yet know the size of the object. To complete an incomplete object, +the `package` pragma has to be used. `package` implies `byref`. + +As long as a type `T` is incomplete, no runtime type information for `T` is +available. + + +Example: + +.. code-block:: nim + + # module A (in an arbitrary package) + type + Pack.SomeObject = object # declare as incomplete object of package 'Pack' + Triple = object + a, b, c: ref SomeObject # pointers to incomplete objects are allowed + + # Incomplete objects can be used as parameters: + proc myproc(x: SomeObject) = discard + + +.. code-block:: nim + + # module B (in package "Pack") + type + SomeObject* {.package.} = object # Use 'package' to complete the object + s, t: string + x, y: int + +This feature will likely be superseded in the future by support for +recursive module dependencies. Code reordering @@ -230,65 +274,30 @@ scope. Therefore, the following will *fail to compile:* a() +This feature will likely be replaced with a better solution to remove +the need for forward declarations. 
-Named argument overloading -========================== - -Routines with the same type signature can be called differently if a parameter -has different names. This does not need an `experimental` switch, but is an -unstable feature. - -.. code-block:: Nim - proc foo(x: int) = - echo "Using x: ", x - proc foo(y: int) = - echo "Using y: ", y - - foo(x = 2) - # Using x: 2 - foo(y = 2) - # Using y: 2 - - -Do notation -=========== +Automatic dereferencing +======================= -As a special more convenient notation, proc expressions involved in procedure -calls can use the `do` keyword: +Automatic dereferencing is performed for the first argument of a routine call. +This feature has to be enabled via `{.experimental: "implicitDeref".}`: .. code-block:: nim - sort(cities) do (x,y: string) -> int: - cmp(x.len, y.len) - - # Less parenthesis using the method plus command syntax: - cities = cities.map do (x:string) -> string: - "City of " & x - - # In macros, the do notation is often used for quasi-quoting - macroResults.add quote do: - if not `ex`: - echo `info`, ": Check failed: ", `expString` - -`do` is written after the parentheses enclosing the regular proc params. -The proc expression represented by the do block is appended to them. -In calls using the command syntax, the do block will bind to the immediately -preceding expression, transforming it in a call. - -`do` with parentheses is an anonymous `proc`; however a `do` without -parentheses is just a block of code. The `do` notation can be used to -pass multiple blocks to a macro: + {.experimental: "implicitDeref".} -.. code-block:: nim + type + NodeObj = object + # ... + Node = ref NodeObj - macro performWithUndo(task, undo: untyped) = ... + proc depth(x: NodeObj): int = ... 
- performWithUndo do: - # multiple-line block of code - # to perform the task - do: - # code to undo it + let n = Node() + echo n.depth + # no need to write n[].depth Special Operators @@ -297,8 +306,8 @@ Special Operators dot operators ------------- -**Note**: Dot operators are still experimental and so need to be enabled -via `{.experimental: "dotOperators".}`. +.. note:: Dot operators are still experimental and so need to be enabled + via `{.experimental: "dotOperators".}`. Nim offers a special family of dot operators that can be used to intercept and rewrite proc call and field access attempts, referring @@ -418,1296 +427,1141 @@ here. .. include:: manual_experimental_strictnotnil.rst -Concepts -======== +Aliasing restrictions in parameter passing +========================================== -Concepts, also known as "user-defined type classes", are used to specify an -arbitrary set of requirements that the matched type must satisfy. +.. note:: The aliasing restrictions are currently not enforced by the + implementation and need to be fleshed out further. -Concepts are written in the following form: +"Aliasing" here means that the underlying storage locations overlap in memory +at runtime. An "output parameter" is a parameter of type `var T`, +an input parameter is any parameter that is not of type `var`. -.. code-block:: nim +1. Two output parameters should never be aliased. +2. An input and an output parameter should not be aliased. +3. An output parameter should never be aliased with a global or thread local + variable referenced by the called proc. +4. An input parameter should not be aliased with a global or thread local + variable updated by the called proc. - type - Comparable = concept x, y - (x < y) is bool +One problem with rules 3 and 4 is that they affect specific global or thread +local variables, but Nim's effect tracking only tracks "uses no global variable" +via `.noSideEffect`. 
The rules 3 and 4 can also be approximated by a different rule: - Stack[T] = concept s, var v - s.pop() is T - v.push(T) +5. A global or thread local variable (or a location derived from such a location) + can only be passed to a parameter of a `.noSideEffect` proc. - s.len is Ordinal - for value in s: - value is T +Strict funcs +============ -The concept is a match if: +Since version 1.4, a stricter definition of "side effect" is available. +In addition to the existing rule that a side effect is calling a function +with side effects, the following rule is also enforced: -a) all of the expressions within the body can be compiled for the tested type -b) all statically evaluable boolean expressions in the body must be true +Any mutation to an object does count as a side effect if that object is reachable +via a parameter that is not declared as a `var` parameter. -The identifiers following the `concept` keyword represent instances of the -currently matched type. You can apply any of the standard type modifiers such -as `var`, `ref`, `ptr` and `static` to denote a more specific type of -instance. You can also apply the `type` modifier to create a named instance of -the type itself: +For example: .. code-block:: nim - type - MyConcept = concept x, var v, ref r, ptr p, static s, type T - ... - -Within the concept body, types can appear in positions where ordinary values -and parameters are expected. This provides a more convenient way to check for -the presence of callable symbols with specific signatures: - -.. code-block:: nim + {.experimental: "strictFuncs".} type - OutputStream = concept var s - s.write(string) + Node = ref object + le, ri: Node + data: string -In order to check for symbols accepting `type` params, you must prefix -the type with the explicit `type` modifier. The named instance of the -type, following the `concept` keyword is also considered to have the -explicit modifier and will be matched only as a type.
+ func len(n: Node): int = + # valid: len does not have side effects + var it = n + while it != nil: + inc result + it = it.ri -.. code-block:: nim + func mut(n: Node) = + let m = n # is the statement that connected the mutation to the parameter + m.data = "yeah" # the mutation is here + # Error: 'mut' can have side effects + # an object reachable from 'n' is potentially mutated - type - # Let's imagine a user-defined casting framework with operators - # such as `val.to(string)` and `val.to(JSonValue)`. We can test - # for these with the following concept: - MyCastables = concept x - x.to(type string) - x.to(type JSonValue) - # Let's define a couple of concepts, known from Algebra: - AdditiveMonoid* = concept x, y, type T - x + y is T - T.zero is T # require a proc such as `int.zero` or 'Position.zero' +The algorithm behind this analysis is described in +the `view types section <#view-types-algorithm>`_. - AdditiveGroup* = concept x, y, type T - x is AdditiveMonoid - -x is T - x - y is T -Please note that the `is` operator allows one to easily verify the precise -type signatures of the required operations, but since type inference and -default parameters are still applied in the concept body, it's also possible -to describe usage protocols that do not reveal implementation details. +View types +========== -Much like generics, concepts are instantiated exactly once for each tested type -and any static code included within the body is executed only once. +.. tip:: `--experimental:views`:option: is more effective + with `--experimental:strictFuncs`:option:. +A view type is a type that is or contains one of the following types: -Concept diagnostics -------------------- +- `lent T` (view into `T`) +- `openArray[T]` (pair of (pointer to array of `T`, size)) -By default, the compiler will report the matching errors in concepts only when -no other overload can be selected and a normal compilation error is produced. 
-When you need to understand why the compiler is not matching a particular -concept and, as a result, a wrong overload is selected, you can apply the -`explain` pragma to either the concept body or a particular call-site. +For example: .. code-block:: nim type - MyConcept {.explain.} = concept ... + View1 = openArray[byte] + View2 = lent string + View3 = Table[openArray[char], int] - overloadedProc(x, y, z) {.explain.} -This will provide Hints in the compiler output either every time the concept is -not matched or only on the particular call-site. +Exceptions to this rule are types constructed via `ptr` or `proc`. +For example, the following types are **not** view types: +.. code-block:: nim -Generic concepts and type binding rules ---------------------------------------- + type + NotView1 = proc (x: openArray[int]) + NotView2 = ptr openArray[char] + NotView3 = ptr array[4, lent int] -The concept types can be parametric just like the regular generic types: -.. code-block:: nim +The mutability aspect of a view type is not part of the type but part +of the locations it's derived from. More on this later. - ### matrixalgo.nim +A *view* is a symbol (a let, var, const, etc.) that has a view type. - import std/typetraits +Since version 1.4, Nim allows view types to be used as local variables. +This feature needs to be enabled via `{.experimental: "views".}`. - type - AnyMatrix*[R, C: static int; T] = concept m, var mvar, type M - M.ValueType is T - M.Rows == R - M.Cols == C +A local variable of a view type *borrows* from the locations and +it is statically enforced that the view does not outlive the location +it was borrowed from. - m[int, int] is T - mvar[int, int] = T +For example: - type TransposedType = stripGenericParams(M)[C, R, T] +.. 
code-block:: nim - AnySquareMatrix*[N: static int, T] = AnyMatrix[N, N, T] + {.experimental: "views".} - AnyTransform3D* = AnyMatrix[4, 4, float] + proc take(a: openArray[int]) = + echo a.len - proc transposed*(m: AnyMatrix): m.TransposedType = - for r in 0 ..< m.R: - for c in 0 ..< m.C: - result[r, c] = m[c, r] + proc main(s: seq[int]) = + var x: openArray[int] = s # 'x' is a view into 's' + # it is checked that 'x' does not outlive 's' and + # that 's' is not mutated. + for i in 0 .. high(x): + echo x[i] + take(x) - proc determinant*(m: AnySquareMatrix): int = - ... + take(x.toOpenArray(0, 1)) # slicing remains possible + let y = x # create a view from a view + take y + # it is checked that 'y' does not outlive 'x' and + # that 'x' is not mutated as long as 'y' lives. - proc setPerspectiveProjection*(m: AnyTransform3D) = - ... - -------------- - ### matrix.nim + main(@[11, 22, 33]) - type - Matrix*[M, N: static int; T] = object - data: array[M*N, T] - proc `[]`*(M: Matrix; m, n: int): M.T = - M.data[m * M.N + n] +A local variable of a view type can borrow from a location +derived from a parameter, another local variable, a global `const` or `let` +symbol or a thread-local `var` or `let`. - proc `[]=`*(M: var Matrix; m, n: int; v: M.T) = - M.data[m * M.N + n] = v +Let `p` be the proc that is analysed for the correctness of the borrow operation. - # Adapt the Matrix type to the concept's requirements - template Rows*(M: typedesc[Matrix]): int = M.M - template Cols*(M: typedesc[Matrix]): int = M.N - template ValueType*(M: typedesc[Matrix]): typedesc = M.T +Let `source` be one of: - ------------- - ### usage.nim +- A formal parameter of `p`. Note that this does not cover parameters of + inner procs. +- The `result` symbol of `p`. +- A local `var` or `let` or `const` of `p`. Note that this does + not cover locals of inner procs. +- A thread-local `var` or `let`. +- A global `let` or `const`. +- A constant array/seq/object/tuple constructor. 
- import matrix, matrixalgo - var - m: Matrix[3, 3, int] - projectionMatrix: Matrix[4, 4, float] +Path expressions +---------------- - echo m.transposed.determinant - setPerspectiveProjection projectionMatrix +A location derived from `source` is then defined as a path expression that +has `source` as the owner. A path expression `e` is defined recursively: -When the concept type is matched against a concrete type, the unbound type -parameters are inferred from the body of the concept in a way that closely -resembles the way generic parameters of callable symbols are inferred on -call sites. +- `source` itself is a path expression. +- Container access like `e[i]` is a path expression. +- Tuple access `e[0]` is a path expression. +- Object field access `e.field` is a path expression. +- `system.toOpenArray(e, ...)` is a path expression. +- Pointer dereference `e[]` is a path expression. +- An address `addr e`, `unsafeAddr e` is a path expression. +- A type conversion `T(e)` is a path expression. +- A cast expression `cast[T](e)` is a path expression. +- `f(e, ...)` is a path expression if `f`'s return type is a view type. + Because the view can only have been borrowed from `e`, we then know + that the owner of `f(e, ...)` is `e`. -Unbound types can appear both as params to calls such as `s.push(T)` and -on the right-hand side of the `is` operator in cases such as `x.pop is T` -and `x.data is seq[T]`. -Unbound static params will be inferred from expressions involving the `==` -operator and also when types dependent on them are being matched: +If a view type is used as a return type, the location must borrow from a location +that is derived from the first parameter that is passed to the proc. +See `the manual <manual.html#procedures-var-return-type>`_ +for details about how this is done for `var T`. -.. code-block:: nim +A mutable view can borrow from a mutable location, an immutable view can borrow +from either a mutable or an immutable location. 
- type - MatrixReducer[M, N: static int; T] = concept x - x.reduce(SquareMatrix[N, T]) is array[M, int] +If a view borrows from a mutable location, the view can be used to update the +location. Otherwise it cannot be used for mutations. -The Nim compiler includes a simple linear equation solver, allowing it to -infer static params in some situations where integer arithmetic is involved. +The *duration* of a borrow is the span of commands beginning from the assignment +to the view and ending with the last usage of the view. -Just like in regular type classes, Nim discriminates between `bind once` -and `bind many` types when matching the concept. You can add the `distinct` -modifier to any of the otherwise inferable types to get a type that will be -matched without permanently inferring it. This may be useful when you need -to match several procs accepting the same wide class of types: +For the duration of the borrow operation, no mutations to the borrowed locations +may be performed except via the view that borrowed from the +location. The borrowed location is said to be *sealed* during the borrow. .. code-block:: nim - type - Enumerable[T] = concept e - for v in e: - v is T + {.experimental: "views".} type - MyConcept = concept o - # this could be inferred to a type such as Enumerable[int] - o.foo is distinct Enumerable + Obj = object + field: string - # this could be inferred to a different type such as Enumerable[float] - o.bar is distinct Enumerable + proc dangerous(s: var seq[Obj]) = + let v: lent Obj = s[0] # seal 's' + s.setLen 0 # prevented at compile-time because 's' is sealed. 
+ echo v.field - # it's also possible to give an alias name to a `bind many` type class - type Enum = distinct Enumerable - o.baz is Enum -On the other hand, using `bind once` types allows you to test for equivalent -types used in multiple signatures, without actually requiring any concrete -types, thus allowing you to encode implementation-defined types: +The scope of the view does not matter: .. code-block:: nim - type - MyConcept = concept x - type T1 = auto - x.foo(T1) - x.bar(T1) # both procs must accept the same type + proc valid(s: var seq[Obj]) = + let v: lent Obj = s[0] # begin of borrow + echo v.field # end of borrow + s.setLen 0 # valid because 'v' isn't used afterwards - type T2 = seq[SomeNumber] - x.alpha(T2) - x.omega(T2) # both procs must accept the same type - # and it must be a numeric sequence -As seen in the previous examples, you can refer to generic concepts such as -`Enumerable[T]` just by their short name. Much like the regular generic types, -the concept will be automatically instantiated with the bind once auto type -in the place of each missing generic param. +The analysis requires as much precision about mutations as is reasonably obtainable, +so it is more effective with the experimental `strict funcs <#strict-funcs>`_ +feature. In other words `--experimental:views`:option: works better +with `--experimental:strictFuncs`:option:. -Please note that generic concepts such as `Enumerable[T]` can be matched -against concrete types such as `string`. Nim doesn't require the concept -type to have the same number of parameters as the type being matched. -If you wish to express a requirement towards the generic parameters of -the matched type, you can use a type mapping operator such as `genericHead` -or `stripGenericParams` within the body of the concept to obtain the -uninstantiated version of the type, which you can then try to instantiate -in any required way. 
For example, here is how one might define the classic -`Functor` concept from Haskell and then demonstrate that Nim's `Option[T]` -type is an instance of it: +The analysis is currently control flow insensitive: .. code-block:: nim - :test: "nim c $1" - import std/[sugar, typetraits] + proc invalid(s: var seq[Obj]) = + let v: lent Obj = s[0] + if false: + s.setLen 0 + echo v.field - type - Functor[A] = concept f - type MatchedGenericType = genericHead(typeof(f)) - # `f` will be a value of a type such as `Option[T]` - # `MatchedGenericType` will become the `Option` type +In this example, the compiler assumes that `s.setLen 0` invalidates the +borrow operation of `v` even though a human being can easily see that it +will never do that at runtime. - f.val is A - # The Functor should provide a way to obtain - # a value stored inside it - type T = auto - map(f, A -> T) is MatchedGenericType[T] - # And it should provide a way to map one instance of - # the Functor to a instance of a different type, given - # a suitable `map` operation for the enclosed values +Start of a borrow +----------------- - import std/options - echo Option[int] is Functor # prints true +A borrow starts with one of the following: +- The assignment of a non-view-type to a view-type. +- The assignment of a location that is derived from a local parameter + to a view-type. -Concept derived values ----------------------- -All top level constants or types appearing within the concept body are -accessible through the dot operator in procs where the concept was successfully -matched to a concrete type: +End of a borrow +--------------- -.. code-block:: nim +A borrow operation ends with the last usage of the view variable. - type - DateTime = concept t1, t2, type T - const Min = T.MinDate - T.Now is T - t1 < t2 is bool +Reborrows +--------- - type TimeSpan = typeof(t1 - t2) - TimeSpan * int is TimeSpan - TimeSpan + TimeSpan is TimeSpan +A view `v` can borrow from multiple different locations. 
However, the borrow +is always the full span of `v`'s lifetime and every location that is borrowed +from is sealed during `v`'s lifetime. - t1 + TimeSpan is T - proc eventsJitter(events: Enumerable[DateTime]): float = - var - # this variable will have the inferred TimeSpan type for - # the concrete Date-like value the proc was called with: - averageInterval: DateTime.TimeSpan +Algorithm +--------- - deviation: float - ... +The following section is an outline of the algorithm that the current implementation +uses. The algorithm performs two traversals over the AST of the procedure or global +section of code that uses a view variable. No fixpoint iterations are performed, the +complexity of the analysis is O(N) where N is the number of nodes of the AST. +The first pass over the AST computes the lifetime of each local variable based on +a notion of an "abstract time", in the implementation it's a simple integer that is +incremented for every visited node. -Concept refinement ------------------- +In the second pass, information about the underlying object "graphs" is computed. +Let `v` be a parameter or a local variable. Let `G(v)` be the graph +that `v` belongs to. A graph is defined by the set of variables that belong +to the graph. Initially for all `v`: `G(v) = {v}`. Every variable can only +be part of a single graph. -When the matched type within a concept is directly tested against a different -concept, we say that the outer concept is a refinement of the inner concept and -thus it is more-specific. When both concepts are matched in a call during -overload resolution, Nim will assign a higher precedence to the most specific -one. As an alternative way of defining concept refinements, you can use the -object inheritance syntax involving the `of` keyword: +Assignments like `a = b` "connect" two variables, both variables end up in the +same graph `{a, b} = G(a) = G(b)`. 
Unfortunately, the pattern to look for is +much more complex than that and can involve multiple assignment targets +and sources:: -.. code-block:: nim + f(x, y) = g(a, b) - type - Graph = concept g, type G of EquallyComparable, Copyable - type - VertexType = G.VertexType - EdgeType = G.EdgeType +connects `x` and `y` to `a` and `b`: `G(x) = G(y) = G(a) = G(b) = {x, y, a, b}`. +A type based alias analysis rules out some of these combinations, for example +a `string` value cannot possibly be connected to a `seq[int]`. - VertexType is Copyable - EdgeType is Copyable +A pattern like `v[] = value` or `v.field = value` marks `G(v)` as mutated. +After the second pass a set of disjoint graphs was computed. - var - v: VertexType - e: EdgeType +For strict functions it is then enforced that there is no graph that is both mutated +and has an element that is an immutable parameter (that is a parameter that is not +of type `var T`). - IncidendeGraph = concept of Graph - # symbols such as variables and types from the refined - # concept are automatically in scope: +For borrow checking, a different set of checks is performed. Let `v` be the view +and `b` the location that is borrowed from. - g.source(e) is VertexType - g.target(e) is VertexType +- The lifetime of `v` must not exceed `b`'s lifetime. Note: The lifetime of + a parameter is the complete proc body. +- If `v` is used for a mutation, `b` must be a mutable location too. +- During `v`'s lifetime, `G(b)` can only be modified by `v` (and only if + `v` is a mutable view). +- If `v` is `result` then `b` has to be a location derived from the first + formal parameter or from a constant location. +- A view cannot be used for a read or a write access before it was assigned to. 
- g.outgoingEdges(v) is Enumerable[EdgeType] - BidirectionalGraph = concept g, type G - # The following will also turn the concept into a refinement when it - # comes to overload resolution, but it doesn't provide the convenient - # symbol inheritance - g is IncidendeGraph +Concepts +======== - g.incomingEdges(G.VertexType) is Enumerable[G.EdgeType] +Concepts, also known as "user-defined type classes", are used to specify an +arbitrary set of requirements that the matched type must satisfy. - proc f(g: IncidendeGraph) - proc f(g: BidirectionalGraph) # this one will be preferred if we pass a type - # matching the BidirectionalGraph concept +Concepts are written in the following form: -.. - Converter type classes - ---------------------- +.. code-block:: nim - Concepts can also be used to convert a whole range of types to a single type or - a small set of simpler types. This is achieved with a `return` statement within - the concept body: + type + Comparable = concept x, y + (x < y) is bool - .. 
code-block:: nim + Stack[T] = concept s, var v + s.pop() is T + v.push(T) - type - Stringable = concept x - $x is string - return $x + s.len is Ordinal - StringRefValue[CharType] = object - base: ptr CharType - len: int + for value in s: + value is T - StringRef = concept x - # the following would be an overloaded proc for cstring, string, seq and - # other user-defined types, returning either a StringRefValue[char] or - # StringRefValue[wchar] - return makeStringRefValue(x) +The concept matches if: - # the varargs param will here be converted to an array of StringRefValues - # the proc will have only two instantiations for the two character types - proc log(format: static string, varargs[StringRef]) +a) all expressions within the body can be compiled for the tested type +b) all statically evaluable boolean expressions in the body are true - # this proc will allow char and wchar values to be mixed in - # the same call at the cost of additional instantiations - # the varargs param will be converted to a tuple - proc log(format: static string, varargs[distinct StringRef]) +The identifiers following the `concept` keyword represent instances of the +currently matched type. You can apply any of the standard type modifiers such +as `var`, `ref`, `ptr` and `static` to denote a more specific type of +instance. You can also apply the `type` modifier to create a named instance of +the type itself: +.. code-block:: nim -.. - VTable types - ------------ + type + MyConcept = concept x, var v, ref r, ptr p, static s, type T + ... - Concepts allow Nim to define a great number of algorithms, using only - static polymorphism and without erasing any type information or sacrificing - any execution speed. But when polymorphic collections of objects are required, - the user must use one of the provided type erasure techniques - either common - base types or VTable types. +Within the concept body, types can appear in positions where ordinary values +and parameters are expected. 
This provides a more convenient way to check for +the presence of callable symbols with specific signatures: - VTable types are represented as "fat pointers" storing a reference to an - object together with a reference to a table of procs implementing a set of - required operations (the so called vtable). +.. code-block:: nim - In contrast to other programming languages, the vtable in Nim is stored - externally to the object, allowing you to create multiple different vtable - views for the same object. Thus, the polymorphism in Nim is unbounded - - any type can implement an unlimited number of protocols or interfaces not - originally envisioned by the type's author. + type + OutputStream = concept var s + s.write(string) - Any concept type can be turned into a VTable type by using the `vtref` - or the `vtptr` compiler magics. Under the hood, these magics generate - a converter type class, which converts the regular instances of the matching - types to the corresponding VTable type. +In order to check for symbols accepting `type` params, you must prefix +the type with the explicit `type` modifier. The named instance of the +type, following the `concept` keyword is also considered to have the +explicit modifier and will be matched only as a type. - .. code-block:: nim +.. code-block:: nim - type - IntEnumerable = vtref Enumerable[int] + type + # Let's imagine a user-defined casting framework with operators + # such as `val.to(string)` and `val.to(JSonValue)`. 
We can test + # for these with the following concept: + MyCastables = concept x + x.to(type string) + x.to(type JSonValue) - MyObject = object - enumerables: seq[IntEnumerable] - streams: seq[OutputStream.vtref] + # Let's define a couple of concepts, known from Algebra: + AdditiveMonoid* = concept x, y, type T + x + y is T + T.zero is T # require a proc such as `int.zero` or 'Position.zero' - proc addEnumerable(o: var MyObject, e: IntEnumerable) = - o.enumerables.add e + AdditiveGroup* = concept x, y, type T + x is AdditiveMonoid + -x is T + x - y is T - proc addStream(o: var MyObject, e: OutputStream.vtref) = - o.streams.add e +Please note that the `is` operator allows one to easily verify the precise +type signatures of the required operations, but since type inference and +default parameters are still applied in the concept body, it's also possible +to describe usage protocols that do not reveal implementation details. - The procs that will be included in the vtable are derived from the concept - body and include all proc calls for which all param types were specified as - concrete types. All such calls should include exactly one param of the type - matched against the concept (not necessarily in the first position), which - will be considered the value bound to the vtable. +Much like generics, concepts are instantiated exactly once for each tested type +and any static code included within the body is executed only once. - Overloads will be created for all captured procs, accepting the vtable type - in the position of the captured underlying object. - Under these rules, it's possible to obtain a vtable type for a concept with - unbound type parameters or one instantiated with metatypes (type classes), - but it will include a smaller number of captured procs. A completely empty - vtable will be reported as an error. 
+Concept diagnostics +------------------- - The `vtref` magic produces types which can be bound to `ref` types and - the `vtptr` magic produced types bound to `ptr` types. +By default, the compiler will report the matching errors in concepts only when +no other overload can be selected and a normal compilation error is produced. +When you need to understand why the compiler is not matching a particular +concept and, as a result, a wrong overload is selected, you can apply the +`explain` pragma to either the concept body or a particular call-site. +.. code-block:: nim -Type bound operations -===================== + type + MyConcept {.explain.} = concept ... -There are 4 operations that are bound to a type: + overloadedProc(x, y, z) {.explain.} -1. Assignment -2. Moves -3. Destruction -4. Deep copying for communication between threads +This will provide Hints in the compiler output either every time the concept is +not matched or only on the particular call-site. -These operations can be *overridden* instead of *overloaded*. This means the -implementation is automatically lifted to structured types. For instance if type -`T` has an overridden assignment operator `=` this operator is also used -for assignments of the type `seq[T]`. Since these operations are bound to a -type they have to be bound to a nominal type for reasons of simplicity of -implementation: This means an overridden `deepCopy` for `ref T` is really -bound to `T` and not to `ref T`. This also means that one cannot override -`deepCopy` for both `ptr T` and `ref T` at the same time; instead a -helper distinct or object type has to be used for one pointer type. -Assignments, moves and destruction are specified in -the `destructors `_ document. +Generic concepts and type binding rules +--------------------------------------- +The concept types can be parametric just like the regular generic types: -deepCopy --------- +.. 
code-block:: nim -`=deepCopy` is a builtin that is invoked whenever data is passed to -a `spawn`'ed proc to ensure memory safety. The programmer can override its -behaviour for a specific `ref` or `ptr` type `T`. (Later versions of the -language may weaken this restriction.) + ### matrixalgo.nim -The signature has to be: + import std/typetraits -.. code-block:: nim + type + AnyMatrix*[R, C: static int; T] = concept m, var mvar, type M + M.ValueType is T + M.Rows == R + M.Cols == C - proc `=deepCopy`(x: T): T + m[int, int] is T + mvar[int, int] = T -This mechanism will be used by most data structures that support shared memory -like channels to implement thread safe automatic memory management. + type TransposedType = stripGenericParams(M)[C, R, T] -The builtin `deepCopy` can even clone closures and their environments. See -the documentation of `spawn <#parallel-amp-spawn-spawn-statement>`_ for details. + AnySquareMatrix*[N: static int, T] = AnyMatrix[N, N, T] + AnyTransform3D* = AnyMatrix[4, 4, float] -Case statement macros -===================== + proc transposed*(m: AnyMatrix): m.TransposedType = + for r in 0 ..< m.R: + for c in 0 ..< m.C: + result[r, c] = m[c, r] -Macros named `case` can rewrite `case` statements for certain types in order to -implement `pattern matching`:idx:. The following example implements a -simplistic form of pattern matching for tuples, leveraging the existing -equality operator for tuples (as provided in `system.==`): + proc determinant*(m: AnySquareMatrix): int = + ... -.. code-block:: nim - :test: "nim c $1" + proc setPerspectiveProjection*(m: AnyTransform3D) = + ... 
- {.experimental: "caseStmtMacros".} + -------------- + ### matrix.nim - import std/macros + type + Matrix*[M, N: static int; T] = object + data: array[M*N, T] + + proc `[]`*(M: Matrix; m, n: int): M.T = + M.data[m * M.N + n] - macro `case`(n: tuple): untyped = - result = newTree(nnkIfStmt) - let selector = n[0] - for i in 1 ..< n.len: - let it = n[i] - case it.kind - of nnkElse, nnkElifBranch, nnkElifExpr, nnkElseExpr: - result.add it - of nnkOfBranch: - for j in 0..it.len-2: - let cond = newCall("==", selector, it[j]) - result.add newTree(nnkElifBranch, cond, it[^1]) - else: - error "custom 'case' for tuple cannot handle this node", it + proc `[]=`*(M: var Matrix; m, n: int; v: M.T) = + M.data[m * M.N + n] = v - case ("foo", 78) - of ("foo", 78): echo "yes" - of ("bar", 88): echo "no" - else: discard + # Adapt the Matrix type to the concept's requirements + template Rows*(M: typedesc[Matrix]): int = M.M + template Cols*(M: typedesc[Matrix]): int = M.N + template ValueType*(M: typedesc[Matrix]): typedesc = M.T + ------------- + ### usage.nim -Currently case statement macros must be enabled explicitly -via `{.experimental: "caseStmtMacros".}`. + import matrix, matrixalgo -`case` macros are subject to overload resolution. The type of the -`case` statement's selector expression is matched against the type -of the first argument of the `case` macro. Then the complete `case` -statement is passed in place of the argument and the macro is evaluated. + var + m: Matrix[3, 3, int] + projectionMatrix: Matrix[4, 4, float] -In other words, the macro needs to transform the full `case` statement -but only the statement's selector expression is used to determine which -macro to call. 
+ echo m.transposed.determinant + setPerspectiveProjection projectionMatrix +When the concept type is matched against a concrete type, the unbound type +parameters are inferred from the body of the concept in a way that closely +resembles the way generic parameters of callable symbols are inferred on +call sites. -Term rewriting macros -===================== +Unbound types can appear both as params to calls such as `s.push(T)` and +on the right-hand side of the `is` operator in cases such as `x.pop is T` +and `x.data is seq[T]`. -Term rewriting macros are macros or templates that have not only -a *name* but also a *pattern* that is searched for after the semantic checking -phase of the compiler: This means they provide an easy way to enhance the -compilation pipeline with user defined optimizations: +Unbound static params will be inferred from expressions involving the `==` +operator and also when types dependent on them are being matched: .. code-block:: nim - template optMul{`*`(a, 2)}(a: int): int = a+a - - let x = 3 - echo x * 2 - -The compiler now rewrites `x * 2` as `x + x`. The code inside the -curlies is the pattern to match against. The operators `*`, `**`, -`|`, `~` have a special meaning in patterns if they are written in infix -notation, so to match verbatim against `*` the ordinary function call syntax -needs to be used. - -Term rewriting macro are applied recursively, up to a limit. This means that -if the result of a term rewriting macro is eligible for another rewriting, -the compiler will try to perform it, and so on, until no more optimizations -are applicable. To avoid putting the compiler into an infinite loop, there is -a hard limit on how many times a single term rewriting macro can be applied. -Once this limit has been passed, the term rewriting macro will be ignored. - -Unfortunately optimizations are hard to get right and even the tiny example -is **wrong**: - -.. 
code-block:: nim - - template optMul{`*`(a, 2)}(a: int): int = a+a - - proc f(): int = - echo "side effect!" - result = 55 - - echo f() * 2 - -We cannot duplicate 'a' if it denotes an expression that has a side effect! -Fortunately Nim supports side effect analysis: - -.. code-block:: nim - - template optMul{`*`(a, 2)}(a: int{noSideEffect}): int = a+a - - proc f(): int = - echo "side effect!" - result = 55 - - echo f() * 2 # not optimized ;-) - -You can make one overload matching with a constraint and one without, and the -one with a constraint will have precedence, and so you can handle both cases -differently. - -So what about `2 * a`? We should tell the compiler `*` is commutative. We -cannot really do that however as the following code only swaps arguments -blindly: - -.. code-block:: nim + type + MatrixReducer[M, N: static int; T] = concept x + x.reduce(SquareMatrix[N, T]) is array[M, int] - template mulIsCommutative{`*`(a, b)}(a, b: int): int = b*a +The Nim compiler includes a simple linear equation solver, allowing it to +infer static params in some situations where integer arithmetic is involved. -What optimizers really need to do is a *canonicalization*: +Just like in regular type classes, Nim discriminates between `bind once` +and `bind many` types when matching the concept. You can add the `distinct` +modifier to any of the otherwise inferable types to get a type that will be +matched without permanently inferring it. This may be useful when you need +to match several procs accepting the same wide class of types: .. code-block:: nim - template canonMul{`*`(a, b)}(a: int{lit}, b: int): int = b*a - -The `int{lit}` parameter pattern matches against an expression of -type `int`, but only if it's a literal. 
- - - -Parameter constraints ---------------------- - -The `parameter constraint`:idx: expression can use the operators `|` (or), -`&` (and) and `~` (not) and the following predicates: - -=================== ===================================================== -Predicate Meaning -=================== ===================================================== -`atom` The matching node has no children. -`lit` The matching node is a literal like `"abc"`, `12`. -`sym` The matching node must be a symbol (a bound - identifier). -`ident` The matching node must be an identifier (an unbound - identifier). -`call` The matching AST must be a call/apply expression. -`lvalue` The matching AST must be an lvalue. -`sideeffect` The matching AST must have a side effect. -`nosideeffect` The matching AST must have no side effect. -`param` A symbol which is a parameter. -`genericparam` A symbol which is a generic parameter. -`module` A symbol which is a module. -`type` A symbol which is a type. -`var` A symbol which is a variable. -`let` A symbol which is a `let` variable. -`const` A symbol which is a constant. -`result` The special `result` variable. -`proc` A symbol which is a proc. -`method` A symbol which is a method. -`iterator` A symbol which is an iterator. -`converter` A symbol which is a converter. -`macro` A symbol which is a macro. -`template` A symbol which is a template. -`field` A symbol which is a field in a tuple or an object. -`enumfield` A symbol which is a field in an enumeration. -`forvar` A for loop variable. -`label` A label (used in `block` statements). -`nk*` The matching AST must have the specified kind. - (Example: `nkIfStmt` denotes an `if` statement.) -`alias` States that the marked parameter needs to alias - with *some* other parameter. -`noalias` States that *every* other parameter must not alias - with the marked parameter. 
-=================== ===================================================== + type + Enumerable[T] = concept e + for v in e: + v is T -Predicates that share their name with a keyword have to be escaped with -backticks. -The `alias` and `noalias` predicates refer not only to the matching AST, -but also to every other bound parameter; syntactically they need to occur after -the ordinary AST predicates: + type + MyConcept = concept o + # this could be inferred to a type such as Enumerable[int] + o.foo is distinct Enumerable -.. code-block:: nim + # this could be inferred to a different type such as Enumerable[float] + o.bar is distinct Enumerable - template ex{a = b + c}(a: int{noalias}, b, c: int) = - # this transformation is only valid if 'b' and 'c' do not alias 'a': - a = b - inc a, c + # it's also possible to give an alias name to a `bind many` type class + type Enum = distinct Enumerable + o.baz is Enum -Another example: +On the other hand, using `bind once` types allows you to test for equivalent +types used in multiple signatures, without actually requiring any concrete +types, thus allowing you to encode implementation-defined types: .. code-block:: nim - proc somefunc(s: string) = assert s == "variable" - proc somefunc(s: string{nkStrLit}) = assert s == "literal" - proc somefunc(s: string{nkRStrLit}) = assert s == r"raw" - proc somefunc(s: string{nkTripleStrLit}) = assert s == """triple""" - proc somefunc(s: static[string]) = assert s == "constant" - - # Use parameter constraints to provide overloads based on both the input parameter type and form. - var variable = "variable" - somefunc(variable) - const constant = "constant" - somefunc(constant) - somefunc("literal") - somefunc(r"raw") - somefunc("""triple""") - - -Pattern operators ------------------ - -The operators `*`, `**`, `|`, `~` have a special meaning in patterns -if they are written in infix notation. 
- - -The `|` operator -~~~~~~~~~~~~~~~~~~ - -The `|` operator if used as infix operator creates an ordered choice: + type + MyConcept = concept x + type T1 = auto + x.foo(T1) + x.bar(T1) # both procs must accept the same type -.. code-block:: nim + type T2 = seq[SomeNumber] + x.alpha(T2) + x.omega(T2) # both procs must accept the same type + # and it must be a numeric sequence - template t{0|1}(): untyped = 3 - let a = 1 - # outputs 3: - echo a +As seen in the previous examples, you can refer to generic concepts such as +`Enumerable[T]` just by their short name. Much like the regular generic types, +the concept will be automatically instantiated with the bind once auto type +in the place of each missing generic param. -The matching is performed after the compiler performed some optimizations like -constant folding, so the following does not work: +Please note that generic concepts such as `Enumerable[T]` can be matched +against concrete types such as `string`. Nim doesn't require the concept +type to have the same number of parameters as the type being matched. +If you wish to express a requirement towards the generic parameters of +the matched type, you can use a type mapping operator such as `genericHead` +or `stripGenericParams` within the body of the concept to obtain the +uninstantiated version of the type, which you can then try to instantiate +in any required way. For example, here is how one might define the classic +`Functor` concept from Haskell and then demonstrate that Nim's `Option[T]` +type is an instance of it: .. code-block:: nim + :test: "nim c $1" - template t{0|1}(): untyped = 3 - # outputs 1: - echo 1 - -The reason is that the compiler already transformed the 1 into "1" for -the `echo` statement. However, a term rewriting macro should not change the -semantics anyway. In fact they can be deactivated with the `--patterns:off`:option: -command line option or temporarily with the `patterns` pragma. 
- + import std/[sugar, typetraits] -The `{}` operator -~~~~~~~~~~~~~~~~~~~ + type + Functor[A] = concept f + type MatchedGenericType = genericHead(typeof(f)) + # `f` will be a value of a type such as `Option[T]` + # `MatchedGenericType` will become the `Option` type -A pattern expression can be bound to a pattern parameter via the `expr{param}` -notation: + f.val is A + # The Functor should provide a way to obtain + # a value stored inside it -.. code-block:: nim + type T = auto + map(f, A -> T) is MatchedGenericType[T] + # And it should provide a way to map one instance of + # the Functor to an instance of a different type, given + # a suitable `map` operation for the enclosed values - template t{(0|1|2){x}}(x: untyped): untyped = x+1 - let a = 1 - # outputs 2: - echo a + import std/options + echo Option[int] is Functor # prints true -The `~` operator -~~~~~~~~~~~~~~~~~~ +Concept derived values +---------------------- -The `~` operator is the **not** operator in patterns: +All top level constants or types appearing within the concept body are +accessible through the dot operator in procs where the concept was successfully +matched to a concrete type: .. code-block:: nim - template t{x = (~x){y} and (~x){z}}(x, y, z: bool) = - x = y - if x: x = z - - var - a = false - b = true - c = false - a = b and c - echo a - - -The `*` operator -~~~~~~~~~~~~~~~~~~ - -The `*` operator can *flatten* a nested binary expression like `a & b & c` -to `&(a, b, c)`: + type + DateTime = concept t1, t2, type T + const Min = T.MinDate + T.Now is T -.. 
code-block:: nim + t1 < t2 is bool - var - calls = 0 + type TimeSpan = typeof(t1 - t2) + TimeSpan * int is TimeSpan + TimeSpan + TimeSpan is TimeSpan - proc `&&`(s: varargs[string]): string = - result = s[0] - for i in 1..len(s)-1: result.add s[i] - inc calls + t1 + TimeSpan is T - template optConc{ `&&` * a }(a: string): untyped = &&a + proc eventsJitter(events: Enumerable[DateTime]): float = + var + # this variable will have the inferred TimeSpan type for + # the concrete Date-like value the proc was called with: + averageInterval: DateTime.TimeSpan - let space = " " - echo "my" && (space & "awe" && "some " ) && "concat" + deviation: float + ... - # check that it's been optimized properly: - doAssert calls == 1 +Concept refinement +------------------ -The second operator of `*` must be a parameter; it is used to gather all the -arguments. The expression `"my" && (space & "awe" && "some " ) && "concat"` -is passed to `optConc` in `a` as a special list (of kind `nkArgList`) -which is flattened into a call expression; thus the invocation of `optConc` -produces: +When the matched type within a concept is directly tested against a different +concept, we say that the outer concept is a refinement of the inner concept and +thus it is more-specific. When both concepts are matched in a call during +overload resolution, Nim will assign a higher precedence to the most specific +one. As an alternative way of defining concept refinements, you can use the +object inheritance syntax involving the `of` keyword: .. code-block:: nim - `&&`("my", space & "awe", "some ", "concat") - - -The `**` operator -~~~~~~~~~~~~~~~~~~~ + type + Graph = concept g, type G of EquallyComparable, Copyable + type + VertexType = G.VertexType + EdgeType = G.EdgeType -The `**` is much like the `*` operator, except that it gathers not only -all the arguments, but also the matched operators in reverse polish notation: + VertexType is Copyable + EdgeType is Copyable -.. 
code-block:: nim + var + v: VertexType + e: EdgeType - import std/macros + IncidendeGraph = concept of Graph + # symbols such as variables and types from the refined + # concept are automatically in scope: - type - Matrix = object - dummy: int + g.source(e) is VertexType + g.target(e) is VertexType - proc `*`(a, b: Matrix): Matrix = discard - proc `+`(a, b: Matrix): Matrix = discard - proc `-`(a, b: Matrix): Matrix = discard - proc `$`(a: Matrix): string = result = $a.dummy - proc mat21(): Matrix = - result.dummy = 21 + g.outgoingEdges(v) is Enumerable[EdgeType] - macro optM{ (`+`|`-`|`*`) ** a }(a: Matrix): untyped = - echo treeRepr(a) - result = newCall(bindSym"mat21") + BidirectionalGraph = concept g, type G + # The following will also turn the concept into a refinement when it + # comes to overload resolution, but it doesn't provide the convenient + # symbol inheritance + g is IncidendeGraph - var x, y, z: Matrix + g.incomingEdges(G.VertexType) is Enumerable[G.EdgeType] - echo x + y * z - x + proc f(g: IncidendeGraph) + proc f(g: BidirectionalGraph) # this one will be preferred if we pass a type + # matching the BidirectionalGraph concept -This passes the expression `x + y * z - x` to the `optM` macro as -an `nnkArgList` node containing:: +.. + Converter type classes + ---------------------- - Arglist - Sym "x" - Sym "y" - Sym "z" - Sym "*" - Sym "+" - Sym "x" - Sym "-" + Concepts can also be used to convert a whole range of types to a single type or + a small set of simpler types. This is achieved with a `return` statement within + the concept body: -(Which is the reverse polish notation of `x + y * z - x`.) + .. code-block:: nim + type + Stringable = concept x + $x is string + return $x -Parameters ----------- + StringRefValue[CharType] = object + base: ptr CharType + len: int -Parameters in a pattern are type checked in the matching process. 
If a -parameter is of the type `varargs` it is treated specially and it can match -0 or more arguments in the AST to be matched against: + StringRef = concept x + # the following would be an overloaded proc for cstring, string, seq and + # other user-defined types, returning either a StringRefValue[char] or + # StringRefValue[wchar] + return makeStringRefValue(x) -.. code-block:: nim + # the varargs param will here be converted to an array of StringRefValues + # the proc will have only two instantiations for the two character types + proc log(format: static string, varargs[StringRef]) - template optWrite{ - write(f, x) - ((write|writeLine){w})(f, y) - }(x, y: varargs[untyped], f: File, w: untyped) = - w(f, x, y) + # this proc will allow char and wchar values to be mixed in + # the same call at the cost of additional instantiations + # the varargs param will be converted to a tuple + proc log(format: static string, varargs[distinct StringRef]) +.. + VTable types + ------------ -Example: Partial evaluation ---------------------------- + Concepts allow Nim to define a great number of algorithms, using only + static polymorphism and without erasing any type information or sacrificing + any execution speed. But when polymorphic collections of objects are required, + the user must use one of the provided type erasure techniques - either common + base types or VTable types. -The following example shows how some simple partial evaluation can be -implemented with term rewriting: + VTable types are represented as "fat pointers" storing a reference to an + object together with a reference to a table of procs implementing a set of + required operations (the so called vtable). -.. code-block:: nim + In contrast to other programming languages, the vtable in Nim is stored + externally to the object, allowing you to create multiple different vtable + views for the same object. 
Thus, the polymorphism in Nim is unbounded - + any type can implement an unlimited number of protocols or interfaces not + originally envisioned by the type's author. - proc p(x, y: int; cond: bool): int = - result = if cond: x + y else: x - y + Any concept type can be turned into a VTable type by using the `vtref` + or the `vtptr` compiler magics. Under the hood, these magics generate + a converter type class, which converts the regular instances of the matching + types to the corresponding VTable type. - template optP1{p(x, y, true)}(x, y: untyped): untyped = x + y - template optP2{p(x, y, false)}(x, y: untyped): untyped = x - y + .. code-block:: nim + type + IntEnumerable = vtref Enumerable[int] -Example: Hoisting ------------------ + MyObject = object + enumerables: seq[IntEnumerable] + streams: seq[OutputStream.vtref] -The following example shows how some form of hoisting can be implemented: + proc addEnumerable(o: var MyObject, e: IntEnumerable) = + o.enumerables.add e -.. code-block:: nim + proc addStream(o: var MyObject, e: OutputStream.vtref) = + o.streams.add e - import std/pegs + The procs that will be included in the vtable are derived from the concept + body and include all proc calls for which all param types were specified as + concrete types. All such calls should include exactly one param of the type + matched against the concept (not necessarily in the first position), which + will be considered the value bound to the vtable. - template optPeg{peg(pattern)}(pattern: string{lit}): Peg = - var gl {.global, gensym.} = peg(pattern) - gl + Overloads will be created for all captured procs, accepting the vtable type + in the position of the captured underlying object. - for i in 0 .. 
3: - echo match("(a b c)", peg"'(' @ ')'") - echo match("W_HI_Le", peg"\y 'while'") + Under these rules, it's possible to obtain a vtable type for a concept with + unbound type parameters or one instantiated with metatypes (type classes), + but it will include a smaller number of captured procs. A completely empty + vtable will be reported as an error. -The `optPeg` template optimizes the case of a peg constructor with a string -literal, so that the pattern will only be parsed once at program startup and -stored in a global `gl` which is then re-used. This optimization is called -hoisting because it is comparable to classical loop hoisting. + The `vtref` magic produces types which can be bound to `ref` types and + the `vtptr` magic produces types bound to `ptr` types. -AST based overloading +Type bound operations ===================== -Parameter constraints can also be used for ordinary routine parameters; these -constraints affect ordinary overloading resolution then: - -.. code-block:: nim - - proc optLit(a: string{lit|`const`}) = - echo "string literal" - proc optLit(a: string) = - echo "no string literal" +There are 4 operations that are bound to a type: - const - constant = "abc" +1. Assignment +2. Moves +3. Destruction +4. Deep copying for communication between threads - var - variable = "xyz" +These operations can be *overridden* instead of *overloaded*. This means that +the implementation is automatically lifted to structured types. For instance, +if the type `T` has an overridden assignment operator `=`, this operator is +also used for assignments of the type `seq[T]`. - optLit("literal") - optLit(constant) - optLit(variable) +Since these operations are bound to a type, they have to be bound to a +nominal type for reasons of simplicity of implementation; this means an +overridden `deepCopy` for `ref T` is really bound to `T` and not to `ref T`. 
+This also means that one cannot override `deepCopy` for both `ptr T` and +`ref T` at the same time, instead a distinct or object helper type has to be +used for one pointer type. -However, the constraints `alias` and `noalias` are not available in -ordinary routines. +Assignments, moves and destruction are specified in +the `destructors `_ document. -Parallel & Spawn -================ +deepCopy +-------- -Nim has two flavors of parallelism: -1) `Structured`:idx: parallelism via the `parallel` statement. -2) `Unstructured`:idx: parallelism via the standalone `spawn` statement. +`=deepCopy` is a builtin that is invoked whenever data is passed to +a `spawn`'ed proc to ensure memory safety. The programmer can override its +behaviour for a specific `ref` or `ptr` type `T`. (Later versions of the +language may weaken this restriction.) -Nim has a builtin thread pool that can be used for CPU intensive tasks. For -IO intensive tasks the `async` and `await` features should be -used instead. Both parallel and spawn need the `threadpool `_ -module to work. +The signature has to be: -Somewhat confusingly, `spawn` is also used in the `parallel` statement -with slightly different semantics. `spawn` always takes a call expression of -the form `f(a, ...)`. Let `T` be `f`'s return type. If `T` is `void` -then `spawn`'s return type is also `void` otherwise it is `FlowVar[T]`. +.. code-block:: nim -Within a `parallel` section sometimes the `FlowVar[T]` is eliminated -to `T`. This happens when `T` does not contain any GC'ed memory. -The compiler can ensure the location in `location = spawn f(...)` is not -read prematurely within a `parallel` section and so there is no need for -the overhead of an indirection via `FlowVar[T]` to ensure correctness. + proc `=deepCopy`(x: T): T -**Note**: Currently exceptions are not propagated between `spawn`'ed tasks! 
+This mechanism will be used by most data structures that support shared memory, +like channels, to implement thread safe automatic memory management. +The builtin `deepCopy` can even clone closures and their environments. See +the documentation of `spawn <#parallel-amp-spawn-spawn-statement>`_ for details. -Spawn statement ---------------- -`spawn`:idx: can be used to pass a task to the thread pool: +Dynamic arguments for bindSym +============================= -.. code-block:: nim +This experimental feature allows the symbol name argument of `macros.bindSym` +to be computed dynamically. - import std/threadpool +.. code-block:: nim + {.experimental: "dynamicBindSym".} - proc processLine(line: string) = - discard "do some heavy lifting here" + import macros - for x in lines("myinput.txt"): - spawn processLine(x) - sync() + macro callOp(opName, arg1, arg2): untyped = + result = newCall(bindSym($opName), arg1, arg2) -For reasons of type safety and implementation simplicity the expression -that `spawn` takes is restricted: + echo callOp("+", 1, 2) + echo callOp("-", 5, 4) -* It must be a call expression `f(a, ...)`. -* `f` must be `gcsafe`. -* `f` must not have the calling convention `closure`. -* `f`'s parameters may not be of type `var`. - This means one has to use raw `ptr`'s for data passing reminding the - programmer to be careful. -* `ref` parameters are deeply copied which is a subtle semantic change and - can cause performance problems but ensures memory safety. This deep copy - is performed via `system.deepCopy` and so can be overridden. -* For *safe* data exchange between `f` and the caller a global `TChannel` - needs to be used. However, since spawn can return a result, often no further - communication is required. +Term rewriting macros +===================== -`spawn` executes the passed expression on the thread pool and returns -a `data flow variable`:idx: `FlowVar[T]` that can be read from. The reading -with the `^` operator is **blocking**. 
However, one can use `blockUntilAny` to -wait on multiple flow variables at the same time: +Term rewriting macros are macros or templates that have not only +a *name* but also a *pattern* that is searched for after the semantic checking +phase of the compiler: This means they provide an easy way to enhance the +compilation pipeline with user defined optimizations: .. code-block:: nim - import std/threadpool, ... + template optMul{`*`(a, 2)}(a: int): int = a + a - # wait until 2 out of 3 servers received the update: - proc main = - var responses = newSeq[FlowVarBase](3) - for i in 0..2: - responses[i] = spawn tellServer(Update, "key", "value") - var index = blockUntilAny(responses) - assert index >= 0 - responses.del(index) - discard blockUntilAny(responses) + let x = 3 + echo x * 2 -Data flow variables ensure that no data races -are possible. Due to technical limitations not every type `T` is possible in -a data flow variable: `T` has to be of the type `ref`, `string`, `seq` -or of a type that doesn't contain a type that is garbage collected. This -restriction is not hard to work-around in practice. +The compiler now rewrites `x * 2` as `x + x`. The code inside the +curly brackets is the pattern to match against. The operators `*`, `**`, +`|`, `~` have a special meaning in patterns if they are written in infix +notation, so to match verbatim against `*` the ordinary function call syntax +needs to be used. +Term rewriting macros are applied recursively, up to a limit. This means that +if the result of a term rewriting macro is eligible for another rewriting, +the compiler will try to perform it, and so on, until no more optimizations +are applicable. To avoid putting the compiler into an infinite loop, there is +a hard limit on how many times a single term rewriting macro can be applied. +Once this limit has been passed, the term rewriting macro will be ignored. 
+Unfortunately optimizations are hard to get right and even this tiny example +is **wrong**: -Parallel statement ------------------- +.. code-block:: nim -Example: + template optMul{`*`(a, 2)}(a: int): int = a + a + + proc f(): int = + echo "side effect!" + result = 55 + + echo f() * 2 + +We cannot duplicate 'a' if it denotes an expression that has a side effect! +Fortunately Nim supports side effect analysis: .. code-block:: nim - :test: "nim c --threads:on $1" - # Compute PI in an inefficient way - import std/[strutils, math, threadpool] - {.experimental: "parallel".} + template optMul{`*`(a, 2)}(a: int{noSideEffect}): int = a + a - proc term(k: float): float = 4 * math.pow(-1, k) / (2*k + 1) + proc f(): int = + echo "side effect!" + result = 55 - proc pi(n: int): float = - var ch = newSeq[float](n+1) - parallel: - for k in 0..ch.high: - ch[k] = spawn term(float(k)) - for k in 0..ch.high: - result += ch[k] + echo f() * 2 # not optimized ;-) - echo formatFloat(pi(5000)) +You can make one overload matching with a constraint and one without, and the +one with a constraint will have precedence, and so you can handle both cases +differently. +So what about `2 * a`? We should tell the compiler `*` is commutative. We +cannot really do that however as the following code only swaps arguments +blindly: -The parallel statement is the preferred mechanism to introduce parallelism in a -Nim program. A subset of the Nim language is valid within a `parallel` -section. This subset is checked during semantic analysis to be free of data -races. A sophisticated `disjoint checker`:idx: ensures that no data races are -possible even though shared memory is extensively supported! +.. code-block:: nim -The subset is in fact the full language with the following -restrictions / changes: + template mulIsCommutative{`*`(a, b)}(a, b: int): int = b * a -* `spawn` within a `parallel` section has special semantics. 
-* Every location of the form `a[i]` and `a[i..j]` and `dest` where - `dest` is part of the pattern `dest = spawn f(...)` has to be - provably disjoint. This is called the *disjoint check*. -* Every other complex location `loc` that is used in a spawned - proc (`spawn f(loc)`) has to be immutable for the duration of - the `parallel` section. This is called the *immutability check*. Currently - it is not specified what exactly "complex location" means. We need to make - this an optimization! -* Every array access has to be provably within bounds. This is called - the *bounds check*. -* Slices are optimized so that no copy is performed. This optimization is not - yet performed for ordinary slices outside of a `parallel` section. +What optimizers really need to do is a *canonicalization*: +.. code-block:: nim -Guards and locks -================ + template canonMul{`*`(a, b)}(a: int{lit}, b: int): int = b * a -Apart from `spawn` and `parallel` Nim also provides all the common low level -concurrency mechanisms like locks, atomic intrinsics or condition variables. +The `int{lit}` parameter pattern matches against an expression of +type `int`, but only if it's a literal. -Nim significantly improves on the safety of these features via additional -pragmas: -1) A `guard`:idx: annotation is introduced to prevent data races. -2) Every access of a guarded memory location needs to happen in an - appropriate `locks`:idx: statement. -3) Locks and routines can be annotated with `lock levels`:idx: to allow - potential deadlocks to be detected during semantic analysis. 
+Parameter constraints +--------------------- -Guards and the locks section ----------------------------- +The `parameter constraint`:idx: expression can use the operators `|` (or), +`&` (and) and `~` (not) and the following predicates: -Protecting global variables -~~~~~~~~~~~~~~~~~~~~~~~~~~~ +=================== ===================================================== +Predicate Meaning +=================== ===================================================== +`atom` The matching node has no children. +`lit` The matching node is a literal like `"abc"`, `12`. +`sym` The matching node must be a symbol (a bound + identifier). +`ident` The matching node must be an identifier (an unbound + identifier). +`call` The matching AST must be a call/apply expression. +`lvalue` The matching AST must be an lvalue. +`sideeffect` The matching AST must have a side effect. +`nosideeffect` The matching AST must have no side effect. +`param` A symbol which is a parameter. +`genericparam` A symbol which is a generic parameter. +`module` A symbol which is a module. +`type` A symbol which is a type. +`var` A symbol which is a variable. +`let` A symbol which is a `let` variable. +`const` A symbol which is a constant. +`result` The special `result` variable. +`proc` A symbol which is a proc. +`method` A symbol which is a method. +`iterator` A symbol which is an iterator. +`converter` A symbol which is a converter. +`macro` A symbol which is a macro. +`template` A symbol which is a template. +`field` A symbol which is a field in a tuple or an object. +`enumfield` A symbol which is a field in an enumeration. +`forvar` A for loop variable. +`label` A label (used in `block` statements). +`nk*` The matching AST must have the specified kind. + (Example: `nkIfStmt` denotes an `if` statement.) +`alias` States that the marked parameter needs to alias + with *some* other parameter. +`noalias` States that *every* other parameter must not alias + with the marked parameter. 
+=================== ===================================================== -Object fields and global variables can be annotated via a `guard` pragma: +Predicates that share their name with a keyword have to be escaped with +backticks. +The `alias` and `noalias` predicates refer not only to the matching AST, +but also to every other bound parameter; syntactically they need to occur after +the ordinary AST predicates: .. code-block:: nim - var glock: TLock - var gdata {.guard: glock.}: int + template ex{a = b + c}(a: int{noalias}, b, c: int) = + # this transformation is only valid if 'b' and 'c' do not alias 'a': + a = b + inc a, c -The compiler then ensures that every access of `gdata` is within a `locks` -section: +Another example: .. code-block:: nim - proc invalid = - # invalid: unguarded access: - echo gdata + proc somefunc(s: string) = assert s == "variable" + proc somefunc(s: string{nkStrLit}) = assert s == "literal" + proc somefunc(s: string{nkRStrLit}) = assert s == r"raw" + proc somefunc(s: string{nkTripleStrLit}) = assert s == """triple""" + proc somefunc(s: static[string]) = assert s == "constant" - proc valid = - # valid access: - {.locks: [glock].}: - echo gdata + # Use parameter constraints to provide overloads based on both the input parameter type and form. + var variable = "variable" + somefunc(variable) + const constant = "constant" + somefunc(constant) + somefunc("literal") + somefunc(r"raw") + somefunc("""triple""") -Top level accesses to `gdata` are always allowed so that it can be initialized -conveniently. It is *assumed* (but not enforced) that every top level statement -is executed before any concurrent action happens. -The `locks` section deliberately looks ugly because it has no runtime -semantics and should not be used directly! It should only be used in templates -that also implement some form of locking at runtime: +Pattern operators +----------------- -.. 
code-block:: nim +The operators `*`, `**`, `|`, `~` have a special meaning in patterns +if they are written in infix notation. - template lock(a: TLock; body: untyped) = - pthread_mutex_lock(a) - {.locks: [a].}: - try: - body - finally: - pthread_mutex_unlock(a) +The `|` operator +~~~~~~~~~~~~~~~~~~ -The guard does not need to be of any particular type. It is flexible enough to -model low level lockfree mechanisms: +The `|` operator if used as infix operator creates an ordered choice: .. code-block:: nim - var dummyLock {.compileTime.}: int - var atomicCounter {.guard: dummyLock.}: int - - template atomicRead(x): untyped = - {.locks: [dummyLock].}: - memoryReadBarrier() - x + template t{0|1}(): untyped = 3 + let a = 1 + # outputs 3: + echo a - echo atomicRead(atomicCounter) +The matching is performed after the compiler performed some optimizations like +constant folding, so the following does not work: +.. code-block:: nim -The `locks` pragma takes a list of lock expressions `locks: [a, b, ...]` -in order to support *multi lock* statements. Why these are essential is -explained in the `lock levels <#guards-and-locks-lock-levels>`_ section. + template t{0|1}(): untyped = 3 + # outputs 1: + echo 1 +The reason is that the compiler already transformed the 1 into "1" for +the `echo` statement. However, a term rewriting macro should not change the +semantics anyway. In fact, they can be deactivated with the `--patterns:off`:option: +command line option or temporarily with the `patterns` pragma. -Protecting general locations -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The `guard` annotation can also be used to protect fields within an object. -The guard then needs to be another field within the same object or a -global variable. +The `{}` operator +~~~~~~~~~~~~~~~~~~~ -Since objects can reside on the heap or on the stack this greatly enhances the -expressivity of the language: +A pattern expression can be bound to a pattern parameter via the `expr{param}` +notation: .. 
code-block:: nim - type - ProtectedCounter = object - v {.guard: L.}: int - L: TLock + template t{(0|1|2){x}}(x: untyped): untyped = x + 1 + let a = 1 + # outputs 2: + echo a + - proc incCounters(counters: var openArray[ProtectedCounter]) = - for i in 0..counters.high: - lock counters[i].L: - inc counters[i].v +The `~` operator +~~~~~~~~~~~~~~~~~~ -The access to field `x.v` is allowed since its guard `x.L` is active. -After template expansion, this amounts to: +The `~` operator is the 'not' operator in patterns: .. code-block:: nim - proc incCounters(counters: var openArray[ProtectedCounter]) = - for i in 0..counters.high: - pthread_mutex_lock(counters[i].L) - {.locks: [counters[i].L].}: - try: - inc counters[i].v - finally: - pthread_mutex_unlock(counters[i].L) + template t{x = (~x){y} and (~x){z}}(x, y, z: bool) = + x = y + if x: x = z + + var + a = false + b = true + c = false + a = b and c + echo a -There is an analysis that checks that `counters[i].L` is the lock that -corresponds to the protected location `counters[i].v`. This analysis is called -`path analysis`:idx: because it deals with paths to locations -like `obj.field[i].fieldB[j]`. -The path analysis is **currently unsound**, but that doesn't make it useless. -Two paths are considered equivalent if they are syntactically the same. +The `*` operator +~~~~~~~~~~~~~~~~~~ -This means the following compiles (for now) even though it really should not: +The `*` operator can *flatten* a nested binary expression like `a & b & c` +to `&(a, b, c)`: .. 
code-block:: nim - {.locks: [a[i].L].}: - inc i - access a[i].v + var + calls = 0 + + proc `&&`(s: varargs[string]): string = + result = s[0] + for i in 1..len(s)-1: result.add s[i] + inc calls + template optConc{ `&&` * a }(a: string): untyped = &&a + let space = " " + echo "my" && (space & "awe" && "some " ) && "concat" -Lock levels ------------ + # check that it's been optimized properly: + doAssert calls == 1 -Lock levels are used to enforce a global locking order in order to detect -potential deadlocks during semantic analysis. A lock level is an constant -integer in the range 0..1_000. Lock level 0 means that no lock is acquired at -all. -If a section of code holds a lock of level `M` than it can also acquire any -lock of level `N < M`. Another lock of level `M` cannot be acquired. Locks -of the same level can only be acquired *at the same time* within a -single `locks` section: +The second operator of `*` must be a parameter; it is used to gather all the +arguments. The expression `"my" && (space & "awe" && "some " ) && "concat"` +is passed to `optConc` in `a` as a special list (of kind `nkArgList`) +which is flattened into a call expression; thus the invocation of `optConc` +produces: .. code-block:: nim - var a, b: TLock[2] - var x: TLock[1] - # invalid locking order: TLock[1] cannot be acquired before TLock[2]: - {.locks: [x].}: - {.locks: [a].}: - ... - # valid locking order: TLock[2] acquired before TLock[1]: - {.locks: [a].}: - {.locks: [x].}: - ... - - # invalid locking order: TLock[2] acquired before TLock[2]: - {.locks: [a].}: - {.locks: [b].}: - ... + `&&`("my", space & "awe", "some ", "concat") - # valid locking order, locks of the same level acquired at the same time: - {.locks: [a, b].}: - ... +The `**` operator +~~~~~~~~~~~~~~~~~~~ -Here is how a typical multilock statement can be implemented in Nim. 
Note how -the runtime check is required to ensure a global ordering for two locks `a` -and `b` of the same lock level: +The `**` is much like the `*` operator, except that it gathers not only +all the arguments, but also the matched operators in reverse polish notation: .. code-block:: nim - template multilock(a, b: ptr TLock; body: untyped) = - if cast[ByteAddress](a) < cast[ByteAddress](b): - pthread_mutex_lock(a) - pthread_mutex_lock(b) - else: - pthread_mutex_lock(b) - pthread_mutex_lock(a) - {.locks: [a, b].}: - try: - body - finally: - pthread_mutex_unlock(a) - pthread_mutex_unlock(b) + import std/macros + type + Matrix = object + dummy: int -Whole routines can also be annotated with a `locks` pragma that takes a lock -level. This then means that the routine may acquire locks of up to this level. -This is essential so that procs can be called within a `locks` section: + proc `*`(a, b: Matrix): Matrix = discard + proc `+`(a, b: Matrix): Matrix = discard + proc `-`(a, b: Matrix): Matrix = discard + proc `$`(a: Matrix): string = result = $a.dummy + proc mat21(): Matrix = + result.dummy = 21 -.. code-block:: nim + macro optM{ (`+`|`-`|`*`) ** a }(a: Matrix): untyped = + echo treeRepr(a) + result = newCall(bindSym"mat21") - proc p() {.locks: 3.} = discard + var x, y, z: Matrix - var a: TLock[4] - {.locks: [a].}: - # p's locklevel (3) is strictly less than a's (4) so the call is allowed: - p() + echo x + y * z - x +This passes the expression `x + y * z - x` to the `optM` macro as +an `nnkArgList` node containing:: -As usual `locks` is an inferred effect and there is a subtype -relation: `proc () {.locks: N.}` is a subtype of `proc () {.locks: M.}` -iff (M <= N). + Arglist + Sym "x" + Sym "y" + Sym "z" + Sym "*" + Sym "+" + Sym "x" + Sym "-" -The `locks` pragma can also take the special value `"unknown"`. This -is useful in the context of dynamic method dispatching. In the following -example, the compiler can infer a lock level of 0 for the `base` case. 
-However, one of the overloaded methods calls a procvar which is -potentially locking. Thus, the lock level of calling `g.testMethod` -cannot be inferred statically, leading to compiler warnings. By using -`{.locks: "unknown".}`, the base method can be marked explicitly as -having unknown lock level as well: +(This is the reverse polish notation of `x + y * z - x`.) -.. code-block:: nim - type SomeBase* = ref object of RootObj - type SomeDerived* = ref object of SomeBase - memberProc*: proc () +Parameters +---------- - method testMethod(g: SomeBase) {.base, locks: "unknown".} = discard - method testMethod(g: SomeDerived) = - if g.memberProc != nil: - g.memberProc() +Parameters in a pattern are type checked in the matching process. If a +parameter is of the type `varargs`, it is treated specially and can match +0 or more arguments in the AST to be matched against: + +.. code-block:: nim + + template optWrite{ + write(f, x) + ((write|writeLine){w})(f, y) + }(x, y: varargs[untyped], f: File, w: untyped) = + w(f, x, y) noRewrite pragma @@ -1731,324 +1585,304 @@ e.g. with given example `echo("ab")` will be rewritten just once: `noRewrite` pragma can be useful to control term-rewriting macros recursion. -Aliasing restrictions in parameter passing -========================================== - -**Note**: The aliasing restrictions are currently not enforced by the -implementation and need to be fleshed out further. - -"Aliasing" here means that the underlying storage locations overlap in memory -at runtime. An "output parameter" is a parameter of type `var T`, -an input parameter is any parameter that is not of type `var`. - -1. Two output parameters should never be aliased. -2. An input and an output parameter should not be aliased. -3. An output parameter should never be aliased with a global or thread local - variable referenced by the called proc. -4. An input parameter should not be aliased with a global or thread local - variable updated by the called proc. 
- -One problem with rules 3 and 4 is that they affect specific global or thread -local variables, but Nim's effect tracking only tracks "uses no global variable" -via `.noSideEffect`. The rules 3 and 4 can also be approximated by a different rule: - -5. A global or thread local variable (or a location derived from such a location) - can only passed to a parameter of a `.noSideEffect` proc. - -Noalias annotation -================== +Example: Partial evaluation +--------------------------- -Since version 1.4 of the Nim compiler, there is a `.noalias` annotation for variables -and parameters. It is mapped directly to C/C++'s `restrict`:c: keyword and means that -the underlying pointer is pointing to a unique location in memory, no other aliases to -this location exist. It is *unchecked* that this alias restriction is followed, if the -restriction is violated, the backend optimizer is free to miscompile the code. -This is an **unsafe** language feature. +The following example shows how some simple partial evaluation can be +implemented with term rewriting: -Ideally in later versions of the language, the restriction will be enforced at -compile time. (Which is also why the name `noalias` was choosen instead of a more -verbose name like `unsafeAssumeNoAlias`.) +.. code-block:: nim + proc p(x, y: int; cond: bool): int = + result = if cond: x + y else: x - y -Strict funcs -============ + template optP1{p(x, y, true)}(x, y: untyped): untyped = x + y + template optP2{p(x, y, false)}(x, y: untyped): untyped = x - y -Since version 1.4 a stricter definition of "side effect" is available. In addition -to the existing rule that a side effect is calling a function with side effects -the following rule is also enforced: -Any mutation to an object does count as a side effect if that object is reachable -via a parameter that is not declared as a `var` parameter. 
+Example: Hoisting +----------------- -For example: +The following example shows how some form of hoisting can be implemented: .. code-block:: nim - {.experimental: "strictFuncs".} - - type - Node = ref object - le, ri: Node - data: string - - func len(n: Node): int = - # valid: len does not have side effects - var it = n - while it != nil: - inc result - it = it.ri - - func mut(n: Node) = - let m = n # is the statement that connected the mutation to the parameter - m.data = "yeah" # the mutation is here - # Error: 'mut' can have side effects - # an object reachable from 'n' is potentially mutated - - -The algorithm behind this analysis is described in -the `view types section <#view-types-algorithm>`_. + import std/pegs + template optPeg{peg(pattern)}(pattern: string{lit}): Peg = + var gl {.global, gensym.} = peg(pattern) + gl -View types -========== + for i in 0 .. 3: + echo match("(a b c)", peg"'(' @ ')'") + echo match("W_HI_Le", peg"\y 'while'") -**Note**: `--experimental:views`:option: is more effective -with `--experimental:strictFuncs`:option:. +The `optPeg` template optimizes the case of a peg constructor with a string +literal, so that the pattern will only be parsed once at program startup and +stored in a global `gl` which is then re-used. This optimization is called +hoisting because it is comparable to classical loop hoisting. -A view type is a type that is or contains one of the following types: -- `lent T` (view into `T`) -- `openArray[T]` (pair of (pointer to array of `T`, size)) +AST based overloading +===================== -For example: +Parameter constraints can also be used for ordinary routine parameters; these +constraints then affect ordinary overloading resolution: .. code-block:: nim - type - View1 = openArray[byte] - View2 = lent string - View3 = Table[openArray[char], int] - - -Exceptions to this rule are types constructed via `ptr` or `proc`. -For example, the following types are **not** view types: - -.. 
code-block:: nim + proc optLit(a: string{lit|`const`}) = + echo "string literal" + proc optLit(a: string) = + echo "no string literal" - type - NotView1 = proc (x: openArray[int]) - NotView2 = ptr openArray[char] - NotView3 = ptr array[4, lent int] + const + constant = "abc" + var + variable = "xyz" -The mutability aspect of a view type is not part of the type but part -of the locations it's derived from. More on this later. + optLit("literal") + optLit(constant) + optLit(variable) -A *view* is a symbol (a let, var, const, etc.) that has a view type. +However, the constraints `alias` and `noalias` are not available in +ordinary routines. -Since version 1.4 Nim allows view types to be used as local variables. -This feature needs to be enabled via `{.experimental: "views".}`. -A local variable of a view type *borrows* from the locations and -it is statically enforced that the view does not outlive the location -it was borrowed from. +Parallel & Spawn +================ -For example: +Nim has two flavors of parallelism: +1) `Structured`:idx: parallelism via the `parallel` statement. +2) `Unstructured`:idx: parallelism via the standalone `spawn` statement. -.. code-block:: nim +Nim has a builtin thread pool that can be used for CPU intensive tasks. For +IO intensive tasks the `async` and `await` features should be +used instead. Both parallel and spawn need the `threadpool <threadpool.html>`_ +module to work. - {.experimental: "views".} +Somewhat confusingly, `spawn` is also used in the `parallel` statement +with slightly different semantics. `spawn` always takes a call expression of +the form `f(a, ...)`. Let `T` be `f`'s return type. If `T` is `void`, +then `spawn`'s return type is also `void`, otherwise it is `FlowVar[T]`. - proc take(a: openArray[int]) = - echo a.len +Within a `parallel` section, the `FlowVar[T]` is sometimes eliminated +to `T`. This happens when `T` does not contain any GC'ed memory. 
+The compiler can ensure the location in `location = spawn f(...)` is not +read prematurely within a `parallel` section and so there is no need for +the overhead of an indirection via `FlowVar[T]` to ensure correctness. - proc main(s: seq[int]) = - var x: openArray[int] = s # 'x' is a view into 's' - # it is checked that 'x' does not outlive 's' and - # that 's' is not mutated. - for i in 0 .. high(x): - echo x[i] - take(x) +.. note:: Currently exceptions are not propagated between `spawn`'ed tasks! - take(x.toOpenArray(0, 1)) # slicing remains possible - let y = x # create a view from a view - take y - # it is checked that 'y' does not outlive 'x' and - # that 'x' is not mutated as long as 'y' lives. +This feature is likely to be removed in the future as external packages +can have better solutions. - main(@[11, 22, 33]) +Spawn statement +--------------- +The `spawn`:idx: statement can be used to pass a task to the thread pool: -A local variable of a view type can borrow from a location -derived from a parameter, another local variable, a global `const` or `let` -symbol or a thread-local `var` or `let`. +.. code-block:: nim -Let `p` the proc that is analysed for the correctness of the borrow operation. + import std/threadpool -Let `source` be one of: + proc processLine(line: string) = + discard "do some heavy lifting here" -- A formal parameter of `p`. Note that this does not cover parameters of - inner procs. -- The `result` symbol of `p`. -- A local `var` or `let` or `const` of `p`. Note that this does - not cover locals of inner procs. -- A thread-local `var` or `let`. -- A global `let` or `const`. -- A constant array/seq/object/tuple constructor. + for x in lines("myinput.txt"): + spawn processLine(x) + sync() +For reasons of type safety and implementation simplicity the expression +that `spawn` takes is restricted: -Path expressions ----------------- +* It must be a call expression `f(a, ...)`. +* `f` must be `gcsafe`. 
+* `f` must not have the calling convention `closure`. +* `f`'s parameters may not be of type `var`. + This means one has to use raw `ptr`'s for data passing reminding the + programmer to be careful. +* `ref` parameters are deeply copied, which is a subtle semantic change and + can cause performance problems, but ensures memory safety. This deep copy + is performed via `system.deepCopy`, so it can be overridden. +* For *safe* data exchange between `f` and the caller, a global `Channel` + needs to be used. However, since spawn can return a result, often no further + communication is required. -A location derived from `source` is then defined as a path expression that -has `source` as the owner. A path expression `e` is defined recursively: -- `source` itself is a path expression. -- Container access like `e[i]` is a path expression. -- Tuple access `e[0]` is a path expression. -- Object field access `e.field` is a path expression. -- `system.toOpenArray(e, ...)` is a path expression. -- Pointer dereference `e[]` is a path expression. -- An address `addr e`, `unsafeAddr e` is a path expression. -- A type conversion `T(e)` is a path expression. -- A cast expression `cast[T](e)` is a path expression. -- `f(e, ...)` is a path expression if `f`'s return type is a view type. - Because the view can only have been borrowed from `e`, we then know - that owner of `f(e, ...)` is `e`. +`spawn` executes the passed expression on the thread pool and returns +a `data flow variable`:idx: `FlowVar[T]` that can be read from. The reading +with the `^` operator is **blocking**. However, one can use `blockUntilAny` to +wait on multiple flow variables at the same time: +.. code-block:: nim -If a view type is used as a return type, the location must borrow from a location -that is derived from the first parameter that is passed to the proc. -See https://nim-lang.org/docs/manual.html#procedures-var-return-type for -details about how this is done for `var T`. + import std/threadpool, ... 
-A mutable view can borrow from a mutable location, an immutable view can borrow -from both a mutable or an immutable location. + # wait until 2 out of 3 servers received the update: + proc main = + var responses = newSeq[FlowVarBase](3) + for i in 0..2: + responses[i] = spawn tellServer(Update, "key", "value") + var index = blockUntilAny(responses) + assert index >= 0 + responses.del(index) + discard blockUntilAny(responses) -If a view borrows from a mutable location, the view can be used to update the -location. Otherwise it cannot be used for mutations. +Data flow variables ensure that no data races are possible. Due to +technical limitations, not every type `T` can be used in +a data flow variable: `T` has to be a `ref`, `string`, `seq` +or of a type that doesn't contain any GC'd type. This +restriction is not hard to work around in practice. -The *duration* of a borrow is the span of commands beginning from the assignment -to the view and ending with the last usage of the view. -For the duration of the borrow operation, no mutations to the borrowed locations -may be performed except via the view that borrowed from the -location. The borrowed location is said to be *sealed* during the borrow. -.. code-block:: nim +Parallel statement +------------------ - {.experimental: "views".} +Example: - type - Obj = object - field: string +.. code-block:: nim + :test: "nim c --threads:on $1" - proc dangerous(s: var seq[Obj]) = - let v: lent Obj = s[0] # seal 's' - s.setLen 0 # prevented at compile-time because 's' is sealed. - echo v.field + # Compute pi in an inefficient way + import std/[strutils, math, threadpool] + {.experimental: "parallel".} + proc term(k: float): float = 4 * math.pow(-1, k) / (2*k + 1) -The scope of the view does not matter: + proc pi(n: int): float = + var ch = newSeq[float](n + 1) + parallel: + for k in 0..ch.high: + ch[k] = spawn term(float(k)) + for k in 0..ch.high: + result += ch[k] -.. 
code-block:: nim + echo formatFloat(pi(5000)) - proc valid(s: var seq[Obj]) = - let v: lent Obj = s[0] # begin of borrow - echo v.field # end of borrow - s.setLen 0 # valid because 'v' isn't used afterwards +The parallel statement is the preferred mechanism to introduce parallelism in a +Nim program. Only a subset of the Nim language is valid within a `parallel` +section. This subset is checked during semantic analysis to be free of data +races. A sophisticated `disjoint checker`:idx: ensures that no data races are +possible, even though shared memory is extensively supported! -The analysis requires as much precision about mutations as is reasonably obtainable, -so it is more effective with the experimental `strict funcs <#strict-funcs>`_ -feature. In other words `--experimental:views`:option: works better -with `--experimental:strictFuncs`:option:. +The subset is in fact the full language with the following +restrictions / changes: -The analysis is currently control flow insensitive: +* `spawn` within a `parallel` section has special semantics. +* Every location of the form `a[i]`, `a[i..j]` and `dest` where + `dest` is part of the pattern `dest = spawn f(...)` has to be + provably disjoint. This is called the *disjoint check*. +* Every other complex location `loc` that is used in a spawned + proc (`spawn f(loc)`) has to be immutable for the duration of + the `parallel` section. This is called the *immutability check*. Currently + it is not specified what exactly "complex location" means. We need to make + this an optimization! +* Every array access has to be provably within bounds. This is called + the *bounds check*. +* Slices are optimized so that no copy is performed. This optimization is not + yet performed for ordinary slices outside of a `parallel` section. -.. 
code-block:: nim - proc invalid(s: var seq[Obj]) = - let v: lent Obj = s[0] - if false: - s.setLen 0 - echo v.field -In this example, the compiler assumes that `s.setLen 0` invalidates the -borrow operation of `v` even though a human being can easily see that it -will never do that at runtime. +Lock levels +=========== +Lock levels are used to enforce a global locking order in order to detect +potential deadlocks during semantic analysis. A lock level is a constant +integer in the range 0..1_000. Lock level 0 means that no lock is acquired at +all. -Start of a borrow ------------------ +If a section of code holds a lock of level `M`, it can also acquire any +lock of level `N < M`. Another lock of level `M` cannot be acquired. Locks +of the same level can only be acquired *at the same time* within a +single `locks` section: -A borrow starts with one of the following: +.. code-block:: nim -- The assignment of a non-view-type to a view-type. -- The assignment of a location that is derived from a local parameter - to a view-type. + var a, b: TLock[2] + var x: TLock[1] + # invalid locking order: TLock[1] cannot be acquired before TLock[2]: + {.locks: [x].}: + {.locks: [a].}: + ... + # valid locking order: TLock[2] acquired before TLock[1]: + {.locks: [a].}: + {.locks: [x].}: + ... + # invalid locking order: TLock[2] acquired before TLock[2]: + {.locks: [a].}: + {.locks: [b].}: + ... -End of a borrow ---------------- + # valid locking order, locks of the same level acquired at the same time: + {.locks: [a, b].}: + ... -A borrow operation ends with the last usage of the view variable. +Here is how a typical multilock statement can be implemented in Nim. Note how +the runtime check is required to ensure a global ordering for two locks `a` +and `b` of the same lock level: -Reborrows ---------- +.. code-block:: nim -A view `v` can borrow from multiple different locations. 
However, the borrow -is always the full span of `v`'s lifetime and every location that is borrowed -from is sealed during `v`'s lifetime. + template multilock(a, b: ptr TLock; body: untyped) = + if cast[ByteAddress](a) < cast[ByteAddress](b): + pthread_mutex_lock(a) + pthread_mutex_lock(b) + else: + pthread_mutex_lock(b) + pthread_mutex_lock(a) + {.locks: [a, b].}: + try: + body + finally: + pthread_mutex_unlock(a) + pthread_mutex_unlock(b) -Algorithm ---------- +Whole routines can also be annotated with a `locks` pragma that takes a lock +level. This then means that the routine may acquire locks of up to this level. +This is essential so that procs can be called within a `locks` section: -The following section is an outline of the algorithm that the current implementation -uses. The algorithm performs two traversals over the AST of the procedure or global -section of code that uses a view variable. No fixpoint iterations are performed, the -complexity of the analysis is O(N) where N is the number of nodes of the AST. +.. code-block:: nim -The first pass over the AST computes the lifetime of each local variable based on -a notion of an "abstract time", in the implementation it's a simple integer that is -incremented for every visited node. + proc p() {.locks: 3.} = discard -In the second pass information about the underlying object "graphs" is computed. -Let `v` be a parameter or a local variable. Let `G(v)` be the graph -that `v` belongs to. A graph is defined by the set of variables that belong -to the graph. Initially for all `v`: `G(v) = {v}`. Every variable can only -be part of a single graph. + var a: TLock[4] + {.locks: [a].}: + # p's locklevel (3) is strictly less than a's (4) so the call is allowed: + p() -Assignments like `a = b` "connect" two variables, both variables end up in the -same graph `{a, b} = G(a) = G(b)`. 
Unfortunately, the pattern to look for is -much more complex than that and can involve multiple assignment targets -and sources:: - f(x, y) = g(a, b) +As usual, `locks` is an inferred effect and there is a subtype +relation: `proc () {.locks: N.}` is a subtype of `proc () {.locks: M.}` +iff (M <= N). -connects `x` and `y` to `a` and `b`: `G(x) = G(y) = G(a) = G(b) = {x, y, a, b}`. -A type based alias analysis rules out some of these combinations, for example -a `string` value cannot possibly be connected to a `seq[int]`. +The `locks` pragma can also take the special value `"unknown"`. This +is useful in the context of dynamic method dispatching. In the following +example, the compiler can infer a lock level of 0 for the `base` case. +However, one of the overloaded methods calls a procvar which is +potentially locking. Thus, the lock level of calling `g.testMethod` +cannot be inferred statically, leading to compiler warnings. By using +`{.locks: "unknown".}`, the base method can be marked explicitly as +having unknown lock level as well: -A pattern like `v[] = value` or `v.field = value` marks `G(v)` as mutated. -After the second pass a set of disjoint graphs was computed. +.. code-block:: nim -For strict functions it is then enforced that there is no graph that is both mutated -and has an element that is an immutable parameter (that is a parameter that is not -of type `var T`). + type SomeBase* = ref object of RootObj + type SomeDerived* = ref object of SomeBase + memberProc*: proc () -For borrow checking a different set of checks is performed. Let `v` be the view -and `b` the location that is borrowed from. + method testMethod(g: SomeBase) {.base, locks: "unknown".} = discard + method testMethod(g: SomeDerived) = + if g.memberProc != nil: + g.memberProc() -- The lifetime of `v` must not exceed `b`'s lifetime. Note: The lifetime of - a parameter is the complete proc body. -- If `v` is used for a mutation, `b` must be a mutable location too. 
-- During `v`'s lifetime, `G(b)` can only be modified by `v` (and only if - `v` is a mutable view). -- If `v` is `result` then `b` has to be a location derived from the first - formal parameter or from a constant location. -- A view cannot be used for a read or a write access before it was assigned to. +This feature may be removed in the future due to its practical difficulties. diff --git a/lib/core/macros.nim b/lib/core/macros.nim index 6dbb928756c48..c7d56c0ed8580 100644 --- a/lib/core/macros.nim +++ b/lib/core/macros.nim @@ -465,12 +465,7 @@ proc bindSym*(ident: string | NimNode, rule: BindSymRule = brClosed): NimNode {. ## If `rule == brForceOpen` always an `nnkOpenSymChoice` tree is ## returned even if the symbol is not ambiguous. ## - ## Experimental feature: - ## use {.experimental: "dynamicBindSym".} to activate it. - ## If called from template / regular code, `ident` and `rule` must be - ## constant expression / literal value. - ## If called from macros / compile time procs / static blocks, - ## `ident` and `rule` can be VM computed value. + ## See the `manual <manual_experimental.html#dynamic-arguments-for-bindsym>`_ for more details. proc genSym*(kind: NimSymKind = nskLet; ident = ""): NimNode {. magic: "NGenSym", noSideEffect.} diff --git a/tests/macros/tcasestmtmacro.nim b/tests/macros/tcasestmtmacro.nim index 26519f637c70f..32019a92a6c0b 100644 --- a/tests/macros/tcasestmtmacro.nim +++ b/tests/macros/tcasestmtmacro.nim @@ -4,8 +4,6 @@ yes ''' """ -{.experimental: "caseStmtMacros".} - import macros macro `case`(n: tuple): untyped =