decouple parser from PNode

The main change - introduce a `ParsedNode` type which replaces `PNode` in the parser. This change allows for further work on decoupling `sem` from other parts of the compiler, making it easier to implement improvements in a way that would not rip through the whole codebase and test suite. Right now the introduced type closely mimics the `PNode` counterpart, but this is just a temporary measure for the transition period. This commit is a part of a multi-step series - full list can be seen in the related issue nim-works#423 * Documentation changes - Add missing documentation for changes in the earlier commit, add more how-tos to the debugging section (I haven't coded in a while, so it was especially important to write down explanations for anything I had trouble with) nim-works@602367b * Tangentially related refactoring work - Cleanup the `passes.nim` implementation a bit - despite common (at least seemingly shared by many of the previous authors of the codebase) misconception longer variable names actually *do* increase readability. Also infamous recommendations for the "structured programming" also do not really mesh with proliferation of `break` statements in the code. Add todo/bug comment for the main processing loop bug related to the phase ordering in `compiler/sem/passes.nim:234` * Debugging tools improvements - Implement `astrepr.nim` support for the `ParsedNode` and `PIdent` - `debug` and `treeRepr` procedures. - Allow skipping repeated symbol in the `(open|closed)SymChoice` node kinds in the `astrepr` - Restructure imports of the `astrepr` and move it closer to the 'primitive' modules - type definitions and trivial data queries. The most important change is removal of the `ast.nim` and `renderer.nim` imports, which opens these modules for debugging as well. 
- Consider possibility of a nil `owner` in the symbol owner chain representation calculations in `astrepr` - Semantic tracer debug output file rotation now uses location of the first `.define(` call as a file name base instead of integer-based ones. Added basic logging information about created files - now a developer can see what is going on and what gets written. For example, running with `--define=nimCompilerDebugTraceDir=/tmp` and several `define(...)` sections produces the following output: ``` comparisons.nim(269, 8): opening /tmp/comparisons_nim_0 trace comparisons.nim(274, 7): closing trace, wrote 44 records comparisons.nim(276, 8): opening /tmp/comparisons_nim_1 trace comparisons.nim(285, 7): closing trace, wrote 329 records ``` - Simplify implementation of the `reportInst` handling in the debug utils tracer - now each toplevel tracer template must submit the location by itself - this solution avoids unintuitive and fragile `instLoc(-5)` call which might break with more templates introduced. Also updated documentation on the `reportInst` and `reportFrom` in the reports file. - compiler/front/options.nim:693 :: Unconditionally output debugging traces if they are requested, regardless of the surrounding hooks and filters. Introduce the `bypassWriteHookForTrace` flag in the debugging hack controller which makes it possible to bypass the `writeln` hook. * Further work - compiler/ast/parser.nim:744 :: introduce two tokens in order to handle custom literals. There is no real need to mash together everything in a single chunk of text that would have to be split apart down the line.
haxscramper · Sep 3, 2022 · 98d807a · 98d807a
1 parent 837238f
commit 98d807a
Show file tree

Hide file tree

Showing 25 changed files with 1,163 additions and 538 deletions.
diff --git a/compiler/ast/ast.nim b/compiler/ast/ast.nim
@@ -16,20 +16,23 @@ import
     ast_types, # Main ast type definitions
     ast_idgen, # Per module Id generation
     ast_query, # querying/reading the ast
+    ast_parsed_types, # Data types for the parsed node
+    lexer, # NumericalBase
   ],
   compiler/front/[
     options
   ],
   compiler/utils/[
     ropes,
+    astrepr,
     int128 # Values for integer nodes
   ],
   std/[
     strutils,
     tables # For symbol table mapping
   ]
 
-export ast_types, ast_idgen, ast_query, int128
+export ast_types, ast_idgen, ast_query, int128, ast_parsed_types
 
 var ggDebug* {.deprecated.}: bool ## convenience switch for trying out things
 
@@ -604,3 +607,42 @@ proc toHumanStr*(kind: TSymKind): string =
 proc toHumanStr*(kind: TTypeKind): string =
   ## Human-readable name of a type kind, computed by
   ## `toHumanStrImpl(kind, 2)`.
   ##
   ## NOTE(review): the original comment said this strips a leading `tk`;
   ## presumably the `2` is the length of the prefix to drop - confirm the
   ## actual prefix used by the `TTypeKind` members.
   result = toHumanStrImpl(kind, 2)
+
+
+proc setBaseFlags(n: PNode, base: NumericalBase) =
+  ## Mark `n` with the node flag that corresponds to the numerical `base`.
+  ## Base 10 leaves the flags untouched.
+  case base
+  of base2: n.flags.incl nfBase2
+  of base8: n.flags.incl nfBase8
+  of base16: n.flags.incl nfBase16
+  of base10: discard
+
+
+proc toPNode*(parsed: ParsedNode): PNode =
+  ## Recursively convert a `ParsedNode` tree into a `PNode` tree with the
+  ## same node kind, location and comment. Literal and identifier nodes
+  ## take their value from the stored token; every other kind copies the
+  ## block-argument marker and converts each son in order.
+  result = newNodeI(parsed.kind, parsed.info)
+  result.comment = parsed.comment
+
+  case parsed.kind
+  of nkIdent:
+    result.ident = parsed.token.ident
+
+  of nkStrKinds:
+    result.strVal = parsed.token.literal
+
+  of nkCharLit:
+    # The value is the ordinal of the first character of the token text.
+    result.intVal = ord(parsed.token.literal[0])
+
+  of nkIntKinds - {nkCharLit}:
+    result.intVal = parsed.token.iNumber
+    setBaseFlags(result, parsed.token.base)
+
+  of nkFloatKinds:
+    result.floatVal = parsed.token.fNumber
+    setBaseFlags(result, parsed.token.base)
+
+  else:
+    if parsed.isBlockArg:
+      incl(result.flags, nfBlockArg)
+
+    for son in parsed:
+      result.add toPNode(son)
+
diff --git a/compiler/ast/ast_parsed_types.nim b/compiler/ast/ast_parsed_types.nim
@@ -0,0 +1,111 @@
+## Data structure for the parser results
+
+import
+  compiler/ast/[
+    ast_types, # For the node kinds
+    lexer # For the token type definition
+  ]
+
+# NOTE further refactoring considerations for the parser
+#
+# - store everything in tokens, do not require identifier interning for any
+#   purposes during the parsing stage, it must be done later, during
+#   conversion to a PNode. This will simplify some parts of the type
+#   definition.
+# - remove nim"pretty" - this is an absolute joke of implementation and
+#   it should not be placed where it is now.
+
+type
+  ParsedNode* = ref object
+    ## Node of the tree produced by the parser. Stores the node kind, the
+    ## source location, a token, the nested sons and the attached comment.
+    # NOTE next two fields are very large combined, but further plans will
+    # deal with that problem - current implementation is easier to write
+    # and it is just a transition point.
+    info*: TLineInfo # TODO replace line and separate token with index to
+                     # the token, which in turn will store information
+                     # about global positioning (tuple made up of a token
+                     # id and a file ID)
+                     #
+                     # NOTE technically this is not really necessary even
+                     # with the current implementation, but the parser
+                     # consistently copies this information around anyway,
+                     # so I will let it stay this way for now.
+    token*: Token # TODO Replace full token value with an index information
+    kind*: TNodeKind # NOTE/QUESTION - for now the same kind of nodes is
+                     # reused as the main parser, to ease the transition,
+                     # but in the future two different sets of node kinds
+                     # might(?) be introduced.
+
+    # TODO replace `ref` object tree with begin/end ranges for the nested
+    # trees in the linearized structure.
+    sons*: seq[ParsedNode]
+    comment*: string # TODO this should either be a token or a sequence of
+                     # tokens.
+
+    # HACK explicit flags in order to track down all 'extra' information
+    # that is collected during parsing.
+    isBlockArg*: bool # QUESTION add 'nkStmtListBlockArg' or similar node
+                      # and convert it to the `nkStmtList` + `nfBlockArg`
+                      # flags later on? Why do we need the `nfBlockArg`
+                      # flag in the first place?
+
+func len*(node: ParsedNode): int =
+  ## Count of the direct sons stored in `node`.
+  node.sons.len
+
+# NOTE kept so that `ParsedNode` offers an API similar to `PNode`
+proc safeLen*(node: ParsedNode): int =
+  ## Number of the sons of a parsed node.
+  node.sons.len
+
+proc `[]`*(node: ParsedNode, idx: int | BackwardsIndex): ParsedNode =
+  ## Son of `node` stored at position `idx`; a `BackwardsIndex` such as
+  ## `^1` is accepted as well.
+  node.sons[idx]
+
+proc `[]=`*(node: ParsedNode, idx: int | BackwardsIndex, other: ParsedNode) =
+  ## Replace the son of `node` stored at position `idx` with `other`.
+  node.sons[idx] = other
+
+iterator items*(node: ParsedNode): ParsedNode =
+  ## Yield every son of `node` in storage order.
+  for son in node.sons:
+    yield son
+
+iterator pairs*(node: ParsedNode): (int, ParsedNode) =
+  ## Yield an `(index, son)` tuple for every son of `node` in storage
+  ## order.
+  for position, son in node.sons:
+    yield (position, son)
+
+proc add*(node: ParsedNode, other: ParsedNode) =
+  ## Append `other` to the end of the sons of `node`.
+  node.sons.add(other)
+
+proc transitionSonsKind*(n: ParsedNode, kind: TNodeKind) =
+  ## Set the kind of `n` to `kind`. `ParsedNode` stores `kind` as a plain
+  ## field, so this is a direct assignment.
+  ##
+  ## NOTE(review): presumably provided for API similarity with `PNode` -
+  ## confirm against the callers in the parser.
+  n.kind = kind
+
+proc transitionIntKind*(n: ParsedNode, kind: TNodeKind) =
+  ## Set the kind of `n` to `kind`. `ParsedNode` stores `kind` as a plain
+  ## field, so this is a direct assignment.
+  ##
+  ## NOTE(review): presumably provided for API similarity with `PNode` -
+  ## confirm against the callers in the parser.
+  n.kind = kind
+
+proc transitionNoneToSym*(n: ParsedNode) =
+  ## Set the kind of `n` to `nkSym`. The previous kind of `n` is not
+  ## checked.
+  n.kind = nkSym
+
+func newParsedNode*(kind: TNodeKind): ParsedNode =
+  ## Construct a parsed node of the given `kind` that has an unknown
+  ## location and no token information.
+  ParsedNode(kind: kind, info: unknownLineInfo)
+
+func newParsedNode*(
+    kind: TNodeKind,
+    info: TLineInfo,
+    sons: seq[ParsedNode] = @[]
+  ): ParsedNode =
+  ## Construct a non-leaf parsed node of the given `kind`, located at
+  ## `info` and holding `sons` (empty by default).
+  ParsedNode(kind: kind, info: info, sons: sons)
+
+func newParsedNode*(
+    kind: TNodeKind, info: TLineInfo, token: Token): ParsedNode =
+  ## Construct a leaf parsed node of the given `kind`, located at `info`
+  ## and carrying `token`.
+  ParsedNode(kind: kind, info: info, token: token)
+
+
+proc newProcNode*(
+    kind: TNodeKind,
+    info: TLineInfo,
+    body, params, name, pattern, genericParams,
+    pragmas, exceptions: ParsedNode
+  ): ParsedNode =
+  ## Build a parsed node of `kind` located at `info`. The sons are stored
+  ## in this order: name, pattern, generic params, params, pragmas,
+  ## exceptions, body - which differs from the parameter order.
+  let sons = @[
+    name, pattern, genericParams, params, pragmas, exceptions, body]
+  result = newParsedNode(kind, info, sons)
diff --git a/compiler/ast/ast_query.nim b/compiler/ast/ast_query.nim
@@ -97,6 +97,8 @@ const
   callableDefs* = nkLambdaKinds + routineDefs
 
   nkSymChoices* = {nkClosedSymChoice, nkOpenSymChoice}
+  nkFloatKinds* = nkFloatLiterals # QUESTION remove float literals
+                                  # altogether?
   nkStrKinds* = {nkStrLit..nkTripleStrLit}
   nkIntKinds* = {nkCharLit .. nkUInt64Lit}
 

diff --git a/compiler/ast/ast_types.nim b/compiler/ast/ast_types.nim
@@ -20,6 +20,10 @@ type
       offsetA*, offsetB*: int
       commentOffsetA*, commentOffsetB*: int
 
+const
+  # File index value `-1`, used below as the file of `unknownLineInfo`.
+  InvalidFileIdx* = FileIndex(-1)
+  # Line information that does not point at any source location: line 0,
+  # column -1 and the invalid file index.
+  unknownLineInfo* = TLineInfo(line: 0, col: -1, fileIndex: InvalidFileIdx)
+
 type
   TCallingConvention* = enum
     ccNimCall = "nimcall"           ## nimcall, also the default

diff --git a/compiler/ast/lexer.nim b/compiler/ast/lexer.nim
@@ -517,6 +517,7 @@ proc getNumber(L: var Lexer, result: var Token) =
           # XXX: Test this on big endian machine!
         of tkFloat64Lit, tkFloatLit:
           setNumber result.fNumber, (cast[PFloat64](addr(xi)))[]
+
         else:
           L.config.internalError(getLineInfo(L), rintIce, "getNumber")
 

diff --git a/compiler/ast/lineinfos.nim b/compiler/ast/lineinfos.nim
@@ -78,12 +78,7 @@ proc computeNotesVerbosity(): tuple[
     }
 
   when defined(nimDebugUtils):
-    result.base.incl {
-      rdbgTraceStart, # Begin report
-      rdbgTraceStep, # in/out
-      rdbgTraceLine,
-      rdbgTraceEnd # End report
-    }
+    result.base.incl repDebugTraceKinds
 
   result.main[compVerbosityMax] = result.base + repWarningKinds + repHintKinds - {
     rsemObservableStores,
@@ -201,10 +196,6 @@ proc hash*(i: TLineInfo): Hash =
 proc raiseRecoverableError*(msg: string) {.noinline.} =
   ## Raise an `ERecoverableError` carrying `msg` as its message.
   raise newException(ERecoverableError, msg)
 
-const
-  InvalidFileIdx* = FileIndex(-1)
-  unknownLineInfo* = TLineInfo(line: 0, col: -1, fileIndex: InvalidFileIdx)
-
 func isKnown*(info: TLineInfo): bool =
   ## Check if `info` represents valid source file location
   info != unknownLineInfo