Skip to content

Commit

Permalink
decouple parser from PNode
Browse files Browse the repository at this point in the history
The main change - introduce a `ParsedNode` type which replaces `PNode`
in the parser. This change allows for further work on decoupling `sem`
from other parts of the compiler, making it easier to implement
improvements in a way that would not rip through the whole codebase and
test suite. Right now the introduced type closely mimics its `PNode`
counterpart, but this is just a temporary measure for the transition
period. This commit is part of a multi-step series - the full list can be
seen in the related issue nim-works#423

* Documentation changes

- Add missing documentation for changes in the earlier commit, add more
  how-tos to the debugging section (I haven't coded in a while, so it was
  especially important to write down explanations for anything I had
  trouble with)
  nim-works@602367b

* Tangentially related refactoring work

- Cleanup the `passes.nim` implementation a bit - despite common (at
  least seemingly shared by many of the previous authors of the
  codebase) misconception longer variable names actually *do* increase
  readability. The well-known recommendations of "structured
  programming" also do not really mesh with the proliferation of `break`
  statements in the code.

  Add todo/bug comment for the main processing loop bug related to the
  phase ordering in `compiler/sem/passes.nim:234`

* Debugging tools improvements

- Implement `astrepr.nim` support for the `ParsedNode` and `PIdent` -
  `debug` and `treeRepr` procedures.
- Allow skipping repeated symbol in the `(open|closed)SymChoice` node
  kinds in the `astrepr`
- Restructure imports of the `astrepr` and move it closer to the
  'primitive' modules - type definitions and trivial data queries. The
  most important change is removal of the `ast.nim` and `renderer.nim`
  imports, which opens these modules for debugging as well.
- Consider possibility of a nil `owner` in the symbol owner chain
  representation calculations in `astrepr`
- Semantic tracer debug output file rotation now uses location of the
  first `.define(` call as a file name base instead of integer-based
  ones. Added basic logging information about created files - now a
  developer can see what is going on and what gets written.

  For example, running with `--define=nimCompilerDebugTraceDir=/tmp` and
  several `define(...)` sections produces the following output:

  ```
  comparisons.nim(269, 8): opening /tmp/comparisons_nim_0 trace
  comparisons.nim(274, 7): closing trace, wrote 44 records
  comparisons.nim(276, 8): opening /tmp/comparisons_nim_1 trace
  comparisons.nim(285, 7): closing trace, wrote 329 records
  ```
- Simplify implementation of the `reportInst` handling in the debug
  utils tracer - now each toplevel tracer template must submit the
  location by itself - this solution avoids unintuitive and fragile
  `instLoc(-5)` call which might break with more templates introduced.
  Also updated documentation on the `reportInst` and `reportFrom` in the
  reports file.

- compiler/front/options.nim:693 :: Unconditionally output debugging
  traces if they are requested, regardless of the surrounding hooks and
  filters. Introduce the `bypassWriteHookForTrace` flag in the debugging
  hack controller which makes it possible to bypass the `writeln` hook.

* Further work

- compiler/ast/parser.nim:744 :: introduce two tokens in order to handle
  custom literals. There is no real need to mash together everything in
  a single chunk of text that would have to be split apart down the
  line.
  • Loading branch information
haxscramper committed Sep 3, 2022
1 parent 837238f commit 98d807a
Show file tree
Hide file tree
Showing 25 changed files with 1,163 additions and 538 deletions.
44 changes: 43 additions & 1 deletion compiler/ast/ast.nim
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,23 @@ import
ast_types, # Main ast type definitions
ast_idgen, # Per module Id generation
ast_query, # querying/reading the ast
ast_parsed_types, # Data types for the parsed node
lexer, # NumericalBase
],
compiler/front/[
options
],
compiler/utils/[
ropes,
astrepr,
int128 # Values for integer nodes
],
std/[
strutils,
tables # For symbol table mapping
]

export ast_types, ast_idgen, ast_query, int128
export ast_types, ast_idgen, ast_query, int128, ast_parsed_types

var ggDebug* {.deprecated.}: bool ## convenience switch for trying out things

Expand Down Expand Up @@ -604,3 +607,42 @@ proc toHumanStr*(kind: TSymKind): string =
proc toHumanStr*(kind: TTypeKind): string =
## Human-readable name of a type kind with the leading `tk` stripped.
## Forwards to `toHumanStrImpl`; the `2` is presumably the length of the
## prefix to drop - confirm against the helper's definition.
result = toHumanStrImpl(kind, 2)


proc setBaseFlags(n: PNode, base: NumericalBase) =
  ## Record the numerical base a literal was written in on the node flags.
  ## Base 10 is the default and leaves the flags untouched; every other
  ## base adds the matching `nfBaseN` flag to `n.flags`.
  case base
  of base2:
    n.flags.incl nfBase2
  of base8:
    n.flags.incl nfBase8
  of base16:
    n.flags.incl nfBase16
  of base10:
    discard


proc toPNode*(parsed: ParsedNode): PNode =
## Convert a parser tree into the `PNode` representation.
##
## The result gets the kind, location and comment of `parsed`. Literal and
## identifier nodes take their value from `parsed.token`; any other node
## only carries over the block-argument flag. Sons are converted with a
## recursive call and appended to the result.
result = newNodeI(parsed.kind, parsed.info)
result.comment = parsed.comment
case parsed.kind:
of nkFloatKinds:
# Float literal: value and numerical base come from the token
result.floatVal = parsed.token.fNumber
result.setBaseFlags(parsed.token.base)

of nkIntKinds - { nkCharLit }:
# Integer literal (char literals are handled separately below)
result.intVal = parsed.token.iNumber
result.setBaseFlags(parsed.token.base)

of nkCharLit:
# Char literal: the value is the ordinal of the first character of the
# token text. NOTE(review): assumes `literal` is never empty for a char
# token - confirm against the lexer.
result.intVal = ord(parsed.token.literal[0])

of nkStrKinds:
result.strVal = parsed.token.literal

of nkIdent:
result.ident = parsed.token.ident

else:
# Only these remaining node kinds carry the explicit block-argument marker
if parsed.isBlockArg:
result.flags.incl nfBlockArg


# Recursively convert every son and append it to the result
for sub in items(parsed):
result.add sub.toPNode()

111 changes: 111 additions & 0 deletions compiler/ast/ast_parsed_types.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
## Data structure for the parser results

import
compiler/ast/[
ast_types, # For the node kinds
lexer # For the token type definition
]

# NOTE further refactoring considerations for the parser
#
# - store everything in tokens, do not require identifier interning for any
# purposes during the parsing stage, it must be done later, during
# conversion to a PNode. This will simplify some parts of the type
# definition.
# - remove nim"pretty" - this is an absolute joke of implementation and
# it should not be placed where it is now.

type
ParsedNode* = ref object
## Node of the tree produced by the parser. Holds the node kind, source
## location, originating token, child nodes, attached comment and an
## explicit block-argument marker.
# NOTE next two fields are very large combined, but further plans will
# deal with that problem - current implementation is easier to write
# and it is just a transition point.
info*: TLineInfo # TODO replace line and separate token with index to
# the token, which in turn will store information
# about global positioning (tuple made up of a token
# id and a file ID)
#
# NOTE technically this is not really necessary even
# with the current implementation, but the parser
# consistently copies this information around anyway,
# so I will let it stay this way for now.
token*: Token # TODO Replace full token value with an index information
kind*: TNodeKind # NOTE/QUESTION - for now the same kind of nodes is
# reused as the main parser, to ease the transition,
# but in the future two different sets of node kinds
# might(?) be introduced.

# TODO replace `ref` object tree with begin/end ranges for the nested
# trees in the linearized structure.
sons*: seq[ParsedNode]
comment*: string # TODO this should either be a token or a sequence of
# tokens.

# HACK explicit flags in order to track down all 'extra' information
# that is collected during parsing.
isBlockArg*: bool # QUESTION add 'nkStmtListBlockArg' or similar node
# and convert it to the `nkStmtList` + `nfBlockArg`
# flags later on? Why do we need the `nfBlockArg`
# flag in the first place?

func len*(node: ParsedNode): int =
  ## Count of direct sons stored in `node`.
  result = len(node.sons)

# NOTE added for the sake of API similarity between PNode
proc safeLen*(node: ParsedNode): int =
  ## Number of sons of `node`; simply forwards to `len`.
  result = len(node)

proc `[]`*(node: ParsedNode, idx: int | BackwardsIndex): ParsedNode =
  ## Son of `node` at position `idx`, which is either a regular integer
  ## index or a backwards (`^n`) index into the sons.
  result = node.sons[idx]

proc `[]=`*(node: ParsedNode, idx: int | BackwardsIndex, other: ParsedNode) =
  ## Replace the son of `node` at position `idx` with `other`.
  `[]=`(node.sons, idx, other)

iterator items*(node: ParsedNode): ParsedNode =
  ## Yield every direct son of `node`, first to last.
  for son in node.sons:
    yield son

iterator pairs*(node: ParsedNode): (int, ParsedNode) =
  ## Yield `(index, son)` for every direct son of `node`, first to last.
  for position, son in node.sons:
    yield (position, son)

proc add*(node: ParsedNode, other: ParsedNode) =
  ## Append `other` as the last son of `node`.
  add(node.sons, other)

proc transitionSonsKind*(n: ParsedNode, kind: TNodeKind) =
  ## Change the kind of `n` to `kind` in place; no other field is touched.
  n.kind = kind

proc transitionIntKind*(n: ParsedNode, kind: TNodeKind) =
  ## Change the kind of `n` to `kind` in place; no other field is touched.
  n.kind = kind

proc transitionNoneToSym*(n: ParsedNode) =
  ## Turn `n` into an `nkSym` node in place; no other field is touched.
  n.kind = nkSym

func newParsedNode*(kind: TNodeKind): ParsedNode =
  ## Construct a node of the given `kind` whose location is
  ## `unknownLineInfo`; token, sons and comment are left at their defaults.
  result = ParsedNode(info: unknownLineInfo, kind: kind)

func newParsedNode*(
    kind: TNodeKind, info: TLineInfo, sons: seq[ParsedNode] = @[]): ParsedNode =
  ## Construct a non-leaf node of the given `kind` located at `info`,
  ## holding `sons` as its children (none by default).
  result = ParsedNode(info: info, kind: kind, sons: sons)

func newParsedNode*(kind: TNodeKind, info: TLineInfo, token: Token): ParsedNode =
  ## Construct a leaf node of the given `kind` located at `info` that
  ## stores `token` as its originating token.
  result = ParsedNode(info: info, token: token, kind: kind)


proc newProcNode*(
    kind: TNodeKind,
    info: TLineInfo,
    body, params, name, pattern, genericParams,
    pragmas, exceptions: ParsedNode
  ): ParsedNode =
  ## Construct a routine-like node of the given `kind` at `info`.
  ##
  ## The sons are stored in a fixed order that differs from the parameter
  ## order: name, pattern, generic parameters, parameters, pragmas,
  ## exceptions, body.
  let routineSons = @[
    name, pattern, genericParams, params, pragmas, exceptions, body]
  result = newParsedNode(kind, info, routineSons)
2 changes: 2 additions & 0 deletions compiler/ast/ast_query.nim
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ const
callableDefs* = nkLambdaKinds + routineDefs

nkSymChoices* = {nkClosedSymChoice, nkOpenSymChoice}
nkFloatKinds* = nkFloatLiterals # QUESTION remove float literals
# altogether?
nkStrKinds* = {nkStrLit..nkTripleStrLit}
nkIntKinds* = {nkCharLit .. nkUInt64Lit}

Expand Down
4 changes: 4 additions & 0 deletions compiler/ast/ast_types.nim
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ type
offsetA*, offsetB*: int
commentOffsetA*, commentOffsetB*: int

const
# File index value `-1`, used below as the file of `unknownLineInfo`
InvalidFileIdx* = FileIndex(-1)
# Line info with line 0, column -1 and the invalid file index; presumably
# the marker for "no known source location" - confirm against its users
unknownLineInfo* = TLineInfo(line: 0, col: -1, fileIndex: InvalidFileIdx)

type
TCallingConvention* = enum
ccNimCall = "nimcall" ## nimcall, also the default
Expand Down
1 change: 1 addition & 0 deletions compiler/ast/lexer.nim
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,7 @@ proc getNumber(L: var Lexer, result: var Token) =
# XXX: Test this on big endian machine!
of tkFloat64Lit, tkFloatLit:
setNumber result.fNumber, (cast[PFloat64](addr(xi)))[]

else:
L.config.internalError(getLineInfo(L), rintIce, "getNumber")

Expand Down
11 changes: 1 addition & 10 deletions compiler/ast/lineinfos.nim
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,7 @@ proc computeNotesVerbosity(): tuple[
}

when defined(nimDebugUtils):
result.base.incl {
rdbgTraceStart, # Begin report
rdbgTraceStep, # in/out
rdbgTraceLine,
rdbgTraceEnd # End report
}
result.base.incl repDebugTraceKinds

result.main[compVerbosityMax] = result.base + repWarningKinds + repHintKinds - {
rsemObservableStores,
Expand Down Expand Up @@ -201,10 +196,6 @@ proc hash*(i: TLineInfo): Hash =
proc raiseRecoverableError*(msg: string) {.noinline.} =
## Raise an `ERecoverableError` carrying `msg` as its message. Never
## returns normally.
raise newException(ERecoverableError, msg)

const
InvalidFileIdx* = FileIndex(-1)
unknownLineInfo* = TLineInfo(line: 0, col: -1, fileIndex: InvalidFileIdx)

func isKnown*(info: TLineInfo): bool =
  ## True when `info` differs from `unknownLineInfo`, i.e. it represents a
  ## valid source file location.
  result = not (info == unknownLineInfo)
Expand Down
Loading

0 comments on commit 98d807a

Please sign in to comment.