From 00454f2650a31ddee7c46794937c34c012539d05 Mon Sep 17 00:00:00 2001 From: Timothee Cour Date: Sun, 29 Aug 2021 06:54:07 -0700 Subject: [PATCH] move PNode.comment to a side channel, reducing memory usage during compilation by a factor 1.25x (#18760) * move PNode.comment so a side channel, reducing memory usage * fix a bug * fixup * use sfHasComment to speedup comment lookups * fix for IC * Update compiler/parser.nim Co-authored-by: Andreas Rumpf --- compiler/ast.nim | 77 +++++++++++++++++++++++++++++++------------ compiler/commands.nim | 3 +- compiler/parser.nim | 14 +++++--- 3 files changed, 67 insertions(+), 27 deletions(-) diff --git a/compiler/ast.nim b/compiler/ast.nim index 446c00f494e7d..5893a5671612a 100644 --- a/compiler/ast.nim +++ b/compiler/ast.nim @@ -10,7 +10,7 @@ # abstract syntax tree + symbol table import - lineinfos, hashes, options, ropes, idents, int128 + lineinfos, hashes, options, ropes, idents, int128, tables from strutils import toLowerAscii export int128 @@ -497,6 +497,7 @@ type nfExecuteOnReload # A top-level statement that will be executed during reloads nfLastRead # this node is a last read nfFirstWrite# this node is a first write + nfHasComment # node has a comment TNodeFlags* = set[TNodeFlag] TTypeFlag* = enum # keep below 32 for efficiency reasons (now: 43) @@ -774,7 +775,6 @@ type ident*: PIdent else: sons*: TNodeSeq - comment*: string TStrTable* = object # a table[PIdent] of PSym counter*: int @@ -991,6 +991,38 @@ type TImplication* = enum impUnknown, impNo, impYes +template nodeId(n: PNode): int = cast[int](n) + +type Gconfig = object + # we put comments in a side channel to avoid increasing `sizeof(TNode)`, which + # reduces memory usage given that `PNode` is the most allocated type by far. + comments: Table[int, string] # nodeId => comment + useIc*: bool + +var gconfig {.threadvar.}: Gconfig + +proc setUseIc*(useIc: bool) = gconfig.useIc = useIc + +proc comment*(n: PNode): string = + if nfHasComment in n.flags and not gconfig.useIc: + # IC doesn't track comments, see `packed_ast`, so this could fail + result = gconfig.comments[n.nodeId] + +proc `comment=`*(n: PNode, a: string) = + let id = n.nodeId + if a.len > 0: + # if needed, we could periodically cleanup gconfig.comments when its size increases, + # to ensure only live nodes (and with nfHasComment) have an entry in gconfig.comments; + # for compiling compiler, the waste is very small: + # num calls to newNodeImpl: 14984160 (num of PNode allocations) + # size of gconfig.comments: 33585 + # num of nodes with comments that were deleted and hence wasted: 3081 + n.flags.incl nfHasComment + gconfig.comments[id] = a + elif nfHasComment in n.flags: + n.flags.excl nfHasComment + gconfig.comments.del(id) + # BUGFIX: a module is overloadable so that a proc can have the # same name as an imported module. This is necessary because of # the poor naming choices in the standard library. @@ -1207,9 +1239,17 @@ when defined(useNodeIds): const nodeIdToDebug* = -1 # 2322968 var gNodeId: int -proc newNode*(kind: TNodeKind): PNode = - ## new node with unknown line info, no type, and no children - result = PNode(kind: kind, info: unknownLineInfo) +template newNodeImpl(info2) = + result = PNode(kind: kind, info: info2) + when false: + # this would add overhead, so we skip it; it results in a small amount of leaked entries + # for old PNode that gets re-allocated at the same address as a PNode that + # has `nfHasComment` set (and an entry in that table). Only `nfHasComment` + # should be used to test whether a PNode has a comment; gconfig.comments + # can contain extra entries for deleted PNode's with comments. + gconfig.comments.del(cast[int](result)) + +template setIdMaybe() = when defined(useNodeIds): result.id = gNodeId if result.id == nodeIdToDebug: @@ -1217,27 +1257,22 @@ proc newNode*(kind: TNodeKind): PNode = writeStackTrace() inc gNodeId +proc newNode*(kind: TNodeKind): PNode = + ## new node with unknown line info, no type, and no children + newNodeImpl(unknownLineInfo) + setIdMaybe() + proc newNodeI*(kind: TNodeKind, info: TLineInfo): PNode = ## new node with line info, no type, and no children - result = PNode(kind: kind, info: info) - when defined(useNodeIds): - result.id = gNodeId - if result.id == nodeIdToDebug: - echo "KIND ", result.kind - writeStackTrace() - inc gNodeId + newNodeImpl(info) + setIdMaybe() proc newNodeI*(kind: TNodeKind, info: TLineInfo, children: int): PNode = ## new node with line info, type, and children - result = PNode(kind: kind, info: info) + newNodeImpl(info) if children > 0: newSeq(result.sons, children) - when defined(useNodeIds): - result.id = gNodeId - if result.id == nodeIdToDebug: - echo "KIND ", result.kind - writeStackTrace() - inc gNodeId + setIdMaybe() proc newNodeIT*(kind: TNodeKind, info: TLineInfo, typ: PType): PNode = ## new node with line info, type, and no children @@ -1644,8 +1679,8 @@ proc copyNode*(src: PNode): PNode = template transitionNodeKindCommon(k: TNodeKind) = let obj {.inject.} = n[] - n[] = TNode(kind: k, typ: obj.typ, info: obj.info, flags: obj.flags, - comment: obj.comment) + n[] = TNode(kind: k, typ: obj.typ, info: obj.info, flags: obj.flags) + # n.comment = obj.comment # shouldn't be needed, the address doesnt' change when defined(useNodeIds): n.id = obj.id diff --git a/compiler/commands.nim b/compiler/commands.nim index 82e9081525b17..079717862d8dc 100644 --- a/compiler/commands.nim +++ b/compiler/commands.nim @@ -8,7 +8,7 @@ # # This module handles the parsing of command line arguments. - +from ast import setUseIc # We do this here before the 'import' statement so 'defined' does not get # confused with 'TGCMode.gcMarkAndSweep' etc. @@ -862,6 +862,7 @@ proc processSwitch*(switch, arg: string, pass: TCmdLinePass, info: TLineInfo; of "v2": conf.symbolFiles = v2Sf of "stress": conf.symbolFiles = stressTest else: localError(conf, info, "invalid option for --incremental: " & arg) + setUseIc(conf.symbolFiles != disabledSf) of "skipcfg": processOnOffSwitchG(conf, {optSkipSystemConfigFile}, arg, pass, info) of "skipprojcfg": diff --git a/compiler/parser.nim b/compiler/parser.nim index 30180e545e5eb..225493902ba69 100644 --- a/compiler/parser.nim +++ b/compiler/parser.nim @@ -170,13 +170,15 @@ proc validInd(p: var Parser): bool {.inline.} = proc rawSkipComment(p: var Parser, node: PNode) = if p.tok.tokType == tkComment: if node != nil: + var rhs = node.comment when defined(nimpretty): if p.tok.commentOffsetB > p.tok.commentOffsetA: - node.comment.add fileSection(p.lex.config, p.lex.fileIdx, p.tok.commentOffsetA, p.tok.commentOffsetB) + rhs.add fileSection(p.lex.config, p.lex.fileIdx, p.tok.commentOffsetA, p.tok.commentOffsetB) else: - node.comment.add p.tok.literal + rhs.add p.tok.literal else: - node.comment.add p.tok.literal + rhs.add p.tok.literal + node.comment = move rhs else: parMessage(p, errInternal, "skipComment") getTok(p) @@ -1824,8 +1826,10 @@ proc parseRoutine(p: var Parser, kind: TNodeKind): PNode = if result.comment.len == 0: # proc fn*(a: int): int = a ## foo # => moves comment `foo` to `fn` - swap(result.comment, body[0].comment) - else: discard # xxx either `assert false` or issue a warning (otherwise we'll never know of this edge case) + result.comment = body[0].comment + body[0].comment = "" + else: + assert false, p.lex.config$body.info # avoids hard to track bugs, fail early. proc newCommentStmt(p: var Parser): PNode = #| commentStmt = COMMENT