Skip to content

Commit

Permalink
move PNode.comment to a side channel, reducing memory usage during co…
Browse files Browse the repository at this point in the history
…mpilation by a factor 1.25x (nim-lang#18760)

* move PNode.comment so a side channel, reducing memory usage

* fix a bug

* fixup

* use sfHasComment to speedup comment lookups

* fix for IC

* Update compiler/parser.nim

Co-authored-by: Andreas Rumpf <rumpf_a@web.de>
  • Loading branch information
2 people authored and PMunch committed Mar 28, 2022
1 parent 53d56fe commit 00454f2
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 27 deletions.
77 changes: 56 additions & 21 deletions compiler/ast.nim
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# abstract syntax tree + symbol table

import
lineinfos, hashes, options, ropes, idents, int128
lineinfos, hashes, options, ropes, idents, int128, tables
from strutils import toLowerAscii

export int128
Expand Down Expand Up @@ -497,6 +497,7 @@ type
nfExecuteOnReload # A top-level statement that will be executed during reloads
nfLastRead # this node is a last read
nfFirstWrite# this node is a first write
nfHasComment # node has a comment

TNodeFlags* = set[TNodeFlag]
TTypeFlag* = enum # keep below 32 for efficiency reasons (now: 43)
Expand Down Expand Up @@ -774,7 +775,6 @@ type
ident*: PIdent
else:
sons*: TNodeSeq
comment*: string

TStrTable* = object # a table[PIdent] of PSym
counter*: int
Expand Down Expand Up @@ -991,6 +991,38 @@ type
TImplication* = enum
impUnknown, impNo, impYes

template nodeId(n: PNode): int = cast[int](n)

type Gconfig = object
# we put comments in a side channel to avoid increasing `sizeof(TNode)`, which
# reduces memory usage given that `PNode` is the most allocated type by far.
comments: Table[int, string] # nodeId => comment
useIc*: bool

var gconfig {.threadvar.}: Gconfig

proc setUseIc*(useIc: bool) = gconfig.useIc = useIc

proc comment*(n: PNode): string =
if nfHasComment in n.flags and not gconfig.useIc:
# IC doesn't track comments, see `packed_ast`, so this could fail
result = gconfig.comments[n.nodeId]

proc `comment=`*(n: PNode, a: string) =
let id = n.nodeId
if a.len > 0:
# if needed, we could periodically cleanup gconfig.comments when its size increases,
# to ensure only live nodes (and with nfHasComment) have an entry in gconfig.comments;
# for compiling compiler, the waste is very small:
# num calls to newNodeImpl: 14984160 (num of PNode allocations)
# size of gconfig.comments: 33585
# num of nodes with comments that were deleted and hence wasted: 3081
n.flags.incl nfHasComment
gconfig.comments[id] = a
elif nfHasComment in n.flags:
n.flags.excl nfHasComment
gconfig.comments.del(id)

# BUGFIX: a module is overloadable so that a proc can have the
# same name as an imported module. This is necessary because of
# the poor naming choices in the standard library.
Expand Down Expand Up @@ -1207,37 +1239,40 @@ when defined(useNodeIds):
const nodeIdToDebug* = -1 # 2322968
var gNodeId: int

proc newNode*(kind: TNodeKind): PNode =
## new node with unknown line info, no type, and no children
result = PNode(kind: kind, info: unknownLineInfo)
template newNodeImpl(info2) =
result = PNode(kind: kind, info: info2)
when false:
# this would add overhead, so we skip it; it results in a small amount of leaked entries
# for old PNode that gets re-allocated at the same address as a PNode that
# has `nfHasComment` set (and an entry in that table). Only `nfHasComment`
# should be used to test whether a PNode has a comment; gconfig.comments
# can contain extra entries for deleted PNode's with comments.
gconfig.comments.del(cast[int](result))

template setIdMaybe() =
when defined(useNodeIds):
result.id = gNodeId
if result.id == nodeIdToDebug:
echo "KIND ", result.kind
writeStackTrace()
inc gNodeId

proc newNode*(kind: TNodeKind): PNode =
## new node with unknown line info, no type, and no children
newNodeImpl(unknownLineInfo)
setIdMaybe()

proc newNodeI*(kind: TNodeKind, info: TLineInfo): PNode =
## new node with line info, no type, and no children
result = PNode(kind: kind, info: info)
when defined(useNodeIds):
result.id = gNodeId
if result.id == nodeIdToDebug:
echo "KIND ", result.kind
writeStackTrace()
inc gNodeId
newNodeImpl(info)
setIdMaybe()

proc newNodeI*(kind: TNodeKind, info: TLineInfo, children: int): PNode =
## new node with line info, type, and children
result = PNode(kind: kind, info: info)
newNodeImpl(info)
if children > 0:
newSeq(result.sons, children)
when defined(useNodeIds):
result.id = gNodeId
if result.id == nodeIdToDebug:
echo "KIND ", result.kind
writeStackTrace()
inc gNodeId
setIdMaybe()

proc newNodeIT*(kind: TNodeKind, info: TLineInfo, typ: PType): PNode =
## new node with line info, type, and no children
Expand Down Expand Up @@ -1644,8 +1679,8 @@ proc copyNode*(src: PNode): PNode =

template transitionNodeKindCommon(k: TNodeKind) =
let obj {.inject.} = n[]
n[] = TNode(kind: k, typ: obj.typ, info: obj.info, flags: obj.flags,
comment: obj.comment)
n[] = TNode(kind: k, typ: obj.typ, info: obj.info, flags: obj.flags)
# n.comment = obj.comment # shouldn't be needed, the address doesnt' change
when defined(useNodeIds):
n.id = obj.id

Expand Down
3 changes: 2 additions & 1 deletion compiler/commands.nim
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#

# This module handles the parsing of command line arguments.

from ast import setUseIc

# We do this here before the 'import' statement so 'defined' does not get
# confused with 'TGCMode.gcMarkAndSweep' etc.
Expand Down Expand Up @@ -862,6 +862,7 @@ proc processSwitch*(switch, arg: string, pass: TCmdLinePass, info: TLineInfo;
of "v2": conf.symbolFiles = v2Sf
of "stress": conf.symbolFiles = stressTest
else: localError(conf, info, "invalid option for --incremental: " & arg)
setUseIc(conf.symbolFiles != disabledSf)
of "skipcfg":
processOnOffSwitchG(conf, {optSkipSystemConfigFile}, arg, pass, info)
of "skipprojcfg":
Expand Down
14 changes: 9 additions & 5 deletions compiler/parser.nim
Original file line number Diff line number Diff line change
Expand Up @@ -170,13 +170,15 @@ proc validInd(p: var Parser): bool {.inline.} =
proc rawSkipComment(p: var Parser, node: PNode) =
if p.tok.tokType == tkComment:
if node != nil:
var rhs = node.comment
when defined(nimpretty):
if p.tok.commentOffsetB > p.tok.commentOffsetA:
node.comment.add fileSection(p.lex.config, p.lex.fileIdx, p.tok.commentOffsetA, p.tok.commentOffsetB)
rhs.add fileSection(p.lex.config, p.lex.fileIdx, p.tok.commentOffsetA, p.tok.commentOffsetB)
else:
node.comment.add p.tok.literal
rhs.add p.tok.literal
else:
node.comment.add p.tok.literal
rhs.add p.tok.literal
node.comment = move rhs
else:
parMessage(p, errInternal, "skipComment")
getTok(p)
Expand Down Expand Up @@ -1824,8 +1826,10 @@ proc parseRoutine(p: var Parser, kind: TNodeKind): PNode =
if result.comment.len == 0:
# proc fn*(a: int): int = a ## foo
# => moves comment `foo` to `fn`
swap(result.comment, body[0].comment)
else: discard # xxx either `assert false` or issue a warning (otherwise we'll never know of this edge case)
result.comment = body[0].comment
body[0].comment = ""
else:
assert false, p.lex.config$body.info # avoids hard to track bugs, fail early.

proc newCommentStmt(p: var Parser): PNode =
#| commentStmt = COMMENT
Expand Down

0 comments on commit 00454f2

Please sign in to comment.