Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

move PNode.comment to a side channel, reducing memory usage during compilation by a factor 1.25x #18760

Merged
merged 6 commits into from
Aug 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 56 additions & 21 deletions compiler/ast.nim
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# abstract syntax tree + symbol table

import
lineinfos, hashes, options, ropes, idents, int128
lineinfos, hashes, options, ropes, idents, int128, tables
from strutils import toLowerAscii

export int128
Expand Down Expand Up @@ -497,6 +497,7 @@ type
nfExecuteOnReload # A top-level statement that will be executed during reloads
nfLastRead # this node is a last read
nfFirstWrite# this node is a first write
nfHasComment # node has a comment

TNodeFlags* = set[TNodeFlag]
TTypeFlag* = enum # keep below 32 for efficiency reasons (now: 43)
Expand Down Expand Up @@ -774,7 +775,6 @@ type
ident*: PIdent
else:
sons*: TNodeSeq
comment*: string

TStrTable* = object # a table[PIdent] of PSym
counter*: int
Expand Down Expand Up @@ -991,6 +991,38 @@ type
TImplication* = enum
impUnknown, impNo, impYes

template nodeId(n: PNode): int = cast[int](n)

type Gconfig = object
# we put comments in a side channel to avoid increasing `sizeof(TNode)`, which
# reduces memory usage given that `PNode` is the most allocated type by far.
comments: Table[int, string] # nodeId => comment
useIc*: bool

var gconfig {.threadvar.}: Gconfig

proc setUseIc*(useIc: bool) = gconfig.useIc = useIc

proc comment*(n: PNode): string =
if nfHasComment in n.flags and not gconfig.useIc:
# IC doesn't track comments, see `packed_ast`, so this could fail
result = gconfig.comments[n.nodeId]

proc `comment=`*(n: PNode, a: string) =
let id = n.nodeId
if a.len > 0:
# if needed, we could periodically cleanup gconfig.comments when its size increases,
# to ensure only live nodes (and with nfHasComment) have an entry in gconfig.comments;
# for compiling compiler, the waste is very small:
# num calls to newNodeImpl: 14984160 (num of PNode allocations)
# size of gconfig.comments: 33585
# num of nodes with comments that were deleted and hence wasted: 3081
n.flags.incl nfHasComment
gconfig.comments[id] = a
elif nfHasComment in n.flags:
n.flags.excl nfHasComment
gconfig.comments.del(id)

# BUGFIX: a module is overloadable so that a proc can have the
# same name as an imported module. This is necessary because of
# the poor naming choices in the standard library.
Expand Down Expand Up @@ -1207,37 +1239,40 @@ when defined(useNodeIds):
const nodeIdToDebug* = -1 # 2322968
var gNodeId: int

proc newNode*(kind: TNodeKind): PNode =
## new node with unknown line info, no type, and no children
result = PNode(kind: kind, info: unknownLineInfo)
template newNodeImpl(info2) =
result = PNode(kind: kind, info: info2)
when false:
# this would add overhead, so we skip it; it results in a small amount of leaked entries
# for old PNode that gets re-allocated at the same address as a PNode that
# has `nfHasComment` set (and an entry in that table). Only `nfHasComment`
# should be used to test whether a PNode has a comment; gconfig.comments
# can contain extra entries for deleted PNode's with comments.
gconfig.comments.del(cast[int](result))

template setIdMaybe() =
when defined(useNodeIds):
result.id = gNodeId
if result.id == nodeIdToDebug:
echo "KIND ", result.kind
writeStackTrace()
inc gNodeId

proc newNode*(kind: TNodeKind): PNode =
## new node with unknown line info, no type, and no children
newNodeImpl(unknownLineInfo)
setIdMaybe()

proc newNodeI*(kind: TNodeKind, info: TLineInfo): PNode =
## new node with line info, no type, and no children
result = PNode(kind: kind, info: info)
when defined(useNodeIds):
result.id = gNodeId
if result.id == nodeIdToDebug:
echo "KIND ", result.kind
writeStackTrace()
inc gNodeId
newNodeImpl(info)
setIdMaybe()

proc newNodeI*(kind: TNodeKind, info: TLineInfo, children: int): PNode =
## new node with line info, type, and children
result = PNode(kind: kind, info: info)
newNodeImpl(info)
if children > 0:
newSeq(result.sons, children)
when defined(useNodeIds):
result.id = gNodeId
if result.id == nodeIdToDebug:
echo "KIND ", result.kind
writeStackTrace()
inc gNodeId
setIdMaybe()

proc newNodeIT*(kind: TNodeKind, info: TLineInfo, typ: PType): PNode =
## new node with line info, type, and no children
Expand Down Expand Up @@ -1644,8 +1679,8 @@ proc copyNode*(src: PNode): PNode =

template transitionNodeKindCommon(k: TNodeKind) =
let obj {.inject.} = n[]
n[] = TNode(kind: k, typ: obj.typ, info: obj.info, flags: obj.flags,
comment: obj.comment)
n[] = TNode(kind: k, typ: obj.typ, info: obj.info, flags: obj.flags)
# n.comment = obj.comment # shouldn't be needed, the address doesnt' change
when defined(useNodeIds):
n.id = obj.id

Expand Down
3 changes: 2 additions & 1 deletion compiler/commands.nim
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#

# This module handles the parsing of command line arguments.

from ast import setUseIc

# We do this here before the 'import' statement so 'defined' does not get
# confused with 'TGCMode.gcMarkAndSweep' etc.
Expand Down Expand Up @@ -862,6 +862,7 @@ proc processSwitch*(switch, arg: string, pass: TCmdLinePass, info: TLineInfo;
of "v2": conf.symbolFiles = v2Sf
of "stress": conf.symbolFiles = stressTest
else: localError(conf, info, "invalid option for --incremental: " & arg)
setUseIc(conf.symbolFiles != disabledSf)
of "skipcfg":
processOnOffSwitchG(conf, {optSkipSystemConfigFile}, arg, pass, info)
of "skipprojcfg":
Expand Down
14 changes: 9 additions & 5 deletions compiler/parser.nim
Original file line number Diff line number Diff line change
Expand Up @@ -170,13 +170,15 @@ proc validInd(p: var Parser): bool {.inline.} =
proc rawSkipComment(p: var Parser, node: PNode) =
if p.tok.tokType == tkComment:
if node != nil:
var rhs = node.comment
when defined(nimpretty):
if p.tok.commentOffsetB > p.tok.commentOffsetA:
node.comment.add fileSection(p.lex.config, p.lex.fileIdx, p.tok.commentOffsetA, p.tok.commentOffsetB)
rhs.add fileSection(p.lex.config, p.lex.fileIdx, p.tok.commentOffsetA, p.tok.commentOffsetB)
else:
node.comment.add p.tok.literal
rhs.add p.tok.literal
else:
node.comment.add p.tok.literal
rhs.add p.tok.literal
node.comment = move rhs
else:
parMessage(p, errInternal, "skipComment")
getTok(p)
Expand Down Expand Up @@ -1824,8 +1826,10 @@ proc parseRoutine(p: var Parser, kind: TNodeKind): PNode =
if result.comment.len == 0:
# proc fn*(a: int): int = a ## foo
# => moves comment `foo` to `fn`
swap(result.comment, body[0].comment)
else: discard # xxx either `assert false` or issue a warning (otherwise we'll never know of this edge case)
result.comment = body[0].comment
body[0].comment = ""
else:
assert false, p.lex.config$body.info # avoids hard to track bugs, fail early.

proc newCommentStmt(p: var Parser): PNode =
#| commentStmt = COMMENT
Expand Down