Skip to content

Commit f476f86

Browse files
committed
Minor performance fixes
1 parent 3cc2414 commit f476f86

File tree

4 files changed

+18
-9
lines changed

4 files changed

+18
-9
lines changed

syncode/grammar_decoder.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -208,12 +208,12 @@ def _get_partial_codes(self, input_ids: torch.LongTensor) -> list[(str, bytes)]:
208208
if self.parse_output_only:
209209
partial_code, remainder_bytes = self._bytes_to_string(
210210
self.byte_tokenizer.decode(
211-
input_ids[idx, self.start_from:].to('cpu', non_blocking=True).tolist(), skip_special_tokens=True)
211+
input_ids[idx, self.start_from:].tolist(), skip_special_tokens=True)
212212
)
213213
else:
214214
partial_code, remainder_bytes = self._bytes_to_string(
215215
self.byte_tokenizer.decode(
216-
input_ids[idx].to('cpu', non_blocking=True).tolist(), skip_special_tokens=True)
216+
input_ids[idx].tolist(), skip_special_tokens=True)
217217
)
218218
output.append((partial_code, remainder_bytes))
219219
return output

syncode/larkm/lark.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@
2828
from .parse_tree_builder import ParseTreeBuilder
2929
from .parser_frontends import _validate_frontend_args, _get_lexer_callbacks, _deserialize_parsing_frontend, _construct_parsing_frontend
3030
from .grammar import Rule
31-
31+
import logging
32+
syn_logger = logging.getLogger(__name__)
3233

3334
try:
3435
import regex
@@ -349,7 +350,7 @@ def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None:
349350
except FileNotFoundError:
350351
# The cache file doesn't exist; parse and compose the grammar as normal
351352
if self.options.parser == 'lr':
352-
print(f"Cache file {cache_fn} not found. Building LR parser from scratch may take 2-3 minutes for large grammars. This will be cached for future runs.")
353+
syn_logger.info(f"Cache file {cache_fn} not found. Building LR parser from scratch may take 2-3 minutes for large grammars. This will be cached for future runs.")
353354
except Exception: # We should probably narrow down which errors we catch here.
354355
logger.exception("Failed to load Lark from cache: %r. We will try to carry on.", cache_fn)
355356

syncode/larkm/parsers/lalr_interactive_parser.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,18 +103,19 @@ def choices(self):
103103
def accepts(self):
104104
"""Returns the set of possible tokens that will advance the parser into a new valid state."""
105105
accepts = set()
106-
conf_no_callbacks = copy(self.parser_state.parse_conf)
107106
# We don't want to call callbacks here since those might have arbitrary side effects
108107
# and are unnecessarily slow.
109-
conf_no_callbacks.callbacks = {}
110108
choices = self.choices()
111-
112109
if self.parser.parser_type == 'lr':
113110
for t in choices:
114111
if t.isupper():
115112
accepts.add(t)
116113
return accepts
117114

115+
# If the parser is LALR, we need to check each token to see if it's accepted
116+
conf_no_callbacks = copy(self.parser_state.parse_conf)
117+
conf_no_callbacks.callbacks = {}
118+
118119
for t in choices:
119120
if t.isupper(): # is terminal?
120121
new_cursor = copy(self)

syncode/parsers/incremental_parser.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,14 @@ def _store_parser_state(self, pos: int, lexer_tokens: Iterable[Token], parser_st
5757
key = self._get_hash(lexer_tokens[:pos+1])
5858

5959
# parser_state, cur_ac_terminals, next_ac_terminals, indent_levels, dedent_queue
60-
self.cur_pos_to_parser_state[key] = (copy.deepcopy(self.parsed_lexer_tokens), parser_state, cur_ac_terminals, next_ac_terminals, indent_levels, copy.deepcopy(self.dedent_queue))
60+
self.cur_pos_to_parser_state[key] = (
61+
copy.deepcopy(self.parsed_lexer_tokens),
62+
parser_state.copy(),
63+
cur_ac_terminals,
64+
next_ac_terminals,
65+
indent_levels,
66+
copy.deepcopy(self.dedent_queue)
67+
)
6168

6269
self.cur_ac_terminals = copy.deepcopy(cur_ac_terminals)
6370
self.next_ac_terminals = copy.deepcopy(next_ac_terminals)
@@ -151,7 +158,7 @@ def get_acceptable_next_terminals(self, partial_code) -> ParseResult:
151158
self._store_parser_state(
152159
self.cur_pos-1,
153160
lexer_tokens,
154-
interactive.parser_state.copy(),
161+
interactive.parser_state,
155162
self._accepts(interactive))
156163

157164
except lark.exceptions.UnexpectedToken as e:

0 commit comments

Comments
 (0)