@@ -625,6 +625,64 @@ growable_comment_array_deallocate(growable_comment_array *arr) {
625625 PyMem_Free (arr -> items );
626626}
627627
628+ static int
629+ initialize_token (Parser * p , Token * token , const char * start , const char * end , int token_type ) {
630+ assert (token != NULL );
631+
632+ token -> type = (token_type == NAME ) ? _get_keyword_or_name_type (p , start , (int )(end - start )) : token_type ;
633+ token -> bytes = PyBytes_FromStringAndSize (start , end - start );
634+ if (token -> bytes == NULL ) {
635+ return -1 ;
636+ }
637+
638+ if (_PyArena_AddPyObject (p -> arena , token -> bytes ) < 0 ) {
639+ Py_DECREF (token -> bytes );
640+ return -1 ;
641+ }
642+
643+ const char * line_start = token_type == STRING ? p -> tok -> multi_line_start : p -> tok -> line_start ;
644+ int lineno = token_type == STRING ? p -> tok -> first_lineno : p -> tok -> lineno ;
645+ int end_lineno = p -> tok -> lineno ;
646+
647+ int col_offset = (start != NULL && start >= line_start ) ? (int )(start - line_start ) : -1 ;
648+ int end_col_offset = (end != NULL && end >= p -> tok -> line_start ) ? (int )(end - p -> tok -> line_start ) : -1 ;
649+
650+ token -> lineno = p -> starting_lineno + lineno ;
651+ token -> col_offset = p -> tok -> lineno == 1 ? p -> starting_col_offset + col_offset : col_offset ;
652+ token -> end_lineno = p -> starting_lineno + end_lineno ;
653+ token -> end_col_offset = p -> tok -> lineno == 1 ? p -> starting_col_offset + end_col_offset : end_col_offset ;
654+
655+ p -> fill += 1 ;
656+
657+ if (token_type == ERRORTOKEN && p -> tok -> done == E_DECODE ) {
658+ return raise_decode_error (p );
659+ }
660+
661+ return (token_type == ERRORTOKEN ? tokenizer_error (p ) : 0 );
662+ }
663+
664+ static int
665+ _resize_tokens_array (Parser * p ) {
666+ int newsize = p -> size * 2 ;
667+ Token * * new_tokens = PyMem_Realloc (p -> tokens , newsize * sizeof (Token * ));
668+ if (new_tokens == NULL ) {
669+ PyErr_NoMemory ();
670+ return -1 ;
671+ }
672+ p -> tokens = new_tokens ;
673+
674+ for (int i = p -> size ; i < newsize ; i ++ ) {
675+ p -> tokens [i ] = PyMem_Calloc (1 , sizeof (Token ));
676+ if (p -> tokens [i ] == NULL ) {
677+ p -> size = i ; // Needed, in order to cleanup correctly after parser fails
678+ PyErr_NoMemory ();
679+ return -1 ;
680+ }
681+ }
682+ p -> size = newsize ;
683+ return 0 ;
684+ }
685+
628686int
629687_PyPegen_fill_token (Parser * p )
630688{
@@ -650,7 +708,8 @@ _PyPegen_fill_token(Parser *p)
650708 type = PyTokenizer_Get (p -> tok , & start , & end );
651709 }
652710
653- if (type == ENDMARKER && p -> start_rule == Py_single_input && p -> parsing_started ) {
711+ // If we have reached the end and we are in single input mode we need to insert a newline and reset the parsing
712+ if (p -> start_rule == Py_single_input && type == ENDMARKER && p -> parsing_started ) {
654713 type = NEWLINE ; /* Add an extra newline */
655714 p -> parsing_started = 0 ;
656715
@@ -663,66 +722,13 @@ _PyPegen_fill_token(Parser *p)
663722 p -> parsing_started = 1 ;
664723 }
665724
666- if (p -> fill == p -> size ) {
667- int newsize = p -> size * 2 ;
668- Token * * new_tokens = PyMem_Realloc (p -> tokens , newsize * sizeof (Token * ));
669- if (new_tokens == NULL ) {
670- PyErr_NoMemory ();
671- return -1 ;
672- }
673- p -> tokens = new_tokens ;
674-
675- for (int i = p -> size ; i < newsize ; i ++ ) {
676- p -> tokens [i ] = PyMem_Malloc (sizeof (Token ));
677- if (p -> tokens [i ] == NULL ) {
678- p -> size = i ; // Needed, in order to cleanup correctly after parser fails
679- PyErr_NoMemory ();
680- return -1 ;
681- }
682- memset (p -> tokens [i ], '\0' , sizeof (Token ));
683- }
684- p -> size = newsize ;
685- }
686-
687- Token * t = p -> tokens [p -> fill ];
688- t -> type = (type == NAME ) ? _get_keyword_or_name_type (p , start , (int )(end - start )) : type ;
689- t -> bytes = PyBytes_FromStringAndSize (start , end - start );
690- if (t -> bytes == NULL ) {
691- return -1 ;
692- }
693- if (_PyArena_AddPyObject (p -> arena , t -> bytes ) < 0 ) {
694- Py_DECREF (t -> bytes );
725+ // Check if we are at the limit of the token array capacity and resize if needed
726+ if ((p -> fill == p -> size ) && (_resize_tokens_array (p ) != 0 )) {
695727 return -1 ;
696728 }
697729
698- int lineno = type == STRING ? p -> tok -> first_lineno : p -> tok -> lineno ;
699- const char * line_start = type == STRING ? p -> tok -> multi_line_start : p -> tok -> line_start ;
700- int end_lineno = p -> tok -> lineno ;
701- int col_offset = -1 ;
702- int end_col_offset = -1 ;
703- if (start != NULL && start >= line_start ) {
704- col_offset = (int )(start - line_start );
705- }
706- if (end != NULL && end >= p -> tok -> line_start ) {
707- end_col_offset = (int )(end - p -> tok -> line_start );
708- }
709-
710- t -> lineno = p -> starting_lineno + lineno ;
711- t -> col_offset = p -> tok -> lineno == 1 ? p -> starting_col_offset + col_offset : col_offset ;
712- t -> end_lineno = p -> starting_lineno + end_lineno ;
713- t -> end_col_offset = p -> tok -> lineno == 1 ? p -> starting_col_offset + end_col_offset : end_col_offset ;
714-
715- p -> fill += 1 ;
716-
717- if (type == ERRORTOKEN ) {
718- if (p -> tok -> done == E_DECODE ) {
719- return raise_decode_error (p );
720- }
721- return tokenizer_error (p );
722-
723- }
724-
725- return 0 ;
730+ Token * t = p -> tokens [p -> fill ];
731+ return initialize_token (p , t , start , end , type );
726732}
727733
728734
0 commit comments