3737#define TABSIZE 8
3838
/* Build a token of kind `token_type` through token_setup().  Relies on the
   variables `tok`, `token`, `p_start` and `p_end` being in scope at the
   point of expansion. */
#define MAKE_TOKEN(token_type) token_setup(tok, token, token_type, p_start, p_end)

/* Like MAKE_TOKEN, but goes through type_comment_token_setup() so that the
   caller supplies the token's column offsets explicitly instead of having
   them taken from the tokenizer state. */
#define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
                type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end))

/* Advance `tok` (must be in scope) to the next line: bump the line counter
   and reset the running column offset to 0.
   The body is wrapped in do { ... } while (0) so the macro expands to a
   single statement; otherwise only the first assignment would be governed
   by an unbraced if/else/loop.  Always terminate the invocation with ';'. */
#define ADVANCE_LINENO() \
            do { \
                tok->lineno++; \
                tok->col_offset = 0; \
            } while (0)
4045
4146/* Forward */
4247static struct tok_state * tok_new (void );
@@ -73,6 +78,8 @@ tok_new(void)
7378 tok -> pendin = 0 ;
7479 tok -> prompt = tok -> nextprompt = NULL ;
7580 tok -> lineno = 0 ;
81+ tok -> starting_col_offset = -1 ;
82+ tok -> col_offset = -1 ;
7683 tok -> level = 0 ;
7784 tok -> altindstack [0 ] = 0 ;
7885 tok -> decoding_state = STATE_INIT ;
@@ -871,7 +878,7 @@ tok_underflow_string(struct tok_state *tok) {
871878 tok -> buf = tok -> cur ;
872879 }
873880 tok -> line_start = tok -> cur ;
874- tok -> lineno ++ ;
881+ ADVANCE_LINENO () ;
875882 tok -> inp = end ;
876883 return 1 ;
877884}
@@ -930,7 +937,7 @@ tok_underflow_interactive(struct tok_state *tok) {
930937 else if (tok -> start != NULL ) {
931938 Py_ssize_t cur_multi_line_start = tok -> multi_line_start - tok -> buf ;
932939 size_t size = strlen (newtok );
933- tok -> lineno ++ ;
940+ ADVANCE_LINENO () ;
934941 if (!tok_reserve_buf (tok , size + 1 )) {
935942 PyMem_Free (tok -> buf );
936943 tok -> buf = NULL ;
@@ -943,7 +950,7 @@ tok_underflow_interactive(struct tok_state *tok) {
943950 tok -> multi_line_start = tok -> buf + cur_multi_line_start ;
944951 }
945952 else {
946- tok -> lineno ++ ;
953+ ADVANCE_LINENO () ;
947954 PyMem_Free (tok -> buf );
948955 tok -> buf = newtok ;
949956 tok -> cur = tok -> buf ;
@@ -998,7 +1005,7 @@ tok_underflow_file(struct tok_state *tok) {
9981005 * tok -> inp = '\0' ;
9991006 }
10001007
1001- tok -> lineno ++ ;
1008+ ADVANCE_LINENO () ;
10021009 if (tok -> decoding_state != STATE_NORMAL ) {
10031010 if (tok -> lineno > 2 ) {
10041011 tok -> decoding_state = STATE_NORMAL ;
@@ -1056,6 +1063,7 @@ tok_nextc(struct tok_state *tok)
10561063 int rc ;
10571064 for (;;) {
10581065 if (tok -> cur != tok -> inp ) {
1066+ tok -> col_offset ++ ;
10591067 return Py_CHARMASK (* tok -> cur ++ ); /* Fast path */
10601068 }
10611069 if (tok -> done != E_OK ) {
@@ -1104,6 +1112,7 @@ tok_backup(struct tok_state *tok, int c)
11041112 if ((int )(unsigned char )* tok -> cur != c ) {
11051113 Py_FatalError ("tok_backup: wrong character" );
11061114 }
1115+ tok -> col_offset -- ;
11071116 }
11081117}
11091118
@@ -1390,21 +1399,33 @@ tok_continuation_line(struct tok_state *tok) {
13901399 return c ;
13911400}
13921401
1402+ static int
1403+ type_comment_token_setup (struct tok_state * tok , struct token * token , int type , int col_offset ,
1404+ int end_col_offset , const char * start , const char * end )
1405+ {
1406+ token -> level = tok -> level ;
1407+ token -> lineno = token -> end_lineno = tok -> lineno ;
1408+ token -> col_offset = col_offset ;
1409+ token -> end_col_offset = end_col_offset ;
1410+ token -> start = start ;
1411+ token -> end = end ;
1412+ return type ;
1413+ }
1414+
13931415static int
13941416token_setup (struct tok_state * tok , struct token * token , int type , const char * start , const char * end )
13951417{
13961418 assert ((start == NULL && end == NULL ) || (start != NULL && end != NULL ));
13971419 token -> level = tok -> level ;
13981420 token -> lineno = type == STRING ? tok -> first_lineno : tok -> lineno ;
13991421 token -> end_lineno = tok -> lineno ;
1400- token -> col_offset = -1 ;
1401- token -> end_col_offset = -1 ;
1422+ token -> col_offset = token -> end_col_offset = -1 ;
14021423 token -> start = start ;
14031424 token -> end = end ;
1425+
14041426 if (start != NULL && end != NULL ) {
1405- const char * line_start = type == STRING ? tok -> multi_line_start : tok -> line_start ;
1406- token -> col_offset = (start >= line_start ) ? (int )(start - line_start ) : -1 ;
1407- token -> end_col_offset = (end >= tok -> line_start ) ? (int )(end - tok -> line_start ) : -1 ;
1427+ token -> col_offset = tok -> starting_col_offset ;
1428+ token -> end_col_offset = tok -> col_offset ;
14081429 }
14091430 return type ;
14101431}
@@ -1419,6 +1440,7 @@ tok_get(struct tok_state *tok, struct token *token)
14191440 const char * p_end = NULL ;
14201441 nextline :
14211442 tok -> start = NULL ;
1443+ tok -> starting_col_offset = -1 ;
14221444 blankline = 0 ;
14231445
14241446 /* Get indentation level */
@@ -1518,6 +1540,7 @@ tok_get(struct tok_state *tok, struct token *token)
15181540 }
15191541
15201542 tok -> start = tok -> cur ;
1543+ tok -> starting_col_offset = tok -> col_offset ;
15211544
15221545 /* Return pending indents/dedents */
15231546 if (tok -> pendin != 0 ) {
@@ -1565,25 +1588,30 @@ tok_get(struct tok_state *tok, struct token *token)
15651588
15661589 /* Set start of current token */
15671590 tok -> start = tok -> cur == NULL ? NULL : tok -> cur - 1 ;
1591+ tok -> starting_col_offset = tok -> col_offset - 1 ;
15681592
15691593 /* Skip comment, unless it's a type comment */
15701594 if (c == '#' ) {
15711595 const char * prefix , * p , * type_start ;
1596+ int current_starting_col_offset ;
15721597
15731598 while (c != EOF && c != '\n' ) {
15741599 c = tok_nextc (tok );
15751600 }
15761601
15771602 if (tok -> type_comments ) {
15781603 p = tok -> start ;
1604+ current_starting_col_offset = tok -> starting_col_offset ;
15791605 prefix = type_comment_prefix ;
15801606 while (* prefix && p < tok -> cur ) {
15811607 if (* prefix == ' ' ) {
15821608 while (* p == ' ' || * p == '\t' ) {
15831609 p ++ ;
1610+ current_starting_col_offset ++ ;
15841611 }
15851612 } else if (* prefix == * p ) {
15861613 p ++ ;
1614+ current_starting_col_offset ++ ;
15871615 } else {
15881616 break ;
15891617 }
@@ -1594,7 +1622,9 @@ tok_get(struct tok_state *tok, struct token *token)
15941622 /* This is a type comment if we matched all of type_comment_prefix. */
15951623 if (!* prefix ) {
15961624 int is_type_ignore = 1 ;
1625+ // +6 in order to skip the word 'ignore'
15971626 const char * ignore_end = p + 6 ;
1627+ const int ignore_end_col_offset = current_starting_col_offset + 6 ;
15981628 tok_backup (tok , c ); /* don't eat the newline or EOF */
15991629
16001630 type_start = p ;
@@ -1615,11 +1645,11 @@ tok_get(struct tok_state *tok, struct token *token)
16151645 tok_nextc (tok );
16161646 tok -> atbol = 1 ;
16171647 }
1618- return MAKE_TOKEN (TYPE_IGNORE );
1648+ return MAKE_TYPE_COMMENT_TOKEN (TYPE_IGNORE , ignore_end_col_offset , tok -> col_offset );
16191649 } else {
16201650 p_start = type_start ;
16211651 p_end = tok -> cur ;
1622- return MAKE_TOKEN (TYPE_COMMENT );
1652+ return MAKE_TYPE_COMMENT_TOKEN (TYPE_COMMENT , current_starting_col_offset , tok -> col_offset );
16231653 }
16241654 }
16251655 }
0 commit comments