@@ -45,7 +45,7 @@ void coliter_setup(coliter_t *self, parser_t *parser, int i, int start) {
4545 self -> line_start = parser -> line_start + start ;
4646}
4747
48- coliter_t * coliter_new (parser_t * self , int i ) {
48+ coliter_t * coliter_new (register parser_t * self , int i ) {
4949 // column i, starting at 0
5050 coliter_t * iter = (coliter_t * )malloc (sizeof (coliter_t ));
5151
@@ -97,7 +97,7 @@ static void *grow_buffer(void *buffer, int64_t length, int64_t *capacity,
9797 return newbuffer ;
9898}
9999
100- void parser_set_default_options (parser_t * self ) {
100+ void parser_set_default_options (register parser_t * self ) {
101101 self -> decimal = '.' ;
102102 self -> sci = 'E' ;
103103
@@ -131,11 +131,11 @@ void parser_set_default_options(parser_t *self) {
131131 self -> skip_footer = 0 ;
132132}
133133
// Stub: the argument is not read and the function always returns 0.
134- int get_parser_memory_footprint (parser_t * self ) { return 0 ; }
134+ int get_parser_memory_footprint (register parser_t * self ) { return 0 ; }
135135
// Allocates a single zero-filled parser_t with calloc and returns the pointer
// unchanged, so the result is NULL when calloc fails.
136136parser_t * parser_new () { return (parser_t * )calloc (1 , sizeof (parser_t )); }
137137
138- int parser_clear_data_buffers (parser_t * self ) {
138+ int parser_clear_data_buffers (register parser_t * self ) {
139139 free_if_not_null ((void * )& self -> stream );
140140 free_if_not_null ((void * )& self -> words );
141141 free_if_not_null ((void * )& self -> word_starts );
@@ -144,7 +144,7 @@ int parser_clear_data_buffers(parser_t *self) {
144144 return 0 ;
145145}
146146
147- int parser_cleanup (parser_t * self ) {
147+ int parser_cleanup (register parser_t * self ) {
148148 int status = 0 ;
149149
150150 // XXX where to put this
@@ -170,7 +170,7 @@ int parser_cleanup(parser_t *self) {
170170 return status ;
171171}
172172
173- int parser_init (parser_t * self ) {
173+ int parser_init (register parser_t * self ) {
174174 int64_t sz ;
175175
176176 /*
@@ -240,16 +240,16 @@ int parser_init(parser_t *self) {
240240 return 0 ;
241241}
242242
// Delegates teardown to parser_cleanup and discards the int status it
// returns. This function does not itself call free() on self.
243- void parser_free (parser_t * self ) {
243+ void parser_free (register parser_t * self ) {
244244 // opposite of parser_init
245245 parser_cleanup (self );
246246}
247247
// Releases the parser_t allocation itself with free(); no members are
// touched here. NOTE(review): presumably parser_free is expected to run
// first to release the internal buffers -- confirm against callers.
248- void parser_del (parser_t * self ) {
248+ void parser_del (register parser_t * self ) {
249249 free (self );
250250}
251251
252- static int make_stream_space (parser_t * self , size_t nbytes ) {
252+ static int make_stream_space (register parser_t * self , size_t nbytes ) {
253253 int64_t i , cap , length ;
254254 int status ;
255255 void * orig_ptr , * newptr ;
@@ -363,7 +363,7 @@ static int make_stream_space(parser_t *self, size_t nbytes) {
363363 return 0 ;
364364}
365365
366- static int push_char (parser_t * self , char c ) {
366+ static int push_char (register parser_t * self , char c ) {
367367 TRACE (("push_char: self->stream[%zu] = %x, stream_cap=%zu\n" ,
368368 self -> stream_len + 1 , c , self -> stream_cap ))
369369 if (self -> stream_len >= self -> stream_cap ) {
@@ -381,7 +381,7 @@ static int push_char(parser_t *self, char c) {
381381 return 0 ;
382382}
383383
384- int PANDAS_INLINE end_field (parser_t * self ) {
384+ int PANDAS_INLINE end_field (register parser_t * self ) {
385385 // XXX cruft
386386 if (self -> words_len >= self -> words_cap ) {
387387 TRACE (
@@ -419,7 +419,7 @@ int PANDAS_INLINE end_field(parser_t *self) {
419419 return 0 ;
420420}
421421
422- static void append_warning (parser_t * self , const char * msg ) {
422+ static void append_warning (register parser_t * self , const char * msg ) {
423423 int64_t ex_length ;
424424 int64_t length = strlen (msg );
425425 void * newptr ;
@@ -437,7 +437,7 @@ static void append_warning(parser_t *self, const char *msg) {
437437 }
438438}
439439
440- static int end_line (parser_t * self ) {
440+ static int end_line (register parser_t * self ) {
441441 char * msg ;
442442 int64_t fields ;
443443 int ex_fields = self -> expected_fields ;
@@ -556,7 +556,7 @@ static int end_line(parser_t *self) {
556556 return 0 ;
557557}
558558
559- int parser_add_skiprow (parser_t * self , int64_t row ) {
559+ int parser_add_skiprow (register parser_t * self , int64_t row ) {
560560 khiter_t k ;
561561 kh_int64_t * set ;
562562 int ret = 0 ;
@@ -573,7 +573,7 @@ int parser_add_skiprow(parser_t *self, int64_t row) {
573573 return 0 ;
574574}
575575
576- int parser_set_skipfirstnrows (parser_t * self , int64_t nrows ) {
576+ int parser_set_skipfirstnrows (register parser_t * self , int64_t nrows ) {
577577 // self->file_lines is zero based so subtract 1 from nrows
578578 if (nrows > 0 ) {
579579 self -> skip_first_N_rows = nrows - 1 ;
@@ -582,7 +582,7 @@ int parser_set_skipfirstnrows(parser_t *self, int64_t nrows) {
582582 return 0 ;
583583}
584584
585- static int parser_buffer_bytes (parser_t * self , size_t nbytes ) {
585+ static int parser_buffer_bytes (register parser_t * self , size_t nbytes ) {
586586 int status ;
587587 size_t bytes_read ;
588588
@@ -677,18 +677,16 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes) {
// Character-class helpers for the tokenizer.
// NOTE(review): the argument c is pasted into each expansion without
// parentheses, so only simple expressions should be passed.
677677#define IS_WHITESPACE (c ) ((c == ' ' || c == '\t'))
678678
// After this change IS_TERMINATOR, IS_CARRIAGE, IS_COMMENT_CHAR and
// IS_ESCAPE_CHAR compare against the plain identifiers line_terminator,
// carriage_symbol, comment_symbol and escape_symbol rather than reading
// self fields on every use. They therefore only expand correctly where
// those names are in scope; this diff declares them as const locals at the
// top of tokenize_bytes.
679679#define IS_TERMINATOR (c ) \
680- ((self->lineterminator == '\0' && c == '\n') || \
681- (self->lineterminator != '\0' && c == self->lineterminator))
680+ (c == line_terminator)
682681
683682#define IS_QUOTE (c ) ((c == self->quotechar && self->quoting != QUOTE_NONE))
684683
685684// don't parse '\r' with a custom line terminator
686- #define IS_CARRIAGE (c ) ((self->lineterminator == '\0' && c == '\r') )
685+ #define IS_CARRIAGE (c ) (c == carriage_symbol )
687686
688- #define IS_COMMENT_CHAR (c ) \
689- ((self->commentchar != '\0' && c == self->commentchar))
687+ #define IS_COMMENT_CHAR (c ) (c == comment_symbol)
690688
691- #define IS_ESCAPE_CHAR (c ) ((self->escapechar != '\0' && c == self->escapechar) )
689+ #define IS_ESCAPE_CHAR (c ) (c == escape_symbol )
692690
693691#define IS_SKIPPABLE_SPACE (c ) \
694692 ((!self->delim_whitespace && c == ' ' && self->skipinitialspace))
@@ -710,7 +708,7 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes) {
710708 self->datapos += 3; \
711709 }
712710
713- int skip_this_line (parser_t * self , int64_t rownum ) {
711+ int skip_this_line (register parser_t * self , int64_t rownum ) {
714712 int should_skip ;
715713 PyObject * result ;
716714 PyGILState_STATE state ;
@@ -739,13 +737,25 @@ int skip_this_line(parser_t *self, int64_t rownum) {
739737 }
740738}
741739
742- int tokenize_bytes (parser_t * self , size_t line_limit , int64_t start_lines ) {
740+ int tokenize_bytes (register parser_t * self ,
741+ size_t line_limit , int64_t start_lines ) {
743742 int64_t i , slen ;
744743 int should_skip ;
745744 char c ;
746745 char * stream ;
747746 char * buf = self -> data + self -> datapos ;
748747
748+ const char line_terminator = (self -> lineterminator == '\0' ) ?
749+ '\n' : self -> lineterminator ;
750+
751+ // 1000 is a value that cannot fit in a "char",
752+ // so comparing a char to it is always false
753+ const int carriage_symbol = (self -> lineterminator == '\0' ) ? '\r' : 1000 ;
754+ const int comment_symbol = (self -> commentchar != '\0' ) ?
755+ self -> commentchar : 1000 ;
756+ const int escape_symbol = (self -> escapechar != '\0' ) ?
757+ self -> escapechar : 1000 ;
758+
749759 if (make_stream_space (self , self -> datalen - self -> datapos ) < 0 ) {
750760 int64_t bufsize = 100 ;
751761 self -> error_msg = (char * )malloc (bufsize );
@@ -1149,7 +1159,7 @@ int tokenize_bytes(parser_t *self, size_t line_limit, int64_t start_lines) {
11491159 return 0 ;
11501160}
11511161
1152- static int parser_handle_eof (parser_t * self ) {
1162+ static int parser_handle_eof (register parser_t * self ) {
11531163 int64_t bufsize = 100 ;
11541164
11551165 TRACE (
@@ -1194,7 +1204,7 @@ static int parser_handle_eof(parser_t *self) {
11941204 return 0 ;
11951205}
11961206
1197- int parser_consume_rows (parser_t * self , size_t nrows ) {
1207+ int parser_consume_rows (register parser_t * self , size_t nrows ) {
11981208 int64_t i , offset , word_deletions , char_count ;
11991209
12001210 if (nrows > self -> lines ) {
@@ -1250,7 +1260,7 @@ static size_t _next_pow2(size_t sz) {
12501260 return result ;
12511261}
12521262
1253- int parser_trim_buffers (parser_t * self ) {
1263+ int parser_trim_buffers (register parser_t * self ) {
12541264 /*
12551265 Free memory
12561266 */
@@ -1353,7 +1363,7 @@ int parser_trim_buffers(parser_t *self) {
13531363 all : tokenize all the data vs. certain number of rows
13541364 */
13551365
1356- int _tokenize_helper (parser_t * self , size_t nrows , int all ) {
1366+ int _tokenize_helper (register parser_t * self , size_t nrows , int all ) {
13571367 int status = 0 ;
13581368 int64_t start_lines = self -> lines ;
13591369
@@ -1402,12 +1412,12 @@ int _tokenize_helper(parser_t *self, size_t nrows, int all) {
14021412 return status ;
14031413}
14041414
// Thin wrapper: forwards self and nrows to _tokenize_helper with all = 0
// and returns the helper's status unchanged.
1405- int tokenize_nrows (parser_t * self , size_t nrows ) {
1415+ int tokenize_nrows (register parser_t * self , size_t nrows ) {
14061416 int status = _tokenize_helper (self , nrows , 0 );
14071417 return status ;
14081418}
14091419
// Thin wrapper: calls _tokenize_helper with all = 1 and returns the
// helper's status unchanged. The nrows argument is declared size_t in
// _tokenize_helper, so the -1 passed here converts to the largest size_t
// value.
1410- int tokenize_all_rows (parser_t * self ) {
1420+ int tokenize_all_rows (register parser_t * self ) {
14111421 int status = _tokenize_helper (self , -1 , 1 );
14121422 return status ;
14131423}
@@ -1529,9 +1539,14 @@ int main(int argc, char *argv[]) {
15291539// * Add tsep argument for thousands separator
15301540//
15311541
1542+ // pessimistic but quick assessment,
1543+ // assuming that each decimal digit requires 4 bits to store
// The expression is (bytes in unsigned int * 8) / 4, so it evaluates to 8
// when unsigned int is 4 bytes; the * 8 assumes 8-bit bytes.
// NOTE(review): declared at file scope without static, so if this is
// compiled as C the name has external linkage -- confirm that is intended.
1544+ const int max_int_decimal_digits = (sizeof (unsigned int ) * 8 ) / 4 ;
1545+
15321546double xstrtod (const char * str , char * * endptr , char decimal , char sci ,
15331547 char tsep , int skip_trailing ) {
15341548 double number ;
1549+ unsigned int i_number = 0 ;
15351550 int exponent ;
15361551 int negative ;
15371552 char * p = (char * )str ;
@@ -1554,19 +1569,30 @@ double xstrtod(const char *str, char **endptr, char decimal, char sci,
15541569 p ++ ;
15551570 }
15561571
1557- number = 0. ;
15581572 exponent = 0 ;
15591573 num_digits = 0 ;
15601574 num_decimals = 0 ;
15611575
15621576 // Process string of digits.
1563- while (isdigit_ascii (* p )) {
1564- number = number * 10. + (* p - '0' );
1577+ while (isdigit_ascii (* p ) && num_digits <= max_int_decimal_digits ) {
1578+ i_number = i_number * 10 + (* p - '0' );
15651579 p ++ ;
15661580 num_digits ++ ;
15671581
15681582 p += (tsep != '\0' && * p == tsep );
15691583 }
1584+ number = i_number ;
1585+
1586+ if (num_digits > max_int_decimal_digits ) {
1587+ // process what's left as double
1588+ while (isdigit_ascii (* p )) {
1589+ number = number * 10. + (* p - '0' );
1590+ p ++ ;
1591+ num_digits ++ ;
1592+
1593+ p += (tsep != '\0' && * p == tsep );
1594+ }
1595+ }
15701596
15711597 // Process decimal part.
15721598 if (* p == decimal ) {
0 commit comments