@@ -14,6 +14,9 @@ module instead.
1414#include "structmember.h" // PyMemberDef
1515#include <stdbool.h>
1616
17+ #define NOT_SET ((Py_UCS4)-1)
18+ #define EOL ((Py_UCS4)-2)
19+
1720
1821typedef struct {
1922 PyObject * error_obj ; /* CSV exception */
@@ -153,9 +156,9 @@ get_dialect_from_registry(PyObject *name_obj, _csvstate *module_state)
153156}
154157
155158static PyObject *
156- get_nullchar_as_None (Py_UCS4 c )
159+ get_char_or_None (Py_UCS4 c )
157160{
158- if (c == '\0' ) {
161+ if (c == NOT_SET ) {
159162 Py_RETURN_NONE ;
160163 }
161164 else
@@ -172,19 +175,19 @@ Dialect_get_lineterminator(DialectObj *self, void *Py_UNUSED(ignored))
172175static PyObject *
173176Dialect_get_delimiter (DialectObj * self , void * Py_UNUSED (ignored ))
174177{
175- return get_nullchar_as_None (self -> delimiter );
178+ return get_char_or_None (self -> delimiter );
176179}
177180
178181static PyObject *
179182Dialect_get_escapechar (DialectObj * self , void * Py_UNUSED (ignored ))
180183{
181- return get_nullchar_as_None (self -> escapechar );
184+ return get_char_or_None (self -> escapechar );
182185}
183186
184187static PyObject *
185188Dialect_get_quotechar (DialectObj * self , void * Py_UNUSED (ignored ))
186189{
187- return get_nullchar_as_None (self -> quotechar );
190+ return get_char_or_None (self -> quotechar );
188191}
189192
190193static PyObject *
@@ -235,7 +238,7 @@ _set_char_or_none(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt
235238 * target = dflt ;
236239 }
237240 else {
238- * target = '\0' ;
241+ * target = NOT_SET ;
239242 if (src != Py_None ) {
240243 if (!PyUnicode_Check (src )) {
241244 PyErr_Format (PyExc_TypeError ,
@@ -254,7 +257,7 @@ _set_char_or_none(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt
254257 return -1 ;
255258 }
256259 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
257- else {
260+ else if ( len > 0 ) {
258261 * target = PyUnicode_READ_CHAR (src , 0 );
259262 }
260263 }
@@ -269,7 +272,7 @@ _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
269272 * target = dflt ;
270273 }
271274 else {
272- * target = '\0' ;
275+ * target = NOT_SET ;
273276 if (!PyUnicode_Check (src )) {
274277 PyErr_Format (PyExc_TypeError ,
275278 "\"%s\" must be string, not %.200s" , name ,
@@ -287,7 +290,7 @@ _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
287290 return -1 ;
288291 }
289292 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
290- else {
293+ else if ( len > 0 ) {
291294 * target = PyUnicode_READ_CHAR (src , 0 );
292295 }
293296 }
@@ -481,7 +484,7 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
481484 goto err
482485 DIASET (_set_char , "delimiter" , & self -> delimiter , delimiter , ',' );
483486 DIASET (_set_bool , "doublequote" , & self -> doublequote , doublequote , true);
484- DIASET (_set_char_or_none , "escapechar" , & self -> escapechar , escapechar , 0 );
487+ DIASET (_set_char_or_none , "escapechar" , & self -> escapechar , escapechar , NOT_SET );
485488 DIASET (_set_str , "lineterminator" , & self -> lineterminator , lineterminator , "\r\n" );
486489 DIASET (_set_char_or_none , "quotechar" , & self -> quotechar , quotechar , '"' );
487490 DIASET (_set_int , "quoting" , & self -> quoting , quoting , QUOTE_MINIMAL );
@@ -491,19 +494,19 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
491494 /* validate options */
492495 if (dialect_check_quoting (self -> quoting ))
493496 goto err ;
494- if (self -> delimiter == 0 ) {
497+ if (self -> delimiter == NOT_SET ) {
495498 PyErr_SetString (PyExc_TypeError ,
496499 "\"delimiter\" must be a 1-character string" );
497500 goto err ;
498501 }
499502 if (quotechar == Py_None && quoting == NULL )
500503 self -> quoting = QUOTE_NONE ;
501- if (self -> quoting != QUOTE_NONE && self -> quotechar == 0 ) {
504+ if (self -> quoting != QUOTE_NONE && self -> quotechar == NOT_SET ) {
502505 PyErr_SetString (PyExc_TypeError ,
503506 "quotechar must be set if quoting enabled" );
504507 goto err ;
505508 }
506- if (self -> lineterminator == 0 ) {
509+ if (self -> lineterminator == NULL ) {
507510 PyErr_SetString (PyExc_TypeError , "lineterminator must be set" );
508511 goto err ;
509512 }
@@ -670,7 +673,7 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
670673 switch (self -> state ) {
671674 case START_RECORD :
672675 /* start of record */
673- if (c == '\0' )
676+ if (c == EOL )
674677 /* empty line - return [] */
675678 break ;
676679 else if (c == '\n' || c == '\r' ) {
@@ -682,11 +685,11 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
682685 /* fallthru */
683686 case START_FIELD :
684687 /* expecting field */
685- if (c == '\n' || c == '\r' || c == '\0' ) {
688+ if (c == '\n' || c == '\r' || c == EOL ) {
686689 /* save empty field - return [fields] */
687690 if (parse_save_field (self ) < 0 )
688691 return -1 ;
689- self -> state = (c == '\0' ? START_RECORD : EAT_CRNL );
692+ self -> state = (c == EOL ? START_RECORD : EAT_CRNL );
690693 }
691694 else if (c == dialect -> quotechar &&
692695 dialect -> quoting != QUOTE_NONE ) {
@@ -722,25 +725,25 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
722725 self -> state = AFTER_ESCAPED_CRNL ;
723726 break ;
724727 }
725- if (c == '\0' )
728+ if (c == EOL )
726729 c = '\n' ;
727730 if (parse_add_char (self , module_state , c ) < 0 )
728731 return -1 ;
729732 self -> state = IN_FIELD ;
730733 break ;
731734
732735 case AFTER_ESCAPED_CRNL :
733- if (c == '\0' )
736+ if (c == EOL )
734737 break ;
735738 /*fallthru*/
736739
737740 case IN_FIELD :
738741 /* in unquoted field */
739- if (c == '\n' || c == '\r' || c == '\0' ) {
742+ if (c == '\n' || c == '\r' || c == EOL ) {
740743 /* end of line - return [fields] */
741744 if (parse_save_field (self ) < 0 )
742745 return -1 ;
743- self -> state = (c == '\0' ? START_RECORD : EAT_CRNL );
746+ self -> state = (c == EOL ? START_RECORD : EAT_CRNL );
744747 }
745748 else if (c == dialect -> escapechar ) {
746749 /* possible escaped character */
@@ -761,7 +764,7 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
761764
762765 case IN_QUOTED_FIELD :
763766 /* in quoted field */
764- if (c == '\0' )
767+ if (c == EOL )
765768 ;
766769 else if (c == dialect -> escapechar ) {
767770 /* Possible escape character */
@@ -786,7 +789,7 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
786789 break ;
787790
788791 case ESCAPE_IN_QUOTED_FIELD :
789- if (c == '\0' )
792+ if (c == EOL )
790793 c = '\n' ;
791794 if (parse_add_char (self , module_state , c ) < 0 )
792795 return -1 ;
@@ -808,11 +811,11 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
808811 return -1 ;
809812 self -> state = START_FIELD ;
810813 }
811- else if (c == '\n' || c == '\r' || c == '\0' ) {
814+ else if (c == '\n' || c == '\r' || c == EOL ) {
812815 /* end of line - return [fields] */
813816 if (parse_save_field (self ) < 0 )
814817 return -1 ;
815- self -> state = (c == '\0' ? START_RECORD : EAT_CRNL );
818+ self -> state = (c == EOL ? START_RECORD : EAT_CRNL );
816819 }
817820 else if (!dialect -> strict ) {
818821 if (parse_add_char (self , module_state , c ) < 0 )
@@ -831,7 +834,7 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
831834 case EAT_CRNL :
832835 if (c == '\n' || c == '\r' )
833836 ;
834- else if (c == '\0' )
837+ else if (c == EOL )
835838 self -> state = START_RECORD ;
836839 else {
837840 PyErr_Format (module_state -> error_obj ,
@@ -909,20 +912,14 @@ Reader_iternext(ReaderObj *self)
909912 linelen = PyUnicode_GET_LENGTH (lineobj );
910913 while (linelen -- ) {
911914 c = PyUnicode_READ (kind , data , pos );
912- if (c == '\0' ) {
913- Py_DECREF (lineobj );
914- PyErr_Format (module_state -> error_obj ,
915- "line contains NUL" );
916- goto err ;
917- }
918915 if (parse_process_char (self , module_state , c ) < 0 ) {
919916 Py_DECREF (lineobj );
920917 goto err ;
921918 }
922919 pos ++ ;
923920 }
924921 Py_DECREF (lineobj );
925- if (parse_process_char (self , module_state , 0 ) < 0 )
922+ if (parse_process_char (self , module_state , EOL ) < 0 )
926923 goto err ;
927924 } while (self -> state != START_RECORD );
928925
@@ -1127,7 +1124,7 @@ join_append_data(WriterObj *self, unsigned int field_kind, const void *field_dat
11271124 * quoted = 1 ;
11281125 }
11291126 if (want_escape ) {
1130- if (! dialect -> escapechar ) {
1127+ if (dialect -> escapechar == NOT_SET ) {
11311128 PyErr_Format (self -> error_obj ,
11321129 "need to escape, but no escapechar set" );
11331130 return -1 ;
0 commit comments