From a43df7a424be2ad03fdf06df80204704ff379c8f Mon Sep 17 00:00:00 2001 From: Janek Bevendorff Date: Mon, 6 Mar 2023 09:52:24 +0100 Subject: [PATCH] Add reset() method to EncodingDetector --- resiliparse/resiliparse/parse/encoding.pxd | 1 + resiliparse/resiliparse/parse/encoding.pyx | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/resiliparse/resiliparse/parse/encoding.pxd b/resiliparse/resiliparse/parse/encoding.pxd index f52b9b6f..3a684eae 100644 --- a/resiliparse/resiliparse/parse/encoding.pxd +++ b/resiliparse/resiliparse/parse/encoding.pxd @@ -21,6 +21,7 @@ cdef class EncodingDetector: cpdef void update(self, const string& data) cpdef str encoding(self, bint html5_compatible=*) + cpdef void reset(self) cpdef str detect_encoding(bytes data, size_t max_len=*, bint html5_compatible=*, bint from_html_meta=*) cpdef str bytes_to_str(bytes data, str encoding=*, str errors=*, fallback_encodings=*, bint strip_bom=*) diff --git a/resiliparse/resiliparse/parse/encoding.pyx b/resiliparse/resiliparse/parse/encoding.pyx index 25bee504..e3b4adae 100644 --- a/resiliparse/resiliparse/parse/encoding.pyx +++ b/resiliparse/resiliparse/parse/encoding.pyx @@ -163,6 +163,14 @@ cdef class EncodingDetector: return enc + cpdef void reset(self): + """ + reset(self) + + Manually reset the encoding detector state. + """ + uchardet_reset(self.d) + def __dealloc__(self): if self.d != NULL: uchardet_delete(self.d)