From 5df09e7372716101364ddde636386cc32aeb295a Mon Sep 17 00:00:00 2001 From: Michael Vettese Date: Fri, 10 Jan 2025 11:20:31 -0500 Subject: [PATCH 1/4] Removes deprecated cgi function and replaces with email.message.Message --- docs/urls.html | 62 +++++++++++++++++++++----------------------------- 1 file changed, 26 insertions(+), 36 deletions(-) diff --git a/docs/urls.html b/docs/urls.html index 8267ca2..72aae7a 100644 --- a/docs/urls.html +++ b/docs/urls.html @@ -37,8 +37,8 @@

Module pywebcopy.urls

import os import re -from cgi import parse_header from collections import namedtuple +from email.message import Message from hashlib import md5 from zlib import adler32 @@ -53,7 +53,7 @@

Module pywebcopy.urls

__all__ = [ 'url2path', 'filename_present', 'relate', 'get_etag', 'HIERARCHY', 'LINEAR', - 'parse_url', 'parse_header', 'get_host', 'get_prefix', 'get_suffix', + 'parse_url', 'Message', 'get_host', 'get_prefix', 'get_suffix', 'Url', 'LocationParseError', 'secure_filename', 'split_first', 'common_prefix_map', 'common_suffix_map', 'get_content_type_from_headers', 'Context', 'ContextError', @@ -296,11 +296,17 @@

Module pywebcopy.urls

return md5(string).hexdigest() +def parse_separated_header(value: str): + m = Message() + m['content-type'] = value + return dict(m.get_params()) + + def get_content_type_from_headers(headers, default=None): content_type = headers.get('Content-Type', default) if not content_type: return default - content_type, params = parse_header(content_type) + content_type, params = parse_separated_header(content_type) return content_type @@ -769,6 +775,21 @@

Functions

return bool(_filter_and_group_segments(url, remove_query=True, remove_frag=True)[1]) +
+ def parse_separated_header(value: str) +
+
+
+
+ + Expand source code + +
def parse_separated_header(value: str):
+        m = Message()
+        m['content-type'] = value
+        return dict(m.get_params())
+
+
def get_content_type_from_headers(headers, default=None)
@@ -782,7 +803,7 @@

Functions

content_type = headers.get('Content-Type', default) if not content_type: return default - content_type, params = parse_header(content_type) + content_type, params = parse_separated_header(content_type) return content_type @@ -844,37 +865,6 @@

Functions

return common_suffix_map.get(content_type) -
-def parse_header(line) -
-
-

Parse a Content-type like header.

-

Return the main content-type and a dictionary of options.

-
- -Expand source code - -
def parse_header(line):
-    """Parse a Content-type like header.
-
-    Return the main content-type and a dictionary of options.
-
-    """
-    parts = _parseparam(';' + line)
-    key = parts.__next__()
-    pdict = {}
-    for p in parts:
-        i = p.find('=')
-        if i >= 0:
-            name = p[:i].strip().lower()
-            value = p[i+1:].strip()
-            if len(value) >= 2 and value[0] == value[-1] == '"':
-                value = value[1:-1]
-                value = value.replace('\\\\', '\\').replace('\\"', '"')
-            pdict[name] = value
-    return key, pdict
-
-
def parse_url(url)
@@ -1649,7 +1639,7 @@

Index

  • get_host
  • get_prefix
  • get_suffix
  • -
  • parse_header
  • +
  • parse_separated_header
  • parse_url
  • relate
  • secure_filename
  • From ca23196616a2fd44bbefdb44be2ad426b45460c4 Mon Sep 17 00:00:00 2001 From: Michael Vettese Date: Wed, 15 Jan 2025 10:38:01 -0500 Subject: [PATCH 2/4] Adds urls file --- pywebcopy/urls.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/pywebcopy/urls.py b/pywebcopy/urls.py index 18480cc..be837fe 100644 --- a/pywebcopy/urls.py +++ b/pywebcopy/urls.py @@ -9,8 +9,8 @@ import os import re -from cgi import parse_header from collections import namedtuple +from email.message import Message from hashlib import md5 from zlib import adler32 @@ -25,7 +25,7 @@ __all__ = [ 'url2path', 'filename_present', 'relate', 'get_etag', 'HIERARCHY', 'LINEAR', - 'parse_url', 'parse_header', 'get_host', 'get_prefix', 'get_suffix', + 'parse_url', 'Message', 'get_host', 'get_prefix', 'get_suffix', 'Url', 'LocationParseError', 'secure_filename', 'split_first', 'common_prefix_map', 'common_suffix_map', 'get_content_type_from_headers', 'Context', 'ContextError', @@ -272,11 +272,18 @@ def get_etag(string): return md5(string).hexdigest() +def parse_separated_header(value: str): + # Adapted from https://peps.python.org/pep-0594/#cgi + m = Message() + m['content-type'] = value + return dict(m.get_params()) + + def get_content_type_from_headers(headers, default=None): content_type = headers.get('Content-Type', default) if not content_type: return default - content_type, params = parse_header(content_type) + content_type = parse_separated_header(content_type) return content_type From a1c5cffae833db73e0ba493cd0fe48e2038e6a08 Mon Sep 17 00:00:00 2001 From: Michael Vettese Date: Tue, 28 Jan 2025 14:12:51 -0500 Subject: [PATCH 3/4] Update parseheader function --- docs/urls.html | 7 ++++--- pywebcopy/urls.py | 10 +++++----- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/docs/urls.html b/docs/urls.html index 72aae7a..b16cbe8 100644 --- a/docs/urls.html +++ b/docs/urls.html @@ -785,9 +785,10 @@

    Functions

    Expand source code
    def parse_separated_header(value: str):
    -        m = Message()
    -        m['content-type'] = value
    -        return dict(m.get_params())
    + msg = EmailMessage() + msg['content-type'] = 'application/json; charset='utf8'' + main, params = msg.get_content_type(), msg['content-type'].params + return main, params
    diff --git a/pywebcopy/urls.py b/pywebcopy/urls.py index be837fe..a8ab7f0 100644 --- a/pywebcopy/urls.py +++ b/pywebcopy/urls.py @@ -10,7 +10,7 @@ import os import re from collections import namedtuple -from email.message import Message +from email.message import EmailMessage from hashlib import md5 from zlib import adler32 @@ -273,10 +273,10 @@ def get_etag(string): def parse_separated_header(value: str): - # Adapted from https://peps.python.org/pep-0594/#cgi - m = Message() - m['content-type'] = value - return dict(m.get_params()) + msg = EmailMessage() + msg['content-type'] = 'application/json; charset="utf8"' + main, params = msg.get_content_type(), msg['content-type'].params + return main, params def get_content_type_from_headers(headers, default=None): From a9ba09f48b0f8cd24e9748b6486ea409eec926f1 Mon Sep 17 00:00:00 2001 From: Michael Vettese Date: Fri, 7 Feb 2025 11:21:34 -0500 Subject: [PATCH 4/4] Updates docs --- docs/urls.html | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/urls.html b/docs/urls.html index b16cbe8..c23753b 100644 --- a/docs/urls.html +++ b/docs/urls.html @@ -297,16 +297,17 @@

    Module pywebcopy.urls

    def parse_separated_header(value: str): - m = Message() - m['content-type'] = value - return dict(m.get_params()) + msg = EmailMessage() + msg['content-type'] = 'application/json; charset="utf8"' + main, params = msg.get_content_type(), msg['content-type'].params + return main, params def get_content_type_from_headers(headers, default=None): content_type = headers.get('Content-Type', default) if not content_type: return default - content_type, params = parse_separated_header(content_type) + content_type = parse_separated_header(content_type) return content_type @@ -804,7 +805,7 @@

    Functions

    content_type = headers.get('Content-Type', default) if not content_type: return default - content_type, params = parse_separated_header(content_type) + content_type = parse_separated_header(content_type) return content_type