From cbc69af8a229f307aa77146c6dfb63bae32bb2f9 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Wed, 5 Oct 2022 19:57:52 +0200 Subject: [PATCH] gh-91539: improve performance of getproxies_environment (GH-91566) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * improve performance of getproxies_environment when there are many environment variables * 📜🤖 Added by blurb_it. * fix case of short env name * fix formatting * fix whitespace * whitespace * Update Lib/urllib/request.py Co-authored-by: Carl Meyer * Update Lib/urllib/request.py Co-authored-by: Carl Meyer * Update Lib/urllib/request.py Co-authored-by: Carl Meyer * Update Lib/urllib/request.py Co-authored-by: Carl Meyer * whitespace * Update Misc/NEWS.d/next/Library/2022-04-15-11-29-38.gh-issue-91539.7WgVuA.rst Co-authored-by: Carl Meyer * Update Lib/urllib/request.py Co-authored-by: Carl Meyer Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> Co-authored-by: Carl Meyer (cherry picked from commit aeb28f51304ebe2ad9fd6a61b6e4e5a03d288aa1) Co-authored-by: Pieter Eendebak --- Lib/urllib/request.py | 26 ++++++++++++------- ...2-04-15-11-29-38.gh-issue-91539.7WgVuA.rst | 1 + 2 files changed, 17 insertions(+), 10 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-04-15-11-29-38.gh-issue-91539.7WgVuA.rst diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index a0ef60b30de914..320163be63ad54 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -2492,28 +2492,34 @@ def getproxies_environment(): this seems to be the standard convention. If you need a different way, you can pass a proxies dictionary to the [Fancy]URLopener constructor. 
- """ - proxies = {} # in order to prefer lowercase variables, process environment in # two passes: first matches any, second pass matches lowercase only - for name, value in os.environ.items(): - name = name.lower() - if value and name[-6:] == '_proxy': - proxies[name[:-6]] = value + + # select only environment variables which end in (after making lowercase) _proxy + proxies = {} + environment = [] + for name in os.environ.keys(): + # fast screen underscore position before more expensive case-folding + if len(name) > 5 and name[-6] == "_" and name[-5:].lower() == "proxy": + value = os.environ[name] + proxy_name = name[:-6].lower() + environment.append((name, value, proxy_name)) + if value: + proxies[proxy_name] = value # CVE-2016-1000110 - If we are running as CGI script, forget HTTP_PROXY # (non-all-lowercase) as it may be set from the web server by a "Proxy:" # header from the client # If "proxy" is lowercase, it will still be used thanks to the next block if 'REQUEST_METHOD' in os.environ: proxies.pop('http', None) - for name, value in os.environ.items(): + for name, value, proxy_name in environment: + # not case-folded, checking here for lower-case env vars only if name[-6:] == '_proxy': - name = name.lower() if value: - proxies[name[:-6]] = value + proxies[proxy_name] = value else: - proxies.pop(name[:-6], None) + proxies.pop(proxy_name, None) return proxies def proxy_bypass_environment(host, proxies=None): diff --git a/Misc/NEWS.d/next/Library/2022-04-15-11-29-38.gh-issue-91539.7WgVuA.rst b/Misc/NEWS.d/next/Library/2022-04-15-11-29-38.gh-issue-91539.7WgVuA.rst new file mode 100644 index 00000000000000..16d61f1b91102d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-04-15-11-29-38.gh-issue-91539.7WgVuA.rst @@ -0,0 +1 @@ +Improve performance of ``urllib.request.getproxies_environment`` when there are many environment variables