diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c3aabde47..6b91edd6c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,22 +1,34 @@ name: CI env: - # add 3.10+ after patching nose (https://github.com/nose-devs/nose/issues/1099) - # or switching to fork of https://github.com/mdmintz/pynose - all-cpython-versions: 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9 - main-cpython-versions: 2.7, 3.2, 3.5, 3.9 + all-cpython-versions: 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 3.10, 3.11, 3.12 + main-cpython-versions: 2.7, 3.2, 3.5, 3.9, 3.11 pypy-versions: pypy-2.7, pypy-3.6, pypy-3.7 cpython-versions: main - test-set: both + test-set: core on: push: + inputs: + cpython-versions: + type: string + default: all + test-set: + type: string + default: core pull_request: + inputs: + cpython-versions: + type: string + default: main + test-set: + type: string + default: both workflow_dispatch: inputs: cpython-versions: type: choice - description: CPython versions (main = 2.7, 3.2, 3.5, 3.9) + description: CPython versions (main = 2.7, 3.2, 3.5, 3.9, 3.11) options: - all - main @@ -30,7 +42,7 @@ on: - core - download required: true - default: core + default: both permissions: contents: read @@ -44,7 +56,8 @@ jobs: test-set: ${{ steps.run.outputs.test-set }} own-pip-versions: ${{ steps.run.outputs.own-pip-versions }} steps: - - id: run + - name: Make version array + id: run run: | # Make a JSON Array from comma/space-separated string (no extra escaping) json_list() { \ @@ -66,7 +79,6 @@ jobs: # versions with a special get-pip.py in a per-version subdirectory printf 'own-pip-versions=%s\n' \ "$(json_list 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6)" >> "$GITHUB_OUTPUT" - tests: name: Run tests needs: select @@ -82,28 +94,29 @@ jobs: fail-fast: true matrix: os: [ubuntu-20.04] - # outside steps, use github.env...., not env.... 
python-version: ${{ fromJSON(needs.select.outputs.cpython-versions) }} python-impl: [cpython] ytdl-test-set: ${{ fromJSON(needs.select.outputs.test-set) }} run-tests-ext: [sh] include: - os: windows-2019 - python-version: 3.2 + python-version: 3.4 python-impl: cpython ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }} run-tests-ext: bat - os: windows-2019 - python-version: 3.2 + python-version: 3.4 python-impl: cpython ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download' || 'nodownload' }} run-tests-ext: bat # jython - os: ubuntu-20.04 + python-version: 2.7 python-impl: jython ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }} run-tests-ext: sh - os: ubuntu-20.04 + python-version: 2.7 python-impl: jython ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download' || 'nodownload' }} run-tests-ext: sh @@ -113,7 +126,7 @@ jobs: #-------- Python 3 ----- - name: Set up supported Python ${{ matrix.python-version }} id: setup-python - if: ${{ matrix.python-impl == 'cpython' && matrix.python-version != '2.6' && matrix.python-version != '2.7'}} + if: ${{ matrix.python-impl == 'cpython' && matrix.python-version != '2.6' && matrix.python-version != '2.7' && matrix.python-version != '3.12'}} # wrap broken actions/setup-python@v4 uses: ytdl-org/setup-python@v1 with: @@ -151,9 +164,45 @@ jobs: 'import sys' \ 'print(sys.path)' \ | ${expected} - + #-------- Python 3.12 - + - name: Set up CPython 3.12 environment + if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '3.12' }} + shell: bash + run: | + PYENV_ROOT=$HOME/.local/share/pyenv + echo "PYENV_ROOT=${PYENV_ROOT}" >> "$GITHUB_ENV" + - name: Cache Python 3.12 + id: cache312 + if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '3.12' }} + uses: actions/cache@v3 + with: + key: python-3.12 + path: | + ${{ env.PYENV_ROOT }} + - name: Build and set up Python 3.12 
+ if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '3.12' && ! steps.cache312.outputs.cache-hit }} + # dl and build locally + shell: bash + run: | + # Install build environment + sudo apt-get install -y build-essential llvm libssl-dev tk-dev \ + libncursesw5-dev libreadline-dev libsqlite3-dev \ + libffi-dev xz-utils zlib1g-dev libbz2-dev liblzma-dev + # Download PyEnv from its GitHub repository. + export PYENV_ROOT=${{ env.PYENV_ROOT }} + export PATH=$PYENV_ROOT/bin:$PATH + git clone "https://github.com/pyenv/pyenv.git" "$PYENV_ROOT" + pyenv install 3.12.0b4 + - name: Locate Python 3.12 + if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '3.12' }} + shell: bash + run: | + PYTHONHOME="${{ env.PYENV_ROOT }}/versions/3.12.0b4" + echo "PYTHONHOME=$PYTHONHOME" >> "$GITHUB_ENV" + echo "PATH=${PYTHONHOME}/bin:$PATH" >> "$GITHUB_ENV" #-------- Python 2.7 -- - name: Set up Python 2.7 - if: ${{ matrix.python-version == '2.7' }} + if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.7' }} # install 2.7 shell: bash run: | @@ -161,7 +210,7 @@ jobs: echo "PYTHONHOME=/usr" >> "$GITHUB_ENV" #-------- Python 2.6 -- - name: Set up Python 2.6 environment - if: ${{ matrix.python-version == '2.6' }} + if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.6' }} shell: bash run: | openssl_name=openssl-1.0.2u @@ -181,7 +230,7 @@ jobs: ${{ env.openssl_dir }} ${{ env.PYENV_ROOT }} - name: Build and set up Python 2.6 - if: ${{ matrix.python-version == '2.6' && ! steps.cache26.outputs.cache-hit }} + if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.6' && ! steps.cache26.outputs.cache-hit }} # dl and build locally shell: bash run: | @@ -205,24 +254,21 @@ jobs: make install ) rm -rf $openssl_name rmdir $openssl_ssl/certs && ln -s /etc/ssl/certs $openssl_ssl/certs - # Download PyEnv from its GitHub repository. 
export PYENV_ROOT=${{ env.PYENV_ROOT }} export PATH=$PYENV_ROOT/bin:$PATH git clone "https://github.com/pyenv/pyenv.git" "$PYENV_ROOT" - # Prevent pyenv build trying (and failing) to update pip export GET_PIP=get-pip-2.6.py echo 'import sys; sys.exit(0)' > ${GET_PIP} GET_PIP=$(realpath $GET_PIP) - # Build and install Python export CFLAGS="-I$openssl_inc" export LDFLAGS="-L$openssl_lib" export LD_LIBRARY_PATH="$openssl_lib" pyenv install 2.6.9 - name: Locate Python 2.6 - if: ${{ matrix.python-version == '2.6' }} + if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.6' }} shell: bash run: | PYTHONHOME="${{ env.PYENV_ROOT }}/versions/2.6.9" @@ -244,7 +290,7 @@ jobs: echo "PIP=pip" >> "$GITHUB_ENV" - name: Cache Jython id: cachejy - if: ${{ matrix.python-impl == 'jython' }} + if: ${{ matrix.python-impl == 'jython' && matrix.python-version == '2.7' }} uses: actions/cache@v3 with: # 2.7.3 now available, may solve SNI issue @@ -252,7 +298,7 @@ jobs: path: | ${{ env.JYTHON_ROOT }} - name: Install Jython - if: ${{ matrix.python-impl == 'jython' && ! steps.cachejy.outputs.cache-hit }} + if: ${{ matrix.python-impl == 'jython' && matrix.python-version == '2.7' && ! 
steps.cachejy.outputs.cache-hit }} shell: bash run: | JYTHON_ROOT="${{ env.JYTHON_ROOT }}" @@ -265,6 +311,11 @@ jobs: run: | JYTHON_ROOT="${{ env.JYTHON_ROOT }}" echo "${JYTHON_ROOT}/bin" >> $GITHUB_PATH + - name: Install supporting Python 2.7 if possible + if: ${{ steps.cachejy.outputs.cache-hit }} + shell: bash + run: | + sudo apt-get install -y python2.7 || true #-------- pip --------- - name: Set up supported Python ${{ matrix.python-version }} pip if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.7' }} @@ -317,12 +368,17 @@ jobs: done #-------- nose -------- - name: Install nose for Python ${{ matrix.python-version }} - if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.7' }} + if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.7' || matrix.python-version == '3.12' }} shell: bash run: | echo "$PATH" echo "$PYTHONHOME" - $PIP -qq show nose || $PIP install nose + # Use PyNose for recent Pythons instead of Nose + py3ver="${{ matrix.python-version }}" + py3ver=${py3ver#3.} + [ "$py3ver" != "${{ matrix.python-version }}" ] && py3ver=${py3ver%.*} || py3ver=0 + [ "$py3ver" -ge 9 ] && nose=pynose || nose=nose + $PIP -qq show $nose || $PIP install $nose - name: Install nose for other Python 2 if: ${{ matrix.python-impl == 'jython' || matrix.python-version == '2.6' }} shell: bash @@ -342,6 +398,11 @@ jobs: if: ${{ contains(needs.select.outputs.test-set, matrix.ytdl-test-set ) }} shell: bash run: | + # set PYTHON_VER + PYTHON_VER=${{ matrix.python-version }} + [ "${PYTHON_VER#*-}" != "$PYTHON_VER" ] || PYTHON_VER="${{ matrix.python-impl }}-${PYTHON_VER}" + echo "PYTHON_VER=$PYTHON_VER" >> "$GITHUB_ENV" + echo "PYTHON_IMPL=${{ matrix.python-impl }}" >> "$GITHUB_ENV" # define a test to validate the Python version used by nosetests printf '%s\n' \ 'from __future__ import 
unicode_literals' \ @@ -354,9 +415,9 @@ jobs: ' def setUp(self):' \ ' self.ver = os.environ["PYTHON_VER"].split("-")' \ ' def test_python_ver(self):' \ - ' self.assertEqual(sys.version[:3], self.ver[-1])' \ + ' self.assertEqual(["%d" % v for v in sys.version_info[:2]], self.ver[-1].split(".")[:2])' \ ' self.assertTrue(sys.version.startswith(self.ver[-1]))' \ - ' self.assertIn(self.ver[0], sys.version.lower())' \ + ' self.assertIn(self.ver[0], ",".join((sys.version, platform.python_implementation())).lower())' \ ' def test_python_impl(self):' \ ' self.assertIn(platform.python_implementation().lower(), (os.environ["PYTHON_IMPL"], self.ver[0]))' \ > test/test_python.py @@ -366,11 +427,8 @@ jobs: continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }} env: YTDL_TEST_SET: ${{ matrix.ytdl-test-set }} - PYTHON_VER: ${{ matrix.python-version }} - PYTHON_IMPL: ${{ matrix.python-impl }} run: | ./devscripts/run_tests.${{ matrix.run-tests-ext }} - flake8: name: Linter runs-on: ubuntu-latest diff --git a/README.md b/README.md index 14a3d6c86..47e686f84 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.ex You can also use pip: sudo -H pip install --upgrade youtube-dl - + This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information. 
macOS users can install youtube-dl with [Homebrew](https://brew.sh/): @@ -563,7 +563,7 @@ The basic usage is not to set any template arguments when downloading a single f - `is_live` (boolean): Whether this video is a live stream or a fixed-length video - `start_time` (numeric): Time in seconds where the reproduction should start, as specified in the URL - `end_time` (numeric): Time in seconds where the reproduction should end, as specified in the URL - - `format` (string): A human-readable description of the format + - `format` (string): A human-readable description of the format - `format_id` (string): Format code specified by `--format` - `format_note` (string): Additional info about the format - `width` (numeric): Width of the video @@ -675,7 +675,7 @@ The general syntax for format selection is `--format FORMAT` or shorter `-f FORM **tl;dr:** [navigate me to examples](#format-selection-examples). -The simplest case is requesting a specific format, for example with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific. +The simplest case is requesting a specific format, for example with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific. You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file. @@ -760,7 +760,7 @@ Videos can be filtered by their upload date using the options `--date`, `--dateb - Absolute dates: Dates in the format `YYYYMMDD`. 
- Relative dates: Dates in the format `(now|today)[+-][0-9](day|week|month|year)(s)?` - + Examples: ```bash @@ -1000,6 +1000,8 @@ To run the test, simply invoke your favorite test runner, or execute a test file python test/test_download.py nosetests +For Python versions 3.6 and later, you can use [pynose](https://pypi.org/project/pynose/) to implement `nosetests`. The original [nose](https://pypi.org/project/nose/) has not been upgraded for 3.10 and later. + See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases. If you want to create a build of youtube-dl yourself, you'll need @@ -1091,7 +1093,7 @@ In any case, thank you very much for your contributions! ## youtube-dl coding conventions -This section introduces a guide lines for writing idiomatic, robust and future-proof extractor code. +This section introduces guidelines for writing idiomatic, robust and future-proof extractor code. Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hosters out of your control and this layout tends to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize dependency on the source's layout and even to make the code foresee potential future changes and be ready for that. This is important because it will allow the extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with a fix incorporated, all the previous versions become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say, some non rolling release distros may never receive an update at all. 
@@ -1114,7 +1116,7 @@ Say you have some source dictionary `meta` that you've fetched as JSON with HTTP ```python meta = self._download_json(url, video_id) ``` - + Assume at this point `meta`'s layout is: ```python @@ -1158,7 +1160,7 @@ description = self._search_regex( ``` On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that may or may not be present. - + ### Provide fallbacks When extracting metadata try to do so from multiple sources. For example if `title` is present in several places, try extracting from at least some of them. This makes it more future-proof in case some of the sources become unavailable. @@ -1206,7 +1208,7 @@ r'(id|ID)=(?P\d+)' #### Make regular expressions relaxed and flexible When using regular expressions try to write them fuzzy, relaxed and flexible, skipping insignificant parts that are more likely to change, allowing both single and double quotes for quoted values and so on. - + ##### Example Say you need to extract `title` from the following HTML code: @@ -1230,7 +1232,7 @@ title = self._search_regex( webpage, 'title', group='title') ``` -Note how you tolerate potential changes in the `style` attribute's value or switch from using double quotes to single for `class` attribute: +Note how you tolerate potential changes in the `style` attribute's value or switch from using double quotes to single for `class` attribute: The code definitely should not look like: @@ -1331,27 +1333,114 @@ Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`] Use `url_or_none` for safe URL processing. -Use `try_get` for safe metadata extraction from parsed JSON. +Use `traverse_obj` for safe metadata extraction from parsed JSON. 
-Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction. +Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction. Explore [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions. #### More examples ##### Safely extract optional description from parsed JSON + +When processing complex JSON, as often returned by site API requests or stashed in web pages for "hydration", you can use the `traverse_obj()` utility function to handle multiple fallback values and to ensure the expected type of metadata items. The function's docstring defines how the function works; also review usage in the codebase for more examples. + +In this example, a text `description`, or `None`, is pulled from the `.result.video[0].summary` member of the parsed JSON `response`, if available. + +```python +description = traverse_obj(response, ('result', 'video', 0, 'summary', T(compat_str))) +``` +`T(...)` is a shorthand for a set literal, needed because Python 2.6 has no set literal syntax; if compatibility with Python 2.6 is not required, `T(type_or_transformation)` could be written as a set literal `{type_or_transformation}`. + +Some extractors use the older and less capable `try_get()` function in the same way.
+ ```python description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str) ``` ##### Safely extract more optional metadata + +In this example, various optional metadata values are extracted from the `.result.video[0]` member of the parsed JSON `response`, which is expected to be a JS object, parsed into a `dict`, with no crash if that isn't so, or if any of the target values are missing or invalid. + ```python -video = try_get(response, lambda x: x['result']['video'][0], dict) or {} +video = traverse_obj(response, ('result', 'video', 0, T(dict))) or {} +# formerly: +# video = try_get(response, lambda x: x['result']['video'][0], dict) or {} description = video.get('summary') duration = float_or_none(video.get('durationMs'), scale=1000) view_count = int_or_none(video.get('views')) ``` +#### Safely extract nested lists + +Suppose you've extracted JSON like this into a Python data structure named `media_json` using, say, the `_download_json()` or `_parse_json()` methods of `InfoExtractor`: +```json +{ + "title": "Example video", + "comment": "try extracting this", + "media": [{ + "type": "bad", + "size": 320, + "url": "https://some.cdn.site/bad.mp4" + }, { + "type": "streaming", + "url": "https://some.cdn.site/hls.m3u8" + }, { + "type": "super", + "size": 1280, + "url": "https://some.cdn.site/good.webm" + }], + "moreStuff": "more values", + ... +} +``` + +Then extractor code like this can collect the various fields of the JSON: +```python +... +from ..utils import ( + determine_ext, + int_or_none, + T, + traverse_obj, + txt_or_none, + url_or_none, +) +... + ... 
+ info_dict = {} + # extract title and description if valid and not empty + info_dict.update(traverse_obj(media_json, { + 'title': ('title', T(txt_or_none)), + 'description': ('comment', T(txt_or_none)), + })) + + # extract any recognisable media formats + fmts = [] + # traverse into "media" list, extract `dict`s with desired keys + for fmt in traverse_obj(media_json, ('media', Ellipsis, { + 'format_id': ('type', T(txt_or_none)), + 'url': ('url', T(url_or_none)), + 'width': ('size', T(int_or_none)), })): + # bad `fmt` values were `None` and removed + if 'url' not in fmt: + continue + fmt_url = fmt['url'] # known to be valid URL + ext = determine_ext(fmt_url) + if ext == 'm3u8': + fmts.extend(self._extract_m3u8_formats(fmt_url, video_id, 'mp4', fatal=False)) + else: + fmt['ext'] = ext + fmts.append(fmt) + + # sort, raise if no formats + self._sort_formats(fmts) + + info_dict['formats'] = fmts + ... +``` +If the JSON structure changes so that no formats are found, the extractor raises an exception instead of crashing unpredictably. + # EMBEDDING YOUTUBE-DL youtube-dl makes the best effort to be a good command-line program, and thus should be callable from any programming language. If you encounter any problems parsing its output, feel free to [create a report](https://github.com/ytdl-org/youtube-dl/issues/new).
diff --git a/devscripts/__init__.py b/devscripts/__init__.py new file mode 100644 index 000000000..750dbdca7 --- /dev/null +++ b/devscripts/__init__.py @@ -0,0 +1 @@ +# Empty file needed to make devscripts.utils properly importable from outside diff --git a/devscripts/bash-completion.py b/devscripts/bash-completion.py index 3d1391334..7db396a77 100755 --- a/devscripts/bash-completion.py +++ b/devscripts/bash-completion.py @@ -5,8 +5,12 @@ from os.path import dirname as dirn import sys -sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) +sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) + import youtube_dl +from youtube_dl.compat import compat_open as open + +from utils import read_file BASH_COMPLETION_FILE = "youtube-dl.bash-completion" BASH_COMPLETION_TEMPLATE = "devscripts/bash-completion.in" @@ -18,9 +22,8 @@ def build_completion(opt_parser): for option in group.option_list: # for every long flag opts_flag.append(option.get_opt_string()) - with open(BASH_COMPLETION_TEMPLATE) as f: - template = f.read() - with open(BASH_COMPLETION_FILE, "w") as f: + template = read_file(BASH_COMPLETION_TEMPLATE) + with open(BASH_COMPLETION_FILE, "w", encoding='utf-8') as f: # just using the special char filled_template = template.replace("{{flags}}", " ".join(opts_flag)) f.write(filled_template) diff --git a/devscripts/create-github-release.py b/devscripts/create-github-release.py index 2ddfa1096..320bcfc27 100644 --- a/devscripts/create-github-release.py +++ b/devscripts/create-github-release.py @@ -1,7 +1,6 @@ #!/usr/bin/env python from __future__ import unicode_literals -import io import json import mimetypes import netrc @@ -10,7 +9,9 @@ import re import sys -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +dirn = os.path.dirname + +sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) from youtube_dl.compat import ( compat_basestring, @@ -22,6 +23,7 @@ make_HTTPS_handler, sanitized_Request, ) +from utils import read_file 
class GitHubReleaser(object): @@ -89,8 +91,7 @@ def main(): changelog_file, version, build_path = args - with io.open(changelog_file, encoding='utf-8') as inf: - changelog = inf.read() + changelog = read_file(changelog_file) mobj = re.search(r'(?s)version %s\n{2}(.+?)\n{3}' % version, changelog) body = mobj.group(1) if mobj else '' diff --git a/devscripts/fish-completion.py b/devscripts/fish-completion.py index 51d19dd33..ef8a39e0b 100755 --- a/devscripts/fish-completion.py +++ b/devscripts/fish-completion.py @@ -6,10 +6,13 @@ from os.path import dirname as dirn import sys -sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) +sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) + import youtube_dl from youtube_dl.utils import shell_quote +from utils import read_file, write_file + FISH_COMPLETION_FILE = 'youtube-dl.fish' FISH_COMPLETION_TEMPLATE = 'devscripts/fish-completion.in' @@ -38,11 +41,9 @@ def build_completion(opt_parser): complete_cmd.extend(EXTRA_ARGS.get(long_option, [])) commands.append(shell_quote(complete_cmd)) - with open(FISH_COMPLETION_TEMPLATE) as f: - template = f.read() + template = read_file(FISH_COMPLETION_TEMPLATE) filled_template = template.replace('{{commands}}', '\n'.join(commands)) - with open(FISH_COMPLETION_FILE, 'w') as f: - f.write(filled_template) + write_file(FISH_COMPLETION_FILE, filled_template) parser = youtube_dl.parseOpts()[0] diff --git a/devscripts/gh-pages/add-version.py b/devscripts/gh-pages/add-version.py index 867ea0048..b84908f85 100755 --- a/devscripts/gh-pages/add-version.py +++ b/devscripts/gh-pages/add-version.py @@ -6,16 +6,21 @@ import hashlib import os.path +dirn = os.path.dirname + +sys.path.insert(0, dirn(dirn(dirn(os.path.abspath(__file__))))) + +from devscripts.utils import read_file, write_file +from youtube_dl.compat import compat_open as open if len(sys.argv) <= 1: print('Specify the version number as parameter') sys.exit() version = sys.argv[1] -with open('update/LATEST_VERSION', 'w') as f: - 
f.write(version) +write_file('update/LATEST_VERSION', version) -versions_info = json.load(open('update/versions.json')) +versions_info = json.loads(read_file('update/versions.json')) if 'signature' in versions_info: del versions_info['signature'] @@ -39,5 +44,5 @@ versions_info['versions'][version] = new_version versions_info['latest'] = version -with open('update/versions.json', 'w') as jsonf: - json.dump(versions_info, jsonf, indent=4, sort_keys=True) +with open('update/versions.json', 'w', encoding='utf-8') as jsonf: + json.dump(versions_info, jsonf, indent=4, sort_keys=True) diff --git a/devscripts/gh-pages/generate-download.py b/devscripts/gh-pages/generate-download.py index a873d32ee..3e38e9299 100755 --- a/devscripts/gh-pages/generate-download.py +++ b/devscripts/gh-pages/generate-download.py @@ -2,14 +2,21 @@ from __future__ import unicode_literals import json +import os.path +import sys -versions_info = json.load(open('update/versions.json')) +dirn = os.path.dirname + +sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) + +from utils import read_file, write_file + +versions_info = json.loads(read_file('update/versions.json')) version = versions_info['latest'] version_dict = versions_info['versions'][version] # Read template page -with open('download.html.in', 'r', encoding='utf-8') as tmplf: - template = tmplf.read() +template = read_file('download.html.in') template = template.replace('@PROGRAM_VERSION@', version) template = template.replace('@PROGRAM_URL@', version_dict['bin'][0]) @@ -18,5 +25,5 @@ template = template.replace('@EXE_SHA256SUM@', version_dict['exe'][1]) template = template.replace('@TAR_URL@', version_dict['tar'][0]) template = template.replace('@TAR_SHA256SUM@', version_dict['tar'][1]) -with open('download.html', 'w', encoding='utf-8') as dlf: - dlf.write(template) + +write_file('download.html', template) diff --git a/devscripts/gh-pages/update-copyright.py b/devscripts/gh-pages/update-copyright.py index 61487f925..444595c48
100755 --- a/devscripts/gh-pages/update-copyright.py +++ b/devscripts/gh-pages/update-copyright.py @@ -5,17 +5,22 @@ import datetime import glob -import io # For Python 2 compatibility import os import re +import sys -year = str(datetime.datetime.now().year) +dirn = os.path.dirname + +sys.path.insert(0, dirn(dirn(dirn(os.path.abspath(__file__))))) + +from devscripts.utils import read_file, write_file +from youtube_dl import compat_str + +year = compat_str(datetime.datetime.now().year) for fn in glob.glob('*.html*'): - with io.open(fn, encoding='utf-8') as f: - content = f.read() + content = read_file(fn) newc = re.sub(r'(?PCopyright © 2011-)(?P[0-9]{4})', 'Copyright © 2011-' + year, content) if content != newc: tmpFn = fn + '.part' - with io.open(tmpFn, 'wt', encoding='utf-8') as outf: - outf.write(newc) + write_file(tmpFn, newc) os.rename(tmpFn, fn) diff --git a/devscripts/gh-pages/update-feed.py b/devscripts/gh-pages/update-feed.py index 506a62377..13a367d34 100755 --- a/devscripts/gh-pages/update-feed.py +++ b/devscripts/gh-pages/update-feed.py @@ -2,10 +2,16 @@ from __future__ import unicode_literals import datetime -import io import json +import os.path import textwrap +import sys +dirn = os.path.dirname + +sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) + +from utils import write_file atom_template = textwrap.dedent("""\ @@ -72,5 +78,4 @@ entries_str = textwrap.indent(''.join(entries), '\t') atom_template = atom_template.replace('@ENTRIES@', entries_str) -with io.open('update/releases.atom', 'w', encoding='utf-8') as atom_file: - atom_file.write(atom_template) +write_file('update/releases.atom', atom_template) diff --git a/devscripts/gh-pages/update-sites.py b/devscripts/gh-pages/update-sites.py index 531c93c70..06a8a474c 100755 --- a/devscripts/gh-pages/update-sites.py +++ b/devscripts/gh-pages/update-sites.py @@ -5,15 +5,17 @@ import os import textwrap +dirn = os.path.dirname + # We must be able to import youtube_dl -sys.path.insert(0, 
os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) +sys.path.insert(0, dirn(dirn(dirn(os.path.abspath(__file__))))) import youtube_dl +from devscripts.utils import read_file, write_file def main(): - with open('supportedsites.html.in', 'r', encoding='utf-8') as tmplf: - template = tmplf.read() + template = read_file('supportedsites.html.in') ie_htmls = [] for ie in youtube_dl.list_extractors(age_limit=None): @@ -29,8 +31,7 @@ def main(): template = template.replace('@SITES@', textwrap.indent('\n'.join(ie_htmls), '\t')) - with open('supportedsites.html', 'w', encoding='utf-8') as sitesf: - sitesf.write(template) + write_file('supportedsites.html', template) if __name__ == '__main__': diff --git a/devscripts/make_contributing.py b/devscripts/make_contributing.py index 226d1a5d6..5a9eb194f 100755 --- a/devscripts/make_contributing.py +++ b/devscripts/make_contributing.py @@ -1,10 +1,11 @@ #!/usr/bin/env python from __future__ import unicode_literals -import io import optparse import re +from utils import read_file, write_file + def main(): parser = optparse.OptionParser(usage='%prog INFILE OUTFILE') @@ -14,8 +15,7 @@ def main(): infile, outfile = args - with io.open(infile, encoding='utf-8') as inf: - readme = inf.read() + readme = read_file(infile) bug_text = re.search( r'(?s)#\s*BUGS\s*[^\n]*\s*(.*?)#\s*COPYRIGHT', readme).group(1) @@ -25,8 +25,7 @@ def main(): out = bug_text + dev_text - with io.open(outfile, 'w', encoding='utf-8') as outf: - outf.write(out) + write_file(outfile, out) if __name__ == '__main__': diff --git a/devscripts/make_issue_template.py b/devscripts/make_issue_template.py index b7ad23d83..65fa8169f 100644 --- a/devscripts/make_issue_template.py +++ b/devscripts/make_issue_template.py @@ -1,8 +1,11 @@ #!/usr/bin/env python from __future__ import unicode_literals -import io import optparse +import os.path +import sys + +from utils import read_file, read_version, write_file def main(): @@ -13,17 +16,11 @@ def main(): 
infile, outfile = args - with io.open(infile, encoding='utf-8') as inf: - issue_template_tmpl = inf.read() - - # Get the version from youtube_dl/version.py without importing the package - exec(compile(open('youtube_dl/version.py').read(), - 'youtube_dl/version.py', 'exec')) + issue_template_tmpl = read_file(infile) - out = issue_template_tmpl % {'version': locals()['__version__']} + out = issue_template_tmpl % {'version': read_version()} - with io.open(outfile, 'w', encoding='utf-8') as outf: - outf.write(out) + write_file(outfile, out) if __name__ == '__main__': main() diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py index 1a841a08b..5b8b123a4 100644 --- a/devscripts/make_lazy_extractors.py +++ b/devscripts/make_lazy_extractors.py @@ -1,7 +1,6 @@ from __future__ import unicode_literals, print_function from inspect import getsource -import io import os from os.path import dirname as dirn import re @@ -9,17 +8,20 @@ print('WARNING: Lazy loading extractors is an experimental feature that may not always work', file=sys.stderr) -sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) +sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) lazy_extractors_filename = sys.argv[1] if os.path.exists(lazy_extractors_filename): os.remove(lazy_extractors_filename) # Py2: may be confused by leftover lazy_extractors.pyc -try: - os.remove(lazy_extractors_filename + 'c') -except OSError: - pass - +if sys.version_info[0] < 3: + for c in ('c', 'o'): + try: + os.remove(lazy_extractors_filename + c) + except OSError: + pass + +from devscripts.utils import read_file, write_file from youtube_dl.compat import compat_register_utf8 compat_register_utf8() @@ -27,8 +29,7 @@ from youtube_dl.extractor import _ALL_CLASSES from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor -with open('devscripts/lazy_load_template.py', 'rt') as f: - module_template = f.read() +module_template = read_file('devscripts/lazy_load_template.py') def
get_source(m): @@ -114,7 +115,17 @@ def build_lazy_ie(ie, name): module_contents.append( '_ALL_CLASSES = [{0}]'.format(', '.join(names))) -module_src = '\n'.join(module_contents) + '\n' +module_src = '\n'.join(module_contents) + +write_file(lazy_extractors_filename, module_src + '\n') -with io.open(lazy_extractors_filename, 'wt', encoding='utf-8') as f: - f.write(module_src) +# work around JVM byte code module limit in Jython +if sys.platform.startswith('java') and sys.version_info[:2] == (2, 7): + import subprocess + from youtube_dl.compat import compat_subprocess_get_DEVNULL + # if Python 2.7 is available, use it to compile the module for Jython + try: + # if Python 2.7 is available, use it to compile the module for Jython + subprocess.check_call(['python2.7', '-m', 'py_compile', lazy_extractors_filename], stdout=compat_subprocess_get_DEVNULL()) + except Exception: + pass diff --git a/devscripts/make_readme.py b/devscripts/make_readme.py index 8fbce0796..7a5b04dcc 100755 --- a/devscripts/make_readme.py +++ b/devscripts/make_readme.py @@ -1,8 +1,14 @@ from __future__ import unicode_literals -import io -import sys +import os.path import re +import sys +dirn = os.path.dirname + +sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) + +from utils import read_file +from youtube_dl.compat import compat_open as open README_FILE = 'README.md' helptext = sys.stdin.read() @@ -10,8 +16,7 @@ if isinstance(helptext, bytes): helptext = helptext.decode('utf-8') -with io.open(README_FILE, encoding='utf-8') as f: - oldreadme = f.read() +oldreadme = read_file(README_FILE) header = oldreadme[:oldreadme.index('# OPTIONS')] footer = oldreadme[oldreadme.index('# CONFIGURATION'):] @@ -20,7 +25,7 @@ options = re.sub(r'(?m)^ (\w.+)$', r'## \1', options) options = '# OPTIONS\n' + options + '\n' -with io.open(README_FILE, 'w', encoding='utf-8') as f: +with open(README_FILE, 'w', encoding='utf-8') as f: f.write(header) f.write(options) f.write(footer) diff --git 
a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py index 764795bc5..c424d18d7 100644 --- a/devscripts/make_supportedsites.py +++ b/devscripts/make_supportedsites.py @@ -1,17 +1,19 @@ #!/usr/bin/env python from __future__ import unicode_literals -import io import optparse -import os +import os.path import sys - # Import youtube_dl -ROOT_DIR = os.path.join(os.path.dirname(__file__), '..') -sys.path.insert(0, ROOT_DIR) +dirn = os.path.dirname + +sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) + import youtube_dl +from utils import write_file + def main(): parser = optparse.OptionParser(usage='%prog OUTFILE.md') @@ -38,8 +40,7 @@ def gen_ies_md(ies): ' - ' + md + '\n' for md in gen_ies_md(ies)) - with io.open(outfile, 'w', encoding='utf-8') as outf: - outf.write(out) + write_file(outfile, out) if __name__ == '__main__': diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py index 76bf873e1..0090ada3e 100644 --- a/devscripts/prepare_manpage.py +++ b/devscripts/prepare_manpage.py @@ -1,13 +1,13 @@ from __future__ import unicode_literals -import io import optparse import os.path import re +from utils import read_file, write_file + ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) README_FILE = os.path.join(ROOT_DIR, 'README.md') - PREFIX = r'''%YOUTUBE-DL(1) # NAME @@ -29,8 +29,7 @@ def main(): outfile, = args - with io.open(README_FILE, encoding='utf-8') as f: - readme = f.read() + readme = read_file(README_FILE) readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme) readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme) @@ -38,8 +37,7 @@ def main(): readme = filter_options(readme) - with io.open(outfile, 'w', encoding='utf-8') as outf: - outf.write(readme) + write_file(outfile, readme) def filter_options(readme): diff --git a/devscripts/utils.py b/devscripts/utils.py new file mode 100644 index 000000000..2d072d2e0 --- /dev/null +++ b/devscripts/utils.py @@ -0,0 +1,62 @@ +# 
coding: utf-8 +from __future__ import unicode_literals + +import argparse +import functools +import os.path +import subprocess +import sys + +dirn = os.path.dirname + +sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) + +from youtube_dl.compat import ( + compat_kwargs, + compat_open as open, +) + + +def read_file(fname): + with open(fname, encoding='utf-8') as f: + return f.read() + + +def write_file(fname, content, mode='w'): + with open(fname, mode, encoding='utf-8') as f: + return f.write(content) + + +def read_version(fname='youtube_dl/version.py'): + """Get the version without importing the package""" + exec(compile(read_file(fname), fname, 'exec')) + return locals()['__version__'] + + +def get_filename_args(has_infile=False, default_outfile=None): + parser = argparse.ArgumentParser() + if has_infile: + parser.add_argument('infile', help='Input file') + kwargs = {'nargs': '?', 'default': default_outfile} if default_outfile else {} + kwargs['help'] = 'Output file' + parser.add_argument('outfile', **compat_kwargs(kwargs)) + + opts = parser.parse_args() + if has_infile: + return opts.infile, opts.outfile + return opts.outfile + + +def compose_functions(*functions): + return lambda x: functools.reduce(lambda y, f: f(y), functions, x) + + +def run_process(*args, **kwargs): + kwargs.setdefault('text', True) + kwargs.setdefault('check', True) + kwargs.setdefault('capture_output', True) + if kwargs['text']: + kwargs.setdefault('encoding', 'utf-8') + kwargs.setdefault('errors', 'replace') + kwargs = compat_kwargs(kwargs) + return subprocess.run(args, **kwargs) diff --git a/devscripts/zsh-completion.py b/devscripts/zsh-completion.py index 60aaf76cc..ebd552fcb 100755 --- a/devscripts/zsh-completion.py +++ b/devscripts/zsh-completion.py @@ -7,6 +7,8 @@ sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) import youtube_dl +from utils import read_file, write_file + ZSH_COMPLETION_FILE = "youtube-dl.zsh" ZSH_COMPLETION_TEMPLATE = 
"devscripts/zsh-completion.in" @@ -34,15 +36,13 @@ def build_completion(opt_parser): flags = [opt.get_opt_string() for opt in opts] - with open(ZSH_COMPLETION_TEMPLATE) as f: - template = f.read() + template = read_file(ZSH_COMPLETION_TEMPLATE) template = template.replace("{{fileopts}}", "|".join(fileopts)) template = template.replace("{{diropts}}", "|".join(diropts)) template = template.replace("{{flags}}", " ".join(flags)) - with open(ZSH_COMPLETION_FILE, "w") as f: - f.write(template) + write_file(ZSH_COMPLETION_FILE, template) parser = youtube_dl.parseOpts()[0] diff --git a/test/helper.py b/test/helper.py index aa99001b2..fc55c6b46 100644 --- a/test/helper.py +++ b/test/helper.py @@ -1,7 +1,6 @@ from __future__ import unicode_literals import errno -import io import hashlib import json import os.path @@ -14,6 +13,7 @@ import youtube_dl.extractor from youtube_dl import YoutubeDL from youtube_dl.compat import ( + compat_open as open, compat_os_name, compat_str, ) @@ -29,10 +29,10 @@ def get_params(override=None): "parameters.json") LOCAL_PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "local_parameters.json") - with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: + with open(PARAMETERS_FILE, encoding='utf-8') as pf: parameters = json.load(pf) if os.path.exists(LOCAL_PARAMETERS_FILE): - with io.open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf: + with open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf: parameters.update(json.load(pf)) if override: parameters.update(override) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 34773fbd0..3f96645de 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals # Allow direct execution -import io import os import sys import unittest @@ -21,6 +20,7 @@ from youtube_dl.compat import ( compat_etree_fromstring, compat_http_server, + compat_open as open, ) from youtube_dl.extractor.common import 
InfoExtractor from youtube_dl.extractor import ( @@ -902,8 +902,8 @@ def test_parse_m3u8_formats(self): ] for m3u8_file, m3u8_url, expected_formats in _TEST_CASES: - with io.open('./test/testdata/m3u8/%s.m3u8' % m3u8_file, - mode='r', encoding='utf-8') as f: + with open('./test/testdata/m3u8/%s.m3u8' % m3u8_file, + mode='r', encoding='utf-8') as f: formats = self.ie._parse_m3u8_formats( f.read(), m3u8_url, ext='mp4') self.ie._sort_formats(formats) @@ -1127,8 +1127,8 @@ def test_parse_mpd_formats(self): ] for mpd_file, mpd_url, mpd_base_url, expected_formats in _TEST_CASES: - with io.open('./test/testdata/mpd/%s.mpd' % mpd_file, - mode='r', encoding='utf-8') as f: + with open('./test/testdata/mpd/%s.mpd' % mpd_file, + mode='r', encoding='utf-8') as f: formats = self.ie._parse_mpd_formats( compat_etree_fromstring(f.read().encode('utf-8')), mpd_base_url=mpd_base_url, mpd_url=mpd_url) @@ -1154,8 +1154,8 @@ def test_parse_f4m_formats(self): ] for f4m_file, f4m_url, expected_formats in _TEST_CASES: - with io.open('./test/testdata/f4m/%s.f4m' % f4m_file, - mode='r', encoding='utf-8') as f: + with open('./test/testdata/f4m/%s.f4m' % f4m_file, + mode='r', encoding='utf-8') as f: formats = self.ie._parse_f4m_formats( compat_etree_fromstring(f.read().encode('utf-8')), f4m_url, None) @@ -1202,8 +1202,8 @@ def test_parse_xspf(self): ] for xspf_file, xspf_url, expected_entries in _TEST_CASES: - with io.open('./test/testdata/xspf/%s.xspf' % xspf_file, - mode='r', encoding='utf-8') as f: + with open('./test/testdata/xspf/%s.xspf' % xspf_file, + mode='r', encoding='utf-8') as f: entries = self.ie._parse_xspf( compat_etree_fromstring(f.read().encode('utf-8')), xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 6cf555827..d994682b2 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -22,6 +22,7 @@ compat_http_cookiejar_Cookie, compat_http_cookies_SimpleCookie, compat_kwargs, + compat_open as open, 
compat_str, compat_urllib_error, ) @@ -701,12 +702,12 @@ def test_postprocessors(self): class SimplePP(PostProcessor): def run(self, info): - with open(audiofile, 'wt') as f: + with open(audiofile, 'w') as f: f.write('EXAMPLE') return [info['filepath']], info def run_pp(params, PP): - with open(filename, 'wt') as f: + with open(filename, 'w') as f: f.write('EXAMPLE') ydl = YoutubeDL(params) ydl.add_post_processor(PP()) @@ -725,7 +726,7 @@ def run_pp(params, PP): class ModifierPP(PostProcessor): def run(self, info): - with open(info['filepath'], 'wt') as f: + with open(info['filepath'], 'w') as f: f.write('MODIFIED') return [], info diff --git a/test/test_download.py b/test/test_download.py index d50008307..e0bc8cb95 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -20,15 +20,15 @@ import hashlib -import io import json import socket import youtube_dl.YoutubeDL from youtube_dl.compat import ( compat_http_client, - compat_urllib_error, compat_HTTPError, + compat_open as open, + compat_urllib_error, ) from youtube_dl.utils import ( DownloadError, @@ -245,7 +245,7 @@ def try_rm_tcs_files(tcs=None): self.assertTrue( os.path.exists(info_json_fn), 'Missing info file %s' % info_json_fn) - with io.open(info_json_fn, encoding='utf-8') as infof: + with open(info_json_fn, encoding='utf-8') as infof: info_dict = json.load(infof) expect_info_dict(self, info_dict, tc.get('info_dict', {})) finally: diff --git a/test/test_execution.py b/test/test_execution.py index ae59e562a..9daaafa6c 100644 --- a/test/test_execution.py +++ b/test/test_execution.py @@ -8,19 +8,18 @@ import sys import os import subprocess -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from youtube_dl.compat import compat_register_utf8 +rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + +sys.path.insert(0, rootDir) + +from youtube_dl.compat import compat_register_utf8, compat_subprocess_get_DEVNULL from youtube_dl.utils import encodeArgument 
compat_register_utf8() -rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -try: - _DEV_NULL = subprocess.DEVNULL -except AttributeError: - _DEV_NULL = open(os.devnull, 'wb') +_DEV_NULL = compat_subprocess_get_DEVNULL() class TestExecution(unittest.TestCase): @@ -52,10 +51,10 @@ def test_lazy_extractors(self): subprocess.check_call([sys.executable, os.path.normpath('devscripts/make_lazy_extractors.py'), lazy_extractors], cwd=rootDir, stdout=_DEV_NULL) subprocess.check_call([sys.executable, os.path.normpath('test/test_all_urls.py')], cwd=rootDir, stdout=_DEV_NULL) finally: - for x in ['', 'c'] if sys.version_info[0] < 3 else ['']: + for x in ('', 'c') if sys.version_info[0] < 3 else ('',): try: os.remove(lazy_extractors + x) - except (IOError, OSError): + except OSError: pass diff --git a/test/test_http.py b/test/test_http.py index 1a6b2e878..89580969d 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -41,10 +41,12 @@ from youtube_dl.utils import ( sanitized_Request, + update_Request, urlencode_postdata, ) from test.helper import ( + expectedFailureIf, FakeYDL, FakeLogger, http_server_port, @@ -243,6 +245,11 @@ def end_headers(self): class TestHTTP(unittest.TestCase): + # when does it make sense to check the SSL certificate? 
+ _check_cert = ( + sys.version_info >= (3, 2) + or (sys.version_info[0] == 2 and sys.version_info[1:] >= (7, 19))) + def setUp(self): # HTTP server self.http_httpd = compat_http_server.HTTPServer( @@ -307,10 +314,7 @@ def _test_url(self, path, host='127.0.0.1', scheme='http', port=None): else self.https_port if scheme == 'https' else self.http_port, path) - @unittest.skipUnless( - sys.version_info >= (3, 2) - or (sys.version_info[0] == 2 and sys.version_info[1:] >= (7, 9)), - 'No support for certificate check in SSL') + @unittest.skipUnless(_check_cert, 'No support for certificate check in SSL') def test_nocheckcertificate(self): with FakeYDL({'logger': FakeLogger()}) as ydl: with self.assertRaises(compat_urllib_error.URLError): @@ -376,6 +380,8 @@ def do_req(redirect_status, method, check_no_content=False): with self.assertRaises(compat_urllib_HTTPError): do_req(code, 'GET') + # Jython 2.7.1 times out for some reason + @expectedFailureIf(sys.platform.startswith('java') and sys.version_info < (2, 7, 2)) def test_content_type(self): # https://github.com/yt-dlp/yt-dlp/commit/379a4f161d4ad3e40932dcf5aca6e6fb9715ab28 with FakeYDL({'nocheckcertificate': True}) as ydl: @@ -390,6 +396,18 @@ def test_content_type(self): headers = ydl.urlopen(r).read().decode('utf-8') self.assertIn('Content-Type: application/x-www-form-urlencoded', headers) + def test_update_req(self): + req = sanitized_Request('http://example.com') + assert req.data is None + assert req.get_method() == 'GET' + assert not req.has_header('Content-Type') + # Test that zero-byte payloads will be sent + req = update_Request(req, data=b'') + assert req.data == b'' + assert req.get_method() == 'POST' + # yt-dl expects data to be encoded and Content-Type to be added by sender + # assert req.get_header('Content-Type') == 'application/x-www-form-urlencoded' + def test_cookiejar(self): with FakeYDL() as ydl: ydl.cookiejar.set_cookie(compat_http_cookiejar_Cookie( diff --git a/test/test_swfinterp.py 
b/test/test_swfinterp.py index 9f18055e6..7c282ee00 100644 --- a/test/test_swfinterp.py +++ b/test/test_swfinterp.py @@ -5,16 +5,18 @@ import os import sys import unittest -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +dirn = os.path.dirname + +sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) import errno -import io import json import re import subprocess from youtube_dl.swfinterp import SWFInterpreter +from youtube_dl.compat import compat_open as open TEST_DIR = os.path.join( @@ -43,7 +45,7 @@ def test_func(self): '-static-link-runtime-shared-libraries', as_file]) except OSError as ose: if ose.errno == errno.ENOENT: - print('mxmlc not found! Skipping test.') + self.skipTest('mxmlc not found!') return raise @@ -51,7 +53,7 @@ def test_func(self): swf_content = swf_f.read() swfi = SWFInterpreter(swf_content) - with io.open(as_file, 'r', encoding='utf-8') as as_f: + with open(as_file, 'r', encoding='utf-8') as as_f: as_content = as_f.read() def _find_spec(key): diff --git a/test/test_unicode_literals.py b/test/test_unicode_literals.py index c7c2252f5..0c83f2a0c 100644 --- a/test/test_unicode_literals.py +++ b/test/test_unicode_literals.py @@ -2,14 +2,15 @@ # Allow direct execution import os +import re import sys import unittest -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -import io -import re +dirn = os.path.dirname + +rootDir = dirn(dirn(os.path.abspath(__file__))) -rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.insert(0, rootDir) IGNORED_FILES = [ 'setup.py', # http://bugs.python.org/issue13943 @@ -24,6 +25,7 @@ ] from test.helper import assertRegexpMatches +from youtube_dl.compat import compat_open as open class TestUnicodeLiterals(unittest.TestCase): @@ -41,7 +43,7 @@ def test_all_files(self): continue fn = os.path.join(dirpath, basename) - with io.open(fn, encoding='utf-8') as inf: + with open(fn, encoding='utf-8') as inf: code = inf.read() if "'" not 
in code and '"' not in code: diff --git a/test/test_write_annotations.py b/test/test_write_annotations.py index 41abdfe3b..68e0a391d 100644 --- a/test/test_write_annotations.py +++ b/test/test_write_annotations.py @@ -11,12 +11,11 @@ from test.helper import get_params, try_rm -import io - import xml.etree.ElementTree import youtube_dl.YoutubeDL import youtube_dl.extractor +from youtube_dl.compat import compat_open as open class YoutubeDL(youtube_dl.YoutubeDL): @@ -51,7 +50,7 @@ def test_info_json(self): ydl.download([TEST_ID]) self.assertTrue(os.path.exists(ANNOTATIONS_FILE)) annoxml = None - with io.open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as annof: + with open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as annof: annoxml = xml.etree.ElementTree.parse(annof) self.assertTrue(annoxml is not None, 'Failed to parse annotations XML') root = annoxml.getroot() diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 5dcabaf95..f45dfec7c 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -8,11 +8,14 @@ import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -import io import re import string -from youtube_dl.compat import compat_str, compat_urlretrieve +from youtube_dl.compat import ( + compat_open as open, + compat_str, + compat_urlretrieve, +) from test.helper import FakeYDL from youtube_dl.extractor import YoutubeIE @@ -208,7 +211,7 @@ def test_func(self): if not os.path.exists(fn): compat_urlretrieve(url, fn) - with io.open(fn, encoding='utf-8') as testf: + with open(fn, encoding='utf-8') as testf: jscode = testf.read() self.assertEqual(sig_func(jscode, sig_input), expected_sig) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 127eabfe0..8a6bc6eb8 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -4,11 +4,9 @@ from __future__ import absolute_import, unicode_literals import collections -import contextlib import copy import 
datetime import errno -import fileinput import io import itertools import json @@ -45,6 +43,7 @@ compat_kwargs, compat_map as map, compat_numeric_types, + compat_open as open, compat_os_name, compat_str, compat_tokenize_tokenize, @@ -1981,7 +1980,7 @@ def ensure_dir_exists(path): else: try: self.to_screen('[info] Writing video description to: ' + descfn) - with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: + with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: descfile.write(info_dict['description']) except (OSError, IOError): self.report_error('Cannot write description file ' + descfn) @@ -1996,7 +1995,7 @@ def ensure_dir_exists(path): else: try: self.to_screen('[info] Writing video annotations to: ' + annofn) - with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile: + with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile: annofile.write(info_dict['annotations']) except (KeyError, TypeError): self.report_warning('There are no annotations to write.') @@ -2023,7 +2022,7 @@ def ensure_dir_exists(path): try: # Use newline='' to prevent conversion of newline characters # See https://github.com/ytdl-org/youtube-dl/issues/10268 - with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile: + with open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile: subfile.write(sub_info['data']) except (OSError, IOError): self.report_error('Cannot write subtitles file ' + sub_filename) @@ -2032,7 +2031,7 @@ def ensure_dir_exists(path): try: sub_data = ie._request_webpage( sub_info['url'], info_dict['id'], note=False).read() - with io.open(encodeFilename(sub_filename), 'wb') as subfile: + with open(encodeFilename(sub_filename), 'wb') as subfile: subfile.write(sub_data) except (ExtractorError, IOError, OSError, ValueError) as err: self.report_warning('Unable to download subtitle for "%s": %s' % @@ -2236,12 +2235,8 @@ def download(self, url_list): return 
self._download_retcode def download_with_info_file(self, info_filename): - with contextlib.closing(fileinput.FileInput( - [info_filename], mode='r', - openhook=fileinput.hook_encoded('utf-8'))) as f: - # FileInput doesn't have a read method, we can't call json.load - # TODO: let's use io.open(), then - info = self.filter_requested_info(json.loads('\n'.join(f))) + with open(info_filename, encoding='utf-8') as f: + info = self.filter_requested_info(json.load(f)) try: self.process_ie_result(info, download=True) except DownloadError: diff --git a/youtube_dl/cache.py b/youtube_dl/cache.py index 4822439d0..54123da0e 100644 --- a/youtube_dl/cache.py +++ b/youtube_dl/cache.py @@ -1,14 +1,16 @@ from __future__ import unicode_literals import errno -import io import json import os import re import shutil import traceback -from .compat import compat_getenv +from .compat import ( + compat_getenv, + compat_open as open, +) from .utils import ( error_to_compat_str, expand_path, @@ -83,7 +85,7 @@ def load(self, section, key, dtype='json', default=None, min_ver=None): cache_fn = self._get_cache_fn(section, key, dtype) try: try: - with io.open(cache_fn, 'r', encoding='utf-8') as cachef: + with open(cache_fn, 'r', encoding='utf-8') as cachef: return self._validate(json.load(cachef), min_ver) except ValueError: try: diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 1d784d90f..da6d70ec4 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -131,7 +131,7 @@ class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie): def load(self, rawdata): must_have_value = 0 if not isinstance(rawdata, dict): - if sys.version_info[:2] != (2, 7): + if sys.version_info[:2] != (2, 7) or sys.platform.startswith('java'): # attribute must have value for parsing rawdata, must_have_value = re.subn( r'(?i)(;\s*)(secure|httponly)(\s*(?:;|$))', r'\1\2=\2\3', rawdata) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 7f416d312..0eca9f844 100644 --- 
a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -25,6 +25,7 @@ compat_integer_types, compat_http_client, compat_map as map, + compat_open as open, compat_os_name, compat_str, compat_urllib_error, diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index b05d60435..45b1add73 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -7,6 +7,7 @@ import tempfile from ..compat import ( + compat_open as open, compat_urlparse, compat_kwargs, ) diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py index 5e7b6e2df..b6c60e127 100644 --- a/youtube_dl/postprocessor/embedthumbnail.py +++ b/youtube_dl/postprocessor/embedthumbnail.py @@ -18,6 +18,8 @@ shell_quote, ) +from ..compat import compat_open as open + class EmbedThumbnailPPError(PostProcessingError): pass diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 8c29c8d59..801160e6c 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -1,6 +1,5 @@ from __future__ import unicode_literals -import io import os import subprocess import time @@ -9,6 +8,7 @@ from .common import AudioConversionError, PostProcessor +from ..compat import compat_open as open from ..utils import ( encodeArgument, encodeFilename, @@ -493,7 +493,7 @@ def add(meta_list, info_list=None): chapters = info.get('chapters', []) if chapters: metadata_filename = replace_extension(filename, 'meta') - with io.open(metadata_filename, 'wt', encoding='utf-8') as f: + with open(metadata_filename, 'w', encoding='utf-8') as f: def ffmpeg_escape(text): return re.sub(r'(=|;|#|\\|\n)', r'\\\1', text) @@ -636,7 +636,7 @@ def run(self, info): with open(dfxp_file, 'rb') as f: srt_data = dfxp2srt(f.read()) - with io.open(srt_file, 'wt', encoding='utf-8') as f: + with open(srt_file, 'w', encoding='utf-8') as f: f.write(srt_data) old_file = srt_file @@ -652,7 +652,7 @@ def 
run(self, info): self.run_ffmpeg(old_file, new_file, ['-f', new_format]) - with io.open(new_file, 'rt', encoding='utf-8') as f: + with open(new_file, 'r', encoding='utf-8') as f: subs[lang] = { 'ext': new_ext, 'data': f.read(), diff --git a/youtube_dl/update.py b/youtube_dl/update.py index 4357ec14b..3261053a7 100644 --- a/youtube_dl/update.py +++ b/youtube_dl/update.py @@ -9,7 +9,10 @@ import traceback from zipimport import zipimporter -from .compat import compat_realpath +from .compat import ( + compat_open as open, + compat_realpath, +) from .utils import encode_compat_str from .version import __version__ diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 6d798f13a..b5475434f 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2996,8 +2996,7 @@ def redirect_request(self, req, fp, code, msg, headers, newurl): # Technically the Cookie header should be in unredirected_hdrs; # however in practice some may set it in normal headers anyway. # We will remove it here to prevent any leaks. 
- # Also remove unwanted and undocumented Host header for old URL - remove_headers = ['Cookie', 'Host'] + remove_headers = ['Cookie'] # A 303 must either use GET or HEAD for subsequent request # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4 @@ -3016,7 +3015,7 @@ def redirect_request(self, req, fp, code, msg, headers, newurl): remove_headers.extend(['Content-Length', 'Content-Type']) # NB: don't use dict comprehension for python 2.6 compatibility - new_headers = dict((k, v) for k, v in req.header_items() + new_headers = dict((k, v) for k, v in req.headers.items() if k.title() not in remove_headers) return compat_urllib_request.Request( @@ -4187,7 +4186,7 @@ def update_url_query(url, query): def update_Request(req, url=None, data=None, headers={}, query={}): req_headers = req.headers.copy() req_headers.update(headers) - req_data = data or req.data + req_data = data if data is not None else req.data req_url = update_url_query(url or req.get_full_url(), query) req_get_method = req.get_method() if req_get_method == 'HEAD':