Compare commits
278 Commits
2016.02.22
...
2016.03.18
Author | SHA1 | Date | |
---|---|---|---|
![]() |
0d33166ec5 | ||
![]() |
87c03c6bd2 | ||
![]() |
4c92fd2e83 | ||
![]() |
e3d17b3c07 | ||
![]() |
810c10baa1 | ||
![]() |
57f7e3c62d | ||
![]() |
0d0e282912 | ||
![]() |
85e8f26b82 | ||
![]() |
b57fecfddd | ||
![]() |
8c97e7efb6 | ||
![]() |
cc162f6a0a | ||
![]() |
cf45ed786e | ||
![]() |
574b2a7393 | ||
![]() |
9f02ff537c | ||
![]() |
0436ec0e7a | ||
![]() |
11f12195af | ||
![]() |
a646a8cf98 | ||
![]() |
63f41d3821 | ||
![]() |
c5229f3926 | ||
![]() |
96f4f796fb | ||
![]() |
70cab344c4 | ||
![]() |
a7ba57dc17 | ||
![]() |
83548824c2 | ||
![]() |
354dbbd880 | ||
![]() |
23edc49509 | ||
![]() |
48254c3f2c | ||
![]() |
2cab48704c | ||
![]() |
64d4f31d78 | ||
![]() |
0c9ff24041 | ||
![]() |
3ff8279e80 | ||
![]() |
cb6e477dfe | ||
![]() |
edfd93518e | ||
![]() |
89807d6a82 | ||
![]() |
49dea4913b | ||
![]() |
dec2cae0a7 | ||
![]() |
cf6cd07396 | ||
![]() |
975b9c9ab0 | ||
![]() |
8ac73bdbe4 | ||
![]() |
877f440f7b | ||
![]() |
d13bdc3824 | ||
![]() |
744daf9418 | ||
![]() |
bf475e1990 | ||
![]() |
203f3d779a | ||
![]() |
4230c4894d | ||
![]() |
6bb266693f | ||
![]() |
5d53c32701 | ||
![]() |
2e7e561c1d | ||
![]() |
d8515fd41c | ||
![]() |
694c47b261 | ||
![]() |
77dea16ac8 | ||
![]() |
6ae27bed01 | ||
![]() |
da1973a038 | ||
![]() |
be24916a7f | ||
![]() |
2cb99ebbd0 | ||
![]() |
91ee320bfa | ||
![]() |
8fb754bcd0 | ||
![]() |
b7b72db9ad | ||
![]() |
634415ca17 | ||
![]() |
2f7ae819ac | ||
![]() |
0a477f8731 | ||
![]() |
a755f82549 | ||
![]() |
7f4173ae7c | ||
![]() |
fb47597b09 | ||
![]() |
450b233cc2 | ||
![]() |
b7d7674f1e | ||
![]() |
0e832c2c97 | ||
![]() |
8e4aa7bf18 | ||
![]() |
a42dfa629e | ||
![]() |
b970dfddaf | ||
![]() |
46a4ea8276 | ||
![]() |
3f2f4a94aa | ||
![]() |
f930e0c76e | ||
![]() |
0fdbb3322b | ||
![]() |
e9c8999ede | ||
![]() |
73cbd709f9 | ||
![]() |
9dce3c095b | ||
![]() |
e5a2e17a9c | ||
![]() |
0ec589fac3 | ||
![]() |
36bb63e084 | ||
![]() |
91d6aafb48 | ||
![]() |
c8868a9d83 | ||
![]() |
09f572fbc0 | ||
![]() |
58e6d097d8 | ||
![]() |
15bf934de5 | ||
![]() |
cdfee16818 | ||
![]() |
bcb668de18 | ||
![]() |
fac7e79277 | ||
![]() |
a6c8b75904 | ||
![]() |
25cb05bda9 | ||
![]() |
6fa6d38549 | ||
![]() |
883c052378 | ||
![]() |
61f317c24c | ||
![]() |
64f08d4ff2 | ||
![]() |
e738e43358 | ||
![]() |
f6f6217a98 | ||
![]() |
31db8709bf | ||
![]() |
5080cbf9fd | ||
![]() |
9880124196 | ||
![]() |
9c7b509b2a | ||
![]() |
e0dccdd398 | ||
![]() |
5d583bdf6c | ||
![]() |
1e501364d5 | ||
![]() |
74278def2e | ||
![]() |
e375a149e1 | ||
![]() |
2bfc0e97f6 | ||
![]() |
ac45505528 | ||
![]() |
7404061141 | ||
![]() |
46c329d6f6 | ||
![]() |
1818e4c2b4 | ||
![]() |
e7bd17373d | ||
![]() |
c58e74062f | ||
![]() |
6d210f2090 | ||
![]() |
af7d5a63b2 | ||
![]() |
e41acb6364 | ||
![]() |
bdf7f13954 | ||
![]() |
0f56a4b443 | ||
![]() |
1b5284b13f | ||
![]() |
d1e4a464cd | ||
![]() |
ff059017c0 | ||
![]() |
f22ba4bd60 | ||
![]() |
1db772673e | ||
![]() |
75313f2baa | ||
![]() |
090eb8e25f | ||
![]() |
a9793f58a1 | ||
![]() |
7177fd24f8 | ||
![]() |
1e501f6c40 | ||
![]() |
2629a3802c | ||
![]() |
51ce91174b | ||
![]() |
107d0c421a | ||
![]() |
18b0b23992 | ||
![]() |
d1b29d1342 | ||
![]() |
2def60c5f3 | ||
![]() |
19a17d4623 | ||
![]() |
845817aadf | ||
![]() |
3233a68fbb | ||
![]() |
cf074e5ddd | ||
![]() |
002c755248 | ||
![]() |
d627cec608 | ||
![]() |
1315224cbb | ||
![]() |
7760b9ff4d | ||
![]() |
28559564b2 | ||
![]() |
fa880d20ad | ||
![]() |
ae7d31af1c | ||
![]() |
9d303bf29b | ||
![]() |
5f1688f271 | ||
![]() |
1d4c9ed90c | ||
![]() |
d48352fb5d | ||
![]() |
6d6536acb2 | ||
![]() |
b6f94d81ea | ||
![]() |
8477a69283 | ||
![]() |
d58cb3ec7e | ||
![]() |
8a370aedac | ||
![]() |
24ca0e9c0b | ||
![]() |
e1dd521e49 | ||
![]() |
1255733945 | ||
![]() |
3201a67f61 | ||
![]() |
d0ff690d68 | ||
![]() |
fb640d0a3d | ||
![]() |
38f9ef31dc | ||
![]() |
a8276b2680 | ||
![]() |
ececca6cde | ||
![]() |
8bbb4b56ee | ||
![]() |
539a1641c6 | ||
![]() |
1b0635aba3 | ||
![]() |
429491f531 | ||
![]() |
e9c0cdd389 | ||
![]() |
0cae023b24 | ||
![]() |
8ee239e921 | ||
![]() |
8bb56eeeea | ||
![]() |
fa9e259fd9 | ||
![]() |
f3bdae76de | ||
![]() |
03879ff054 | ||
![]() |
c8398a9b87 | ||
![]() |
b8972bd69d | ||
![]() |
0ae937a798 | ||
![]() |
4459bef203 | ||
![]() |
e07237f640 | ||
![]() |
8c5a994424 | ||
![]() |
2eb25b256b | ||
![]() |
f3bc19a989 | ||
![]() |
7a8fef3173 | ||
![]() |
7465e7e42d | ||
![]() |
5e73a67d44 | ||
![]() |
2316dc2b9a | ||
![]() |
a2d7797cee | ||
![]() |
fd050249af | ||
![]() |
7bcd2830dd | ||
![]() |
47462a125b | ||
![]() |
7caf9830b0 | ||
![]() |
2bc0c46f98 | ||
![]() |
3318832e9d | ||
![]() |
e7d2084568 | ||
![]() |
c2d3cb4c63 | ||
![]() |
c48dd4400f | ||
![]() |
e38cafe986 | ||
![]() |
85ca019d96 | ||
![]() |
4a5ba28a87 | ||
![]() |
82156fdbf0 | ||
![]() |
6114090418 | ||
![]() |
3099b31276 | ||
![]() |
f17f86513e | ||
![]() |
90f794c6c3 | ||
![]() |
66ca2cfddd | ||
![]() |
269dd2c6a7 | ||
![]() |
e7998f59aa | ||
![]() |
9fb556eef0 | ||
![]() |
e781ab63db | ||
![]() |
3e76968220 | ||
![]() |
2812c24c16 | ||
![]() |
d77ab8e255 | ||
![]() |
4b3cd7316c | ||
![]() |
6dae56384a | ||
![]() |
2b2dfae83e | ||
![]() |
6c10dbeae9 | ||
![]() |
9173202b84 | ||
![]() |
8870bb4653 | ||
![]() |
7a0e7779fe | ||
![]() |
a048ffc9b0 | ||
![]() |
4587915b2a | ||
![]() |
da665ddc25 | ||
![]() |
5add979d91 | ||
![]() |
20afe8bd14 | ||
![]() |
940b606a07 | ||
![]() |
9505053704 | ||
![]() |
2c9ca78281 | ||
![]() |
63719a8ac3 | ||
![]() |
8fab62482a | ||
![]() |
d6e9c2706f | ||
![]() |
f7f2e53a0a | ||
![]() |
9cdffeeb3f | ||
![]() |
fbb6edd298 | ||
![]() |
5eb6bdced4 | ||
![]() |
5633b4d39d | ||
![]() |
4435c6e98e | ||
![]() |
2ebd2eac88 | ||
![]() |
b78b292f0c | ||
![]() |
efbd6fb8bb | ||
![]() |
680079be39 | ||
![]() |
e4fc8d2ebe | ||
![]() |
f52354a889 | ||
![]() |
59f898b7a7 | ||
![]() |
8f4a2124a9 | ||
![]() |
481888294d | ||
![]() |
d1e440a4a1 | ||
![]() |
81bdc8fdf6 | ||
![]() |
e048d87fc9 | ||
![]() |
e26cde0927 | ||
![]() |
20108c6b90 | ||
![]() |
9195ef745a | ||
![]() |
d0459c530d | ||
![]() |
f160785c5c | ||
![]() |
5c0a57185c | ||
![]() |
43479d9e9d | ||
![]() |
c0da50d2b2 | ||
![]() |
c24883a1c0 | ||
![]() |
1b77ee6248 | ||
![]() |
bf4b3b6bd9 | ||
![]() |
efbeddead3 | ||
![]() |
3cfeb1624a | ||
![]() |
b95dc034ca | ||
![]() |
86a7dbe66e | ||
![]() |
b43a7a92cd | ||
![]() |
6563d31710 | ||
![]() |
cf89ba9eff | ||
![]() |
9b01272832 | ||
![]() |
58525c94d5 | ||
![]() |
621bd0cda9 | ||
![]() |
1610f770d7 | ||
![]() |
0fc871d2f0 | ||
![]() |
1ad6143061 | ||
![]() |
101067de12 | ||
![]() |
c1c05c67ea | ||
![]() |
399a76e67b | ||
![]() |
f34294fa0c | ||
![]() |
99cbe98ce8 | ||
![]() |
12b84ac8c1 | ||
![]() |
199e724291 | ||
![]() |
dd86780596 |
3
.gitignore
vendored
3
.gitignore
vendored
@@ -1,5 +1,6 @@
|
||||
*.pyc
|
||||
*.pyo
|
||||
*.class
|
||||
*~
|
||||
*.DS_Store
|
||||
wine-py2exe/
|
||||
@@ -32,4 +33,4 @@ test/testdata
|
||||
.tox
|
||||
youtube-dl.zsh
|
||||
.idea
|
||||
.idea/*
|
||||
.idea/*
|
||||
|
3
AUTHORS
3
AUTHORS
@@ -160,3 +160,6 @@ Erwin de Haan
|
||||
Jens Wille
|
||||
Robin Houtevelts
|
||||
Patrick Griffis
|
||||
Aidan Rowe
|
||||
mutantmonkey
|
||||
Ben Congdon
|
||||
|
@@ -92,7 +92,9 @@ If you want to create a build of youtube-dl yourself, you'll need
|
||||
|
||||
### Adding support for a new site
|
||||
|
||||
If you want to add support for a new site, you can follow this quick list (assuming your service is called `yourextractor`):
|
||||
If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**.
|
||||
|
||||
After you have ensured this site is distributing it's content legally, you can follow this quick list (assuming your service is called `yourextractor`):
|
||||
|
||||
1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
|
||||
2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git`
|
||||
@@ -140,16 +142,17 @@ If you want to add support for a new site, you can follow this quick list (assum
|
||||
```
|
||||
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62-L200). Add tests and code for as many as you want.
|
||||
8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8).
|
||||
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L68-L226). Add tests and code for as many as you want.
|
||||
8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L138-L226) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`.
|
||||
9. Check the code with [flake8](https://pypi.python.org/pypi/flake8).
|
||||
10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
||||
|
||||
$ git add youtube_dl/extractor/__init__.py
|
||||
$ git add youtube_dl/extractor/yourextractor.py
|
||||
$ git commit -m '[yourextractor] Add new extractor'
|
||||
$ git push origin yourextractor
|
||||
|
||||
10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||
11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||
|
||||
In any case, thank you very much for your contributions!
|
||||
|
||||
|
3
Makefile
3
Makefile
@@ -3,6 +3,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bas
|
||||
clean:
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
|
||||
find . -name "*.pyc" -delete
|
||||
find . -name "*.class" -delete
|
||||
|
||||
PREFIX ?= /usr/local
|
||||
BINDIR ?= $(PREFIX)/bin
|
||||
@@ -44,7 +45,7 @@ test:
|
||||
ot: offlinetest
|
||||
|
||||
offlinetest: codetest
|
||||
nosetests --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py
|
||||
$(PYTHON) -m nose --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py
|
||||
|
||||
tar: youtube-dl.tar.gz
|
||||
|
||||
|
51
README.md
51
README.md
@@ -80,6 +80,8 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
on Windows)
|
||||
--flat-playlist Do not extract the videos of a playlist,
|
||||
only list them.
|
||||
--mark-watched Mark videos watched (YouTube only)
|
||||
--no-mark-watched Do not mark videos watched (YouTube only)
|
||||
--no-color Do not emit color codes in output
|
||||
|
||||
## Network Options:
|
||||
@@ -179,7 +181,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
to play it)
|
||||
--external-downloader COMMAND Use the specified external downloader.
|
||||
Currently supports
|
||||
aria2c,axel,curl,httpie,wget
|
||||
aria2c,avconv,axel,curl,ffmpeg,httpie,wget
|
||||
--external-downloader-args ARGS Give these arguments to the external
|
||||
downloader
|
||||
|
||||
@@ -409,13 +411,18 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
|
||||
# CONFIGURATION
|
||||
|
||||
You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime and use a proxy:
|
||||
You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`.
|
||||
|
||||
For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory:
|
||||
```
|
||||
--extract-audio
|
||||
-x
|
||||
--no-mtime
|
||||
--proxy 127.0.0.1:3128
|
||||
-o ~/Movies/%(title)s.%(ext)s
|
||||
```
|
||||
|
||||
Note that options in configuration file are just the same options aka switches used in regular command line calls thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`.
|
||||
|
||||
You can use `--ignore-config` if you want to disable the configuration file for a particular youtube-dl run.
|
||||
|
||||
### Authentication with `.netrc` file
|
||||
@@ -440,7 +447,11 @@ On Windows you may also need to setup the `%HOME%` environment variable manually
|
||||
|
||||
# OUTPUT TEMPLATE
|
||||
|
||||
The `-o` option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:
|
||||
The `-o` option allows users to indicate a template for the output file names.
|
||||
|
||||
**tl;dr:** [navigate me to examples](#output-template-examples).
|
||||
|
||||
The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:
|
||||
|
||||
- `id`: Video identifier
|
||||
- `title`: Video title
|
||||
@@ -449,6 +460,7 @@ The `-o` option allows users to indicate a template for the output file names. T
|
||||
- `alt_title`: A secondary title of the video
|
||||
- `display_id`: An alternative identifier for the video
|
||||
- `uploader`: Full name of the video uploader
|
||||
- `license`: License name the video is licensed under
|
||||
- `creator`: The main artist who created the video
|
||||
- `release_date`: The date (YYYYMMDD) when the video was released
|
||||
- `timestamp`: UNIX timestamp of the moment the video became available
|
||||
@@ -513,7 +525,9 @@ The current default template is `%(title)s-%(id)s.%(ext)s`.
|
||||
|
||||
In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
|
||||
|
||||
Examples (note on Windows you may need to use double quotes instead of single):
|
||||
#### Output template examples
|
||||
|
||||
Note on Windows you may need to use double quotes instead of single.
|
||||
|
||||
```bash
|
||||
$ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc
|
||||
@@ -525,6 +539,9 @@ youtube-dl_test_video_.mp4 # A simple file name
|
||||
# Download YouTube playlist videos in separate directory indexed by video order in a playlist
|
||||
$ youtube-dl -o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s' https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re
|
||||
|
||||
# Download all playlists of YouTube channel/user keeping each playlist in separate directory:
|
||||
$ youtube-dl -o '%(uploader)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s' https://www.youtube.com/user/TheLinuxFoundation/playlists
|
||||
|
||||
# Download Udemy course keeping each chapter in separate directory under MyVideos directory in your home
|
||||
$ youtube-dl -u user -p password -o '~/MyVideos/%(playlist)s/%(chapter_number)s - %(chapter)s/%(title)s.%(ext)s' https://www.udemy.com/java-tutorial/
|
||||
|
||||
@@ -543,6 +560,8 @@ But sometimes you may want to download in a different format, for example when y
|
||||
|
||||
The general syntax for format selection is `--format FORMAT` or shorter `-f FORMAT` where `FORMAT` is a *selector expression*, i.e. an expression that describes format or formats you would like to download.
|
||||
|
||||
**tl;dr:** [navigate me to examples](#format-selection-examples).
|
||||
|
||||
The simplest case is requesting a specific format, for example with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific.
|
||||
|
||||
You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download best quality format of particular file extension served as a single file, e.g. `-f webm` will download best quality format with `webm` extension served as a single file.
|
||||
@@ -588,11 +607,14 @@ You can merge the video and audio of two formats into a single file using `-f <v
|
||||
|
||||
Format selectors can also be grouped using parentheses, for example if you want to download the best mp4 and webm formats with a height lower than 480 you can use `-f '(mp4,webm)[height<480]'`.
|
||||
|
||||
Since the end of April 2015 and version 2015.04.26 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see #5447, #5456). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading the best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some DASH formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded and muxed.
|
||||
Since the end of April 2015 and version 2015.04.26 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see [#5447](https://github.com/rg3/youtube-dl/issues/5447), [#5456](https://github.com/rg3/youtube-dl/issues/5456)). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading the best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some DASH formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded and muxed.
|
||||
|
||||
If you want to preserve the old format selection behavior (prior to youtube-dl 2015.04.26), i.e. you want to download the best available quality media served as a single file, you should explicitly specify your choice with `-f best`. You may want to add it to the [configuration file](#configuration) in order not to type it every time you run youtube-dl.
|
||||
|
||||
Examples (note on Windows you may need to use double quotes instead of single):
|
||||
#### Format selection examples
|
||||
|
||||
Note on Windows you may need to use double quotes instead of single.
|
||||
|
||||
```bash
|
||||
# Download best mp4 format available or any other best if no mp4 available
|
||||
$ youtube-dl -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best'
|
||||
@@ -733,7 +755,7 @@ means you're using an outdated version of Python. Please update to Python 2.6 or
|
||||
|
||||
### What is this binary file? Where has the code gone?
|
||||
|
||||
Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`.
|
||||
Since June 2012 ([#342](https://github.com/rg3/youtube-dl/issues/342)) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`.
|
||||
|
||||
### The exe throws a *Runtime error from Visual C++*
|
||||
|
||||
@@ -816,7 +838,9 @@ If you want to create a build of youtube-dl yourself, you'll need
|
||||
|
||||
### Adding support for a new site
|
||||
|
||||
If you want to add support for a new site, you can follow this quick list (assuming your service is called `yourextractor`):
|
||||
If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**.
|
||||
|
||||
After you have ensured this site is distributing it's content legally, you can follow this quick list (assuming your service is called `yourextractor`):
|
||||
|
||||
1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
|
||||
2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git`
|
||||
@@ -864,16 +888,17 @@ If you want to add support for a new site, you can follow this quick list (assum
|
||||
```
|
||||
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62-L200). Add tests and code for as many as you want.
|
||||
8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8).
|
||||
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L68-L226). Add tests and code for as many as you want.
|
||||
8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L138-L226) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`.
|
||||
9. Check the code with [flake8](https://pypi.python.org/pypi/flake8).
|
||||
10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
||||
|
||||
$ git add youtube_dl/extractor/__init__.py
|
||||
$ git add youtube_dl/extractor/yourextractor.py
|
||||
$ git commit -m '[yourextractor] Add new extractor'
|
||||
$ git push origin yourextractor
|
||||
|
||||
10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||
11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||
|
||||
In any case, thank you very much for your contributions!
|
||||
|
||||
|
@@ -54,6 +54,7 @@
|
||||
- **AtresPlayer**
|
||||
- **ATTTechChannel**
|
||||
- **AudiMedia**
|
||||
- **AudioBoom**
|
||||
- **audiomack**
|
||||
- **audiomack:album**
|
||||
- **Azubu**
|
||||
@@ -77,8 +78,10 @@
|
||||
- **BleacherReportCMS**
|
||||
- **blinkx**
|
||||
- **Bloomberg**
|
||||
- **BokeCC**
|
||||
- **Bpb**: Bundeszentrale für politische Bildung
|
||||
- **BR**: Bayerischer Rundfunk Mediathek
|
||||
- **BravoTV**
|
||||
- **Break**
|
||||
- **brightcove:legacy**
|
||||
- **brightcove:new**
|
||||
@@ -166,6 +169,8 @@
|
||||
- **Dump**
|
||||
- **Dumpert**
|
||||
- **dvtv**: http://video.aktualne.cz/
|
||||
- **dw**
|
||||
- **dw:article**
|
||||
- **EaglePlatform**
|
||||
- **EbaumsWorld**
|
||||
- **EchoMsk**
|
||||
@@ -189,10 +194,10 @@
|
||||
- **ExpoTV**
|
||||
- **ExtremeTube**
|
||||
- **facebook**
|
||||
- **facebook:post**
|
||||
- **faz.net**
|
||||
- **fc2**
|
||||
- **Fczenit**
|
||||
- **features.aol.com**
|
||||
- **fernsehkritik.tv**
|
||||
- **Firstpost**
|
||||
- **FiveTV**
|
||||
@@ -292,6 +297,7 @@
|
||||
- **kontrtube**: KontrTube.ru - Труба зовёт
|
||||
- **KrasView**: Красвью
|
||||
- **Ku6**
|
||||
- **KUSI**
|
||||
- **kuwo:album**: 酷我音乐 - 专辑
|
||||
- **kuwo:category**: 酷我音乐 - 分类
|
||||
- **kuwo:chart**: 酷我音乐 - 排行榜
|
||||
@@ -300,12 +306,11 @@
|
||||
- **kuwo:song**: 酷我音乐
|
||||
- **la7.tv**
|
||||
- **Laola1Tv**
|
||||
- **Le**: 乐视网
|
||||
- **Lecture2Go**
|
||||
- **Lemonde**
|
||||
- **Letv**: 乐视网
|
||||
- **LePlaylist**
|
||||
- **LetvCloud**: 乐视云
|
||||
- **LetvPlaylist**
|
||||
- **LetvTv**
|
||||
- **Libsyn**
|
||||
- **life:embed**
|
||||
- **lifenews**: LIFE | NEWS
|
||||
@@ -323,6 +328,7 @@
|
||||
- **m6**
|
||||
- **macgamestore**: MacGameStore trailers
|
||||
- **mailru**: Видео@Mail.Ru
|
||||
- **MakersChannel**
|
||||
- **MakerTV**
|
||||
- **Malemotion**
|
||||
- **MatchTV**
|
||||
@@ -333,6 +339,7 @@
|
||||
- **Mgoon**
|
||||
- **Minhateca**
|
||||
- **MinistryGrid**
|
||||
- **Minoto**
|
||||
- **miomio.tv**
|
||||
- **MiTele**: mitele.es
|
||||
- **mixcloud**
|
||||
@@ -420,6 +427,7 @@
|
||||
- **Npr**
|
||||
- **NRK**
|
||||
- **NRKPlaylist**
|
||||
- **NRKSkole**: NRK Skole
|
||||
- **NRKTV**: NRK TV and NRK Radio
|
||||
- **ntv.ru**
|
||||
- **Nuvid**
|
||||
@@ -492,6 +500,7 @@
|
||||
- **Restudy**
|
||||
- **ReverbNation**
|
||||
- **Revision3**
|
||||
- **RICE**
|
||||
- **RingTV**
|
||||
- **RottenTomatoes**
|
||||
- **Roxwel**
|
||||
@@ -560,7 +569,6 @@
|
||||
- **southpark.de**
|
||||
- **southpark.nl**
|
||||
- **southparkstudios.dk**
|
||||
- **Space**
|
||||
- **SpankBang**
|
||||
- **Spankwire**
|
||||
- **Spiegel**
|
||||
@@ -611,6 +619,7 @@
|
||||
- **ThePlatform**
|
||||
- **ThePlatformFeed**
|
||||
- **TheSixtyOne**
|
||||
- **TheStar**
|
||||
- **ThisAmericanLife**
|
||||
- **ThisAV**
|
||||
- **THVideo**
|
||||
@@ -620,6 +629,7 @@
|
||||
- **TMZ**
|
||||
- **TMZArticle**
|
||||
- **TNAFlix**
|
||||
- **TNAFlixNetworkEmbed**
|
||||
- **toggle**
|
||||
- **tou.tv**
|
||||
- **Toypics**: Toypics user profile
|
||||
@@ -643,6 +653,7 @@
|
||||
- **tv.dfb.de**
|
||||
- **TV2**
|
||||
- **TV2Article**
|
||||
- **TV3**
|
||||
- **TV4**: tv4.se and tv4play.se
|
||||
- **TVC**
|
||||
- **TVCArticle**
|
||||
@@ -668,8 +679,10 @@
|
||||
- **UDNEmbed**: 聯合影音
|
||||
- **Unistra**
|
||||
- **Urort**: NRK P3 Urørt
|
||||
- **USAToday**
|
||||
- **ustream**
|
||||
- **ustream:channel**
|
||||
- **Ustudio**
|
||||
- **Varzesh3**
|
||||
- **Vbox7**
|
||||
- **VeeHD**
|
||||
@@ -680,12 +693,13 @@
|
||||
- **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
|
||||
- **vh1.com**
|
||||
- **Vice**
|
||||
- **ViceShow**
|
||||
- **Viddler**
|
||||
- **video.google:search**: Google Video search
|
||||
- **video.mit.edu**
|
||||
- **VideoDetective**
|
||||
- **videofy.me**
|
||||
- **VideoMega** (Currently broken)
|
||||
- **VideoMega**
|
||||
- **videomore**
|
||||
- **videomore:season**
|
||||
- **videomore:video**
|
||||
@@ -707,6 +721,7 @@
|
||||
- **vimeo:channel**
|
||||
- **vimeo:group**
|
||||
- **vimeo:likes**: Vimeo user likes
|
||||
- **vimeo:ondemand**
|
||||
- **vimeo:review**: Review pages on vimeo
|
||||
- **vimeo:user**
|
||||
- **vimeo:watchlater**: Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)
|
||||
|
@@ -11,8 +11,11 @@ import sys
|
||||
|
||||
import youtube_dl.extractor
|
||||
from youtube_dl import YoutubeDL
|
||||
from youtube_dl.utils import (
|
||||
from youtube_dl.compat import (
|
||||
compat_os_name,
|
||||
compat_str,
|
||||
)
|
||||
from youtube_dl.utils import (
|
||||
preferredencoding,
|
||||
write_string,
|
||||
)
|
||||
@@ -42,7 +45,7 @@ def report_warning(message):
|
||||
Print the message to stderr, it will be prefixed with 'WARNING:'
|
||||
If stderr is a tty file the 'WARNING:' will be colored
|
||||
'''
|
||||
if sys.stderr.isatty() and os.name != 'nt':
|
||||
if sys.stderr.isatty() and compat_os_name != 'nt':
|
||||
_msg_header = '\033[0;33mWARNING:\033[0m'
|
||||
else:
|
||||
_msg_header = 'WARNING:'
|
||||
|
@@ -502,6 +502,9 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
assertRegexpMatches(self, ydl._format_note({
|
||||
'vbr': 10,
|
||||
}), '^\s*10k$')
|
||||
assertRegexpMatches(self, ydl._format_note({
|
||||
'fps': 30,
|
||||
}), '^30fps$')
|
||||
|
||||
def test_postprocessors(self):
|
||||
filename = 'post-processor-testfile.mp4'
|
||||
|
@@ -52,7 +52,12 @@ class TestHTTP(unittest.TestCase):
|
||||
('localhost', 0), HTTPTestRequestHandler)
|
||||
self.httpd.socket = ssl.wrap_socket(
|
||||
self.httpd.socket, certfile=certfn, server_side=True)
|
||||
self.port = self.httpd.socket.getsockname()[1]
|
||||
if os.name == 'java':
|
||||
# In Jython SSLSocket is not a subclass of socket.socket
|
||||
sock = self.httpd.socket.sock
|
||||
else:
|
||||
sock = self.httpd.socket
|
||||
self.port = sock.getsockname()[1]
|
||||
self.server_thread = threading.Thread(target=self.httpd.serve_forever)
|
||||
self.server_thread.daemon = True
|
||||
self.server_thread.start()
|
||||
|
@@ -18,6 +18,7 @@ import xml.etree.ElementTree
|
||||
from youtube_dl.utils import (
|
||||
age_restricted,
|
||||
args_to_str,
|
||||
encode_base_n,
|
||||
clean_html,
|
||||
DateRange,
|
||||
detect_exe_version,
|
||||
@@ -27,6 +28,7 @@ from youtube_dl.utils import (
|
||||
encodeFilename,
|
||||
escape_rfc3986,
|
||||
escape_url,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
fix_xml_ampersands,
|
||||
@@ -40,6 +42,7 @@ from youtube_dl.utils import (
|
||||
orderedSet,
|
||||
parse_duration,
|
||||
parse_filesize,
|
||||
parse_count,
|
||||
parse_iso8601,
|
||||
read_batch_urls,
|
||||
sanitize_filename,
|
||||
@@ -60,6 +63,7 @@ from youtube_dl.utils import (
|
||||
lowercase_escape,
|
||||
url_basename,
|
||||
urlencode_postdata,
|
||||
update_url_query,
|
||||
version_tuple,
|
||||
xpath_with_ns,
|
||||
xpath_element,
|
||||
@@ -74,7 +78,10 @@ from youtube_dl.utils import (
|
||||
cli_bool_option,
|
||||
)
|
||||
from youtube_dl.compat import (
|
||||
compat_chr,
|
||||
compat_etree_fromstring,
|
||||
compat_urlparse,
|
||||
compat_parse_qs,
|
||||
)
|
||||
|
||||
|
||||
@@ -249,6 +256,7 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(
|
||||
unified_strdate('2/2/2015 6:47:40 PM', day_first=False),
|
||||
'20150202')
|
||||
self.assertEqual(unified_strdate('Feb 14th 2016 5:45PM'), '20160214')
|
||||
self.assertEqual(unified_strdate('25-09-2014'), '20140925')
|
||||
self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)
|
||||
|
||||
@@ -452,6 +460,40 @@ class TestUtil(unittest.TestCase):
|
||||
data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
|
||||
self.assertTrue(isinstance(data, bytes))
|
||||
|
||||
def test_update_url_query(self):
|
||||
def query_dict(url):
|
||||
return compat_parse_qs(compat_urlparse.urlparse(url).query)
|
||||
self.assertEqual(query_dict(update_url_query(
|
||||
'http://example.com/path', {'quality': ['HD'], 'format': ['mp4']})),
|
||||
query_dict('http://example.com/path?quality=HD&format=mp4'))
|
||||
self.assertEqual(query_dict(update_url_query(
|
||||
'http://example.com/path', {'system': ['LINUX', 'WINDOWS']})),
|
||||
query_dict('http://example.com/path?system=LINUX&system=WINDOWS'))
|
||||
self.assertEqual(query_dict(update_url_query(
|
||||
'http://example.com/path', {'fields': 'id,formats,subtitles'})),
|
||||
query_dict('http://example.com/path?fields=id,formats,subtitles'))
|
||||
self.assertEqual(query_dict(update_url_query(
|
||||
'http://example.com/path', {'fields': ('id,formats,subtitles', 'thumbnails')})),
|
||||
query_dict('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails'))
|
||||
self.assertEqual(query_dict(update_url_query(
|
||||
'http://example.com/path?manifest=f4m', {'manifest': []})),
|
||||
query_dict('http://example.com/path'))
|
||||
self.assertEqual(query_dict(update_url_query(
|
||||
'http://example.com/path?system=LINUX&system=WINDOWS', {'system': 'LINUX'})),
|
||||
query_dict('http://example.com/path?system=LINUX'))
|
||||
self.assertEqual(query_dict(update_url_query(
|
||||
'http://example.com/path', {'fields': b'id,formats,subtitles'})),
|
||||
query_dict('http://example.com/path?fields=id,formats,subtitles'))
|
||||
self.assertEqual(query_dict(update_url_query(
|
||||
'http://example.com/path', {'width': 1080, 'height': 720})),
|
||||
query_dict('http://example.com/path?width=1080&height=720'))
|
||||
self.assertEqual(query_dict(update_url_query(
|
||||
'http://example.com/path', {'bitrate': 5020.43})),
|
||||
query_dict('http://example.com/path?bitrate=5020.43'))
|
||||
self.assertEqual(query_dict(update_url_query(
|
||||
'http://example.com/path', {'test': '第二行тест'})),
|
||||
query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
|
||||
|
||||
def test_dict_get(self):
|
||||
FALSE_VALUES = {
|
||||
'none': None,
|
||||
@@ -589,6 +631,44 @@ class TestUtil(unittest.TestCase):
|
||||
on = js_to_json('{"abc": "def",}')
|
||||
self.assertEqual(json.loads(on), {'abc': 'def'})
|
||||
|
||||
def test_extract_attributes(self):
|
||||
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
|
||||
self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
|
||||
self.assertEqual(extract_attributes('<e x=y>'), {'x': 'y'})
|
||||
self.assertEqual(extract_attributes('<e x="a \'b\' c">'), {'x': "a 'b' c"})
|
||||
self.assertEqual(extract_attributes('<e x=\'a "b" c\'>'), {'x': 'a "b" c'})
|
||||
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
|
||||
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
|
||||
self.assertEqual(extract_attributes('<e x="&">'), {'x': '&'}) # XML
|
||||
self.assertEqual(extract_attributes('<e x=""">'), {'x': '"'})
|
||||
self.assertEqual(extract_attributes('<e x="£">'), {'x': '£'}) # HTML 3.2
|
||||
self.assertEqual(extract_attributes('<e x="λ">'), {'x': 'λ'}) # HTML 4.0
|
||||
self.assertEqual(extract_attributes('<e x="&foo">'), {'x': '&foo'})
|
||||
self.assertEqual(extract_attributes('<e x="\'">'), {'x': "'"})
|
||||
self.assertEqual(extract_attributes('<e x=\'"\'>'), {'x': '"'})
|
||||
self.assertEqual(extract_attributes('<e x >'), {'x': None})
|
||||
self.assertEqual(extract_attributes('<e x=y a>'), {'x': 'y', 'a': None})
|
||||
self.assertEqual(extract_attributes('<e x= y>'), {'x': 'y'})
|
||||
self.assertEqual(extract_attributes('<e x=1 y=2 x=3>'), {'y': '2', 'x': '3'})
|
||||
self.assertEqual(extract_attributes('<e \nx=\ny\n>'), {'x': 'y'})
|
||||
self.assertEqual(extract_attributes('<e \nx=\n"y"\n>'), {'x': 'y'})
|
||||
self.assertEqual(extract_attributes("<e \nx=\n'y'\n>"), {'x': 'y'})
|
||||
self.assertEqual(extract_attributes('<e \nx="\ny\n">'), {'x': '\ny\n'})
|
||||
self.assertEqual(extract_attributes('<e CAPS=x>'), {'caps': 'x'}) # Names lowercased
|
||||
self.assertEqual(extract_attributes('<e x=1 X=2>'), {'x': '2'})
|
||||
self.assertEqual(extract_attributes('<e X=1 x=2>'), {'x': '2'})
|
||||
self.assertEqual(extract_attributes('<e _:funny-name1=1>'), {'_:funny-name1': '1'})
|
||||
self.assertEqual(extract_attributes('<e x="Fáilte 世界 \U0001f600">'), {'x': 'Fáilte 世界 \U0001f600'})
|
||||
self.assertEqual(extract_attributes('<e x="décomposé">'), {'x': 'décompose\u0301'})
|
||||
# "Narrow" Python builds don't support unicode code points outside BMP.
|
||||
try:
|
||||
compat_chr(0x10000)
|
||||
supports_outside_bmp = True
|
||||
except ValueError:
|
||||
supports_outside_bmp = False
|
||||
if supports_outside_bmp:
|
||||
self.assertEqual(extract_attributes('<e x="Smile 😀!">'), {'x': 'Smile \U0001f600!'})
|
||||
|
||||
def test_clean_html(self):
|
||||
self.assertEqual(clean_html('a:\nb'), 'a: b')
|
||||
self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
|
||||
@@ -614,6 +694,15 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(parse_filesize('1.2Tb'), 1200000000000)
|
||||
self.assertEqual(parse_filesize('1,24 KB'), 1240)
|
||||
|
||||
def test_parse_count(self):
|
||||
self.assertEqual(parse_count(None), None)
|
||||
self.assertEqual(parse_count(''), None)
|
||||
self.assertEqual(parse_count('0'), 0)
|
||||
self.assertEqual(parse_count('1000'), 1000)
|
||||
self.assertEqual(parse_count('1.000'), 1000)
|
||||
self.assertEqual(parse_count('1.1k'), 1100)
|
||||
self.assertEqual(parse_count('1.1kk'), 1100000)
|
||||
|
||||
def test_version_tuple(self):
|
||||
self.assertEqual(version_tuple('1'), (1,))
|
||||
self.assertEqual(version_tuple('10.23.344'), (10, 23, 344))
|
||||
@@ -801,5 +890,16 @@ The first line
|
||||
ohdave_rsa_encrypt(b'aa111222', e, N),
|
||||
'726664bd9a23fd0c70f9f1b84aab5e3905ce1e45a584e9cbcf9bcc7510338fc1986d6c599ff990d923aa43c51c0d9013cd572e13bc58f4ae48f2ed8c0b0ba881')
|
||||
|
||||
def test_encode_base_n(self):
|
||||
self.assertEqual(encode_base_n(0, 30), '0')
|
||||
self.assertEqual(encode_base_n(80, 30), '2k')
|
||||
|
||||
custom_table = '9876543210ZYXWVUTSRQPONMLKJIHGFEDCBA'
|
||||
self.assertEqual(encode_base_n(0, 30, custom_table), '9')
|
||||
self.assertEqual(encode_base_n(80, 30, custom_table), '7P')
|
||||
|
||||
self.assertRaises(ValueError, encode_base_n, 0, 70)
|
||||
self.assertRaises(ValueError, encode_base_n, 0, 60, custom_table)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -24,9 +24,6 @@ import time
|
||||
import tokenize
|
||||
import traceback
|
||||
|
||||
if os.name == 'nt':
|
||||
import ctypes
|
||||
|
||||
from .compat import (
|
||||
compat_basestring,
|
||||
compat_cookiejar,
|
||||
@@ -34,6 +31,7 @@ from .compat import (
|
||||
compat_get_terminal_size,
|
||||
compat_http_client,
|
||||
compat_kwargs,
|
||||
compat_os_name,
|
||||
compat_str,
|
||||
compat_tokenize_tokenize,
|
||||
compat_urllib_error,
|
||||
@@ -87,6 +85,7 @@ from .extractor import get_info_extractor, gen_extractors
|
||||
from .downloader import get_suitable_downloader
|
||||
from .downloader.rtmp import rtmpdump_version
|
||||
from .postprocessor import (
|
||||
FFmpegFixupM3u8PP,
|
||||
FFmpegFixupM4aPP,
|
||||
FFmpegFixupStretchedPP,
|
||||
FFmpegMergerPP,
|
||||
@@ -95,6 +94,9 @@ from .postprocessor import (
|
||||
)
|
||||
from .version import __version__
|
||||
|
||||
if compat_os_name == 'nt':
|
||||
import ctypes
|
||||
|
||||
|
||||
class YoutubeDL(object):
|
||||
"""YoutubeDL class.
|
||||
@@ -450,7 +452,7 @@ class YoutubeDL(object):
|
||||
def to_console_title(self, message):
|
||||
if not self.params.get('consoletitle', False):
|
||||
return
|
||||
if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
|
||||
if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
|
||||
# c_wchar_p() might not be necessary if `message` is
|
||||
# already of type unicode()
|
||||
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
|
||||
@@ -521,7 +523,7 @@ class YoutubeDL(object):
|
||||
else:
|
||||
if self.params.get('no_warnings'):
|
||||
return
|
||||
if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
|
||||
if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
|
||||
_msg_header = '\033[0;33mWARNING:\033[0m'
|
||||
else:
|
||||
_msg_header = 'WARNING:'
|
||||
@@ -533,7 +535,7 @@ class YoutubeDL(object):
|
||||
Do the same as trouble, but prefixes the message with 'ERROR:', colored
|
||||
in red if stderr is a tty file.
|
||||
'''
|
||||
if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
|
||||
if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
|
||||
_msg_header = '\033[0;31mERROR:\033[0m'
|
||||
else:
|
||||
_msg_header = 'ERROR:'
|
||||
@@ -566,7 +568,7 @@ class YoutubeDL(object):
|
||||
elif template_dict.get('height'):
|
||||
template_dict['resolution'] = '%sp' % template_dict['height']
|
||||
elif template_dict.get('width'):
|
||||
template_dict['resolution'] = '?x%d' % template_dict['width']
|
||||
template_dict['resolution'] = '%dx?' % template_dict['width']
|
||||
|
||||
sanitize = lambda k, v: sanitize_filename(
|
||||
compat_str(v),
|
||||
@@ -1232,6 +1234,10 @@ class YoutubeDL(object):
|
||||
if t.get('id') is None:
|
||||
t['id'] = '%d' % i
|
||||
|
||||
if self.params.get('list_thumbnails'):
|
||||
self.list_thumbnails(info_dict)
|
||||
return
|
||||
|
||||
if thumbnails and 'thumbnail' not in info_dict:
|
||||
info_dict['thumbnail'] = thumbnails[-1]['url']
|
||||
|
||||
@@ -1333,9 +1339,6 @@ class YoutubeDL(object):
|
||||
if self.params.get('listformats'):
|
||||
self.list_formats(info_dict)
|
||||
return
|
||||
if self.params.get('list_thumbnails'):
|
||||
self.list_thumbnails(info_dict)
|
||||
return
|
||||
|
||||
req_format = self.params.get('format')
|
||||
if req_format is None:
|
||||
@@ -1631,12 +1634,14 @@ class YoutubeDL(object):
|
||||
self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
|
||||
return
|
||||
|
||||
if success:
|
||||
if success and filename != '-':
|
||||
# Fixup content
|
||||
fixup_policy = self.params.get('fixup')
|
||||
if fixup_policy is None:
|
||||
fixup_policy = 'detect_or_warn'
|
||||
|
||||
INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
|
||||
|
||||
stretched_ratio = info_dict.get('stretched_ratio')
|
||||
if stretched_ratio is not None and stretched_ratio != 1:
|
||||
if fixup_policy == 'warn':
|
||||
@@ -1649,15 +1654,18 @@ class YoutubeDL(object):
|
||||
info_dict['__postprocessors'].append(stretched_pp)
|
||||
else:
|
||||
self.report_warning(
|
||||
'%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
|
||||
info_dict['id'], stretched_ratio))
|
||||
'%s: Non-uniform pixel ratio (%s). %s'
|
||||
% (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
|
||||
else:
|
||||
assert fixup_policy in ('ignore', 'never')
|
||||
|
||||
if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
|
||||
if (info_dict.get('requested_formats') is None and
|
||||
info_dict.get('container') == 'm4a_dash'):
|
||||
if fixup_policy == 'warn':
|
||||
self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
|
||||
info_dict['id']))
|
||||
self.report_warning(
|
||||
'%s: writing DASH m4a. '
|
||||
'Only some players support this container.'
|
||||
% info_dict['id'])
|
||||
elif fixup_policy == 'detect_or_warn':
|
||||
fixup_pp = FFmpegFixupM4aPP(self)
|
||||
if fixup_pp.available:
|
||||
@@ -1665,8 +1673,27 @@ class YoutubeDL(object):
|
||||
info_dict['__postprocessors'].append(fixup_pp)
|
||||
else:
|
||||
self.report_warning(
|
||||
'%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
|
||||
info_dict['id']))
|
||||
'%s: writing DASH m4a. '
|
||||
'Only some players support this container. %s'
|
||||
% (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
|
||||
else:
|
||||
assert fixup_policy in ('ignore', 'never')
|
||||
|
||||
if (info_dict.get('protocol') == 'm3u8_native' or
|
||||
info_dict.get('protocol') == 'm3u8' and
|
||||
self.params.get('hls_prefer_native')):
|
||||
if fixup_policy == 'warn':
|
||||
self.report_warning('%s: malformated aac bitstream.' % (
|
||||
info_dict['id']))
|
||||
elif fixup_policy == 'detect_or_warn':
|
||||
fixup_pp = FFmpegFixupM3u8PP(self)
|
||||
if fixup_pp.available:
|
||||
info_dict.setdefault('__postprocessors', [])
|
||||
info_dict['__postprocessors'].append(fixup_pp)
|
||||
else:
|
||||
self.report_warning(
|
||||
'%s: malformated aac bitstream. %s'
|
||||
% (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
|
||||
else:
|
||||
assert fixup_policy in ('ignore', 'never')
|
||||
|
||||
@@ -1830,7 +1857,9 @@ class YoutubeDL(object):
|
||||
if fdict.get('vbr') is not None:
|
||||
res += '%4dk' % fdict['vbr']
|
||||
if fdict.get('fps') is not None:
|
||||
res += ', %sfps' % fdict['fps']
|
||||
if res:
|
||||
res += ', '
|
||||
res += '%sfps' % fdict['fps']
|
||||
if fdict.get('acodec') is not None:
|
||||
if res:
|
||||
res += ', '
|
||||
@@ -1873,13 +1902,8 @@ class YoutubeDL(object):
|
||||
def list_thumbnails(self, info_dict):
|
||||
thumbnails = info_dict.get('thumbnails')
|
||||
if not thumbnails:
|
||||
tn_url = info_dict.get('thumbnail')
|
||||
if tn_url:
|
||||
thumbnails = [{'id': '0', 'url': tn_url}]
|
||||
else:
|
||||
self.to_screen(
|
||||
'[info] No thumbnails present for %s' % info_dict['id'])
|
||||
return
|
||||
self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
|
||||
return
|
||||
|
||||
self.to_screen(
|
||||
'[info] Thumbnails for %s:' % info_dict['id'])
|
||||
|
@@ -355,6 +355,7 @@ def _real_main(argv=None):
|
||||
'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
|
||||
'encoding': opts.encoding,
|
||||
'extract_flat': opts.extract_flat,
|
||||
'mark_watched': opts.mark_watched,
|
||||
'merge_output_format': opts.merge_output_format,
|
||||
'postprocessors': postprocessors,
|
||||
'fixup': opts.fixup,
|
||||
|
@@ -77,6 +77,11 @@ try:
|
||||
except ImportError: # Python 2
|
||||
from urllib import urlretrieve as compat_urlretrieve
|
||||
|
||||
try:
|
||||
from html.parser import HTMLParser as compat_HTMLParser
|
||||
except ImportError: # Python 2
|
||||
from HTMLParser import HTMLParser as compat_HTMLParser
|
||||
|
||||
|
||||
try:
|
||||
from subprocess import DEVNULL
|
||||
@@ -251,6 +256,16 @@ else:
|
||||
el.text = el.text.decode('utf-8')
|
||||
return doc
|
||||
|
||||
if sys.version_info < (2, 7):
|
||||
# Here comes the crazy part: In 2.6, if the xpath is a unicode,
|
||||
# .//node does not match if a node is a direct child of . !
|
||||
def compat_xpath(xpath):
|
||||
if isinstance(xpath, compat_str):
|
||||
xpath = xpath.encode('ascii')
|
||||
return xpath
|
||||
else:
|
||||
compat_xpath = lambda xpath: xpath
|
||||
|
||||
try:
|
||||
from urllib.parse import parse_qs as compat_parse_qs
|
||||
except ImportError: # Python 2
|
||||
@@ -326,6 +341,9 @@ def compat_ord(c):
|
||||
return ord(c)
|
||||
|
||||
|
||||
compat_os_name = os._name if os.name == 'java' else os.name
|
||||
|
||||
|
||||
if sys.version_info >= (3, 0):
|
||||
compat_getenv = os.getenv
|
||||
compat_expanduser = os.path.expanduser
|
||||
@@ -346,7 +364,7 @@ else:
|
||||
# The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
|
||||
# for different platforms with correct environment variables decoding.
|
||||
|
||||
if os.name == 'posix':
|
||||
if compat_os_name == 'posix':
|
||||
def compat_expanduser(path):
|
||||
"""Expand ~ and ~user constructions. If user or $HOME is unknown,
|
||||
do nothing."""
|
||||
@@ -370,7 +388,7 @@ else:
|
||||
userhome = pwent.pw_dir
|
||||
userhome = userhome.rstrip('/')
|
||||
return (userhome + path[i:]) or '/'
|
||||
elif os.name == 'nt' or os.name == 'ce':
|
||||
elif compat_os_name == 'nt' or compat_os_name == 'ce':
|
||||
def compat_expanduser(path):
|
||||
"""Expand ~ and ~user constructs.
|
||||
|
||||
@@ -540,6 +558,7 @@ else:
|
||||
from tokenize import generate_tokens as compat_tokenize_tokenize
|
||||
|
||||
__all__ = [
|
||||
'compat_HTMLParser',
|
||||
'compat_HTTPError',
|
||||
'compat_basestring',
|
||||
'compat_chr',
|
||||
@@ -556,6 +575,7 @@ __all__ = [
|
||||
'compat_itertools_count',
|
||||
'compat_kwargs',
|
||||
'compat_ord',
|
||||
'compat_os_name',
|
||||
'compat_parse_qs',
|
||||
'compat_print',
|
||||
'compat_shlex_split',
|
||||
@@ -575,6 +595,7 @@ __all__ = [
|
||||
'compat_urlparse',
|
||||
'compat_urlretrieve',
|
||||
'compat_xml_parse_error',
|
||||
'compat_xpath',
|
||||
'shlex_quote',
|
||||
'subprocess_check_output',
|
||||
'workaround_optparse_bug9161',
|
||||
|
@@ -1,14 +1,16 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import FileDownloader
|
||||
from .external import get_external_downloader
|
||||
from .f4m import F4mFD
|
||||
from .hls import HlsFD
|
||||
from .hls import NativeHlsFD
|
||||
from .http import HttpFD
|
||||
from .rtsp import RtspFD
|
||||
from .rtmp import RtmpFD
|
||||
from .dash import DashSegmentsFD
|
||||
from .rtsp import RtspFD
|
||||
from .external import (
|
||||
get_external_downloader,
|
||||
FFmpegFD,
|
||||
)
|
||||
|
||||
from ..utils import (
|
||||
determine_protocol,
|
||||
@@ -16,8 +18,8 @@ from ..utils import (
|
||||
|
||||
PROTOCOL_MAP = {
|
||||
'rtmp': RtmpFD,
|
||||
'm3u8_native': NativeHlsFD,
|
||||
'm3u8': HlsFD,
|
||||
'm3u8_native': HlsFD,
|
||||
'm3u8': FFmpegFD,
|
||||
'mms': RtspFD,
|
||||
'rtsp': RtspFD,
|
||||
'f4m': F4mFD,
|
||||
@@ -30,14 +32,17 @@ def get_suitable_downloader(info_dict, params={}):
|
||||
protocol = determine_protocol(info_dict)
|
||||
info_dict['protocol'] = protocol
|
||||
|
||||
# if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
|
||||
# return FFmpegFD
|
||||
|
||||
external_downloader = params.get('external_downloader')
|
||||
if external_downloader is not None:
|
||||
ed = get_external_downloader(external_downloader)
|
||||
if ed.supports(info_dict):
|
||||
if ed.can_download(info_dict):
|
||||
return ed
|
||||
|
||||
if protocol == 'm3u8' and params.get('hls_prefer_native'):
|
||||
return NativeHlsFD
|
||||
return HlsFD
|
||||
|
||||
return PROTOCOL_MAP.get(protocol, HttpFD)
|
||||
|
||||
|
@@ -5,6 +5,7 @@ import re
|
||||
import sys
|
||||
import time
|
||||
|
||||
from ..compat import compat_os_name
|
||||
from ..utils import (
|
||||
encodeFilename,
|
||||
error_to_compat_str,
|
||||
@@ -219,7 +220,7 @@ class FileDownloader(object):
|
||||
if self.params.get('progress_with_newline', False):
|
||||
self.to_screen(fullmsg)
|
||||
else:
|
||||
if os.name == 'nt':
|
||||
if compat_os_name == 'nt':
|
||||
prev_len = getattr(self, '_report_progress_prev_line_length',
|
||||
0)
|
||||
if prev_len > len(fullmsg):
|
||||
|
@@ -2,8 +2,11 @@ from __future__ import unicode_literals
|
||||
|
||||
import os.path
|
||||
import subprocess
|
||||
import sys
|
||||
import re
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
|
||||
from ..utils import (
|
||||
cli_option,
|
||||
cli_valueless_option,
|
||||
@@ -11,6 +14,8 @@ from ..utils import (
|
||||
cli_configuration_args,
|
||||
encodeFilename,
|
||||
encodeArgument,
|
||||
handle_youtubedl_headers,
|
||||
check_executable,
|
||||
)
|
||||
|
||||
|
||||
@@ -45,10 +50,18 @@ class ExternalFD(FileDownloader):
|
||||
def exe(self):
|
||||
return self.params.get('external_downloader')
|
||||
|
||||
@classmethod
|
||||
def available(cls):
|
||||
return check_executable(cls.get_basename(), [cls.AVAILABLE_OPT])
|
||||
|
||||
@classmethod
|
||||
def supports(cls, info_dict):
|
||||
return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')
|
||||
|
||||
@classmethod
|
||||
def can_download(cls, info_dict):
|
||||
return cls.available() and cls.supports(info_dict)
|
||||
|
||||
def _option(self, command_option, param):
|
||||
return cli_option(self.params, command_option, param)
|
||||
|
||||
@@ -76,6 +89,8 @@ class ExternalFD(FileDownloader):
|
||||
|
||||
|
||||
class CurlFD(ExternalFD):
|
||||
AVAILABLE_OPT = '-V'
|
||||
|
||||
def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd = [self.exe, '--location', '-o', tmpfilename]
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
@@ -89,6 +104,8 @@ class CurlFD(ExternalFD):
|
||||
|
||||
|
||||
class AxelFD(ExternalFD):
|
||||
AVAILABLE_OPT = '-V'
|
||||
|
||||
def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd = [self.exe, '-o', tmpfilename]
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
@@ -99,6 +116,8 @@ class AxelFD(ExternalFD):
|
||||
|
||||
|
||||
class WgetFD(ExternalFD):
|
||||
AVAILABLE_OPT = '--version'
|
||||
|
||||
def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
@@ -112,6 +131,8 @@ class WgetFD(ExternalFD):
|
||||
|
||||
|
||||
class Aria2cFD(ExternalFD):
|
||||
AVAILABLE_OPT = '-v'
|
||||
|
||||
def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd = [self.exe, '-c']
|
||||
cmd += self._configuration_args([
|
||||
@@ -130,12 +151,112 @@ class Aria2cFD(ExternalFD):
|
||||
|
||||
|
||||
class HttpieFD(ExternalFD):
|
||||
@classmethod
|
||||
def available(cls):
|
||||
return check_executable('http', ['--version'])
|
||||
|
||||
def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['%s:%s' % (key, val)]
|
||||
return cmd
|
||||
|
||||
|
||||
class FFmpegFD(ExternalFD):
|
||||
@classmethod
|
||||
def supports(cls, info_dict):
|
||||
return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms')
|
||||
|
||||
@classmethod
|
||||
def available(cls):
|
||||
return FFmpegPostProcessor().available
|
||||
|
||||
def _call_downloader(self, tmpfilename, info_dict):
|
||||
url = info_dict['url']
|
||||
ffpp = FFmpegPostProcessor(downloader=self)
|
||||
if not ffpp.available:
|
||||
self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
|
||||
return False
|
||||
ffpp.check_version()
|
||||
|
||||
args = [ffpp.executable, '-y']
|
||||
|
||||
args += self._configuration_args()
|
||||
|
||||
# start_time = info_dict.get('start_time') or 0
|
||||
# if start_time:
|
||||
# args += ['-ss', compat_str(start_time)]
|
||||
# end_time = info_dict.get('end_time')
|
||||
# if end_time:
|
||||
# args += ['-t', compat_str(end_time - start_time)]
|
||||
|
||||
if info_dict['http_headers'] and re.match(r'^https?://', url):
|
||||
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
|
||||
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
|
||||
headers = handle_youtubedl_headers(info_dict['http_headers'])
|
||||
args += [
|
||||
'-headers',
|
||||
''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
|
||||
|
||||
protocol = info_dict.get('protocol')
|
||||
|
||||
if protocol == 'rtmp':
|
||||
player_url = info_dict.get('player_url')
|
||||
page_url = info_dict.get('page_url')
|
||||
app = info_dict.get('app')
|
||||
play_path = info_dict.get('play_path')
|
||||
tc_url = info_dict.get('tc_url')
|
||||
flash_version = info_dict.get('flash_version')
|
||||
live = info_dict.get('rtmp_live', False)
|
||||
if player_url is not None:
|
||||
args += ['-rtmp_swfverify', player_url]
|
||||
if page_url is not None:
|
||||
args += ['-rtmp_pageurl', page_url]
|
||||
if app is not None:
|
||||
args += ['-rtmp_app', app]
|
||||
if play_path is not None:
|
||||
args += ['-rtmp_playpath', play_path]
|
||||
if tc_url is not None:
|
||||
args += ['-rtmp_tcurl', tc_url]
|
||||
if flash_version is not None:
|
||||
args += ['-rtmp_flashver', flash_version]
|
||||
if live:
|
||||
args += ['-rtmp_live', 'live']
|
||||
|
||||
args += ['-i', url, '-c', 'copy']
|
||||
if protocol == 'm3u8':
|
||||
if self.params.get('hls_use_mpegts', False):
|
||||
args += ['-f', 'mpegts']
|
||||
else:
|
||||
args += ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
|
||||
elif protocol == 'rtmp':
|
||||
args += ['-f', 'flv']
|
||||
else:
|
||||
args += ['-f', EXT_TO_OUT_FORMATS.get(info_dict['ext'], info_dict['ext'])]
|
||||
|
||||
args = [encodeArgument(opt) for opt in args]
|
||||
args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
|
||||
|
||||
self._debug_cmd(args)
|
||||
|
||||
proc = subprocess.Popen(args, stdin=subprocess.PIPE)
|
||||
try:
|
||||
retval = proc.wait()
|
||||
except KeyboardInterrupt:
|
||||
# subprocces.run would send the SIGKILL signal to ffmpeg and the
|
||||
# mp4 file couldn't be played, but if we ask ffmpeg to quit it
|
||||
# produces a file that is playable (this is mostly useful for live
|
||||
# streams). Note that Windows is not affected and produces playable
|
||||
# files (see https://github.com/rg3/youtube-dl/issues/8300).
|
||||
if sys.platform != 'win32':
|
||||
proc.communicate(b'q')
|
||||
raise
|
||||
return retval
|
||||
|
||||
|
||||
class AVconvFD(FFmpegFD):
|
||||
pass
|
||||
|
||||
_BY_NAME = dict(
|
||||
(klass.get_basename(), klass)
|
||||
for name, klass in globals().items()
|
||||
|
@@ -99,7 +99,8 @@ class FragmentFD(FileDownloader):
|
||||
state['eta'] = self.calc_eta(
|
||||
start, time_now, estimated_size,
|
||||
state['downloaded_bytes'])
|
||||
state['speed'] = s.get('speed')
|
||||
state['speed'] = s.get('speed') or ctx.get('speed')
|
||||
ctx['speed'] = state['speed']
|
||||
ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
|
||||
self._hook_progress(state)
|
||||
|
||||
|
@@ -1,87 +1,19 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import os.path
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
from .common import FileDownloader
|
||||
from .fragment import FragmentFD
|
||||
|
||||
from ..compat import compat_urlparse
|
||||
from ..postprocessor.ffmpeg import FFmpegPostProcessor
|
||||
from ..utils import (
|
||||
encodeArgument,
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
handle_youtubedl_headers,
|
||||
)
|
||||
|
||||
|
||||
class HlsFD(FileDownloader):
|
||||
def real_download(self, filename, info_dict):
|
||||
url = info_dict['url']
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
|
||||
ffpp = FFmpegPostProcessor(downloader=self)
|
||||
if not ffpp.available:
|
||||
self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
|
||||
return False
|
||||
ffpp.check_version()
|
||||
|
||||
args = [ffpp.executable, '-y']
|
||||
|
||||
if info_dict['http_headers'] and re.match(r'^https?://', url):
|
||||
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
|
||||
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
|
||||
headers = handle_youtubedl_headers(info_dict['http_headers'])
|
||||
args += [
|
||||
'-headers',
|
||||
''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
|
||||
|
||||
args += ['-i', url, '-c', 'copy']
|
||||
if self.params.get('hls_use_mpegts', False):
|
||||
args += ['-f', 'mpegts']
|
||||
else:
|
||||
args += ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
|
||||
|
||||
args = [encodeArgument(opt) for opt in args]
|
||||
args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
|
||||
|
||||
self._debug_cmd(args)
|
||||
|
||||
proc = subprocess.Popen(args, stdin=subprocess.PIPE)
|
||||
try:
|
||||
retval = proc.wait()
|
||||
except KeyboardInterrupt:
|
||||
# subprocces.run would send the SIGKILL signal to ffmpeg and the
|
||||
# mp4 file couldn't be played, but if we ask ffmpeg to quit it
|
||||
# produces a file that is playable (this is mostly useful for live
|
||||
# streams). Note that Windows is not affected and produces playable
|
||||
# files (see https://github.com/rg3/youtube-dl/issues/8300).
|
||||
if sys.platform != 'win32':
|
||||
proc.communicate(b'q')
|
||||
raise
|
||||
if retval == 0:
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen('\r[%s] %s bytes' % (args[0], fsize))
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
'total_bytes': fsize,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
})
|
||||
return True
|
||||
else:
|
||||
self.to_stderr('\n')
|
||||
self.report_error('%s exited with code %d' % (ffpp.basename, retval))
|
||||
return False
|
||||
|
||||
|
||||
class NativeHlsFD(FragmentFD):
|
||||
""" A more limited implementation that does not require ffmpeg """
|
||||
class HlsFD(FragmentFD):
|
||||
""" A limited implementation that does not require ffmpeg """
|
||||
|
||||
FD_NAME = 'hlsnative'
|
||||
|
||||
|
@@ -23,7 +23,10 @@ from .alphaporno import AlphaPornoIE
|
||||
from .animeondemand import AnimeOnDemandIE
|
||||
from .anitube import AnitubeIE
|
||||
from .anysex import AnySexIE
|
||||
from .aol import AolIE
|
||||
from .aol import (
|
||||
AolIE,
|
||||
AolFeaturesIE,
|
||||
)
|
||||
from .allocine import AllocineIE
|
||||
from .aparat import AparatIE
|
||||
from .appleconnect import AppleConnectIE
|
||||
@@ -51,6 +54,7 @@ from .arte import (
|
||||
from .atresplayer import AtresPlayerIE
|
||||
from .atttechchannel import ATTTechChannelIE
|
||||
from .audimedia import AudiMediaIE
|
||||
from .audioboom import AudioBoomIE
|
||||
from .audiomack import AudiomackIE, AudiomackAlbumIE
|
||||
from .azubu import AzubuIE, AzubuLiveIE
|
||||
from .baidu import BaiduVideoIE
|
||||
@@ -74,8 +78,10 @@ from .bleacherreport import (
|
||||
)
|
||||
from .blinkx import BlinkxIE
|
||||
from .bloomberg import BloombergIE
|
||||
from .bokecc import BokeCCIE
|
||||
from .bpb import BpbIE
|
||||
from .br import BRIE
|
||||
from .bravotv import BravoTVIE
|
||||
from .breakcom import BreakIE
|
||||
from .brightcove import (
|
||||
BrightcoveLegacyIE,
|
||||
@@ -184,6 +190,10 @@ from .dumpert import DumpertIE
|
||||
from .defense import DefenseGouvFrIE
|
||||
from .discovery import DiscoveryIE
|
||||
from .dropbox import DropboxIE
|
||||
from .dw import (
|
||||
DWIE,
|
||||
DWArticleIE,
|
||||
)
|
||||
from .eagleplatform import EaglePlatformIE
|
||||
from .ebaumsworld import EbaumsWorldIE
|
||||
from .echomsk import EchoMskIE
|
||||
@@ -208,10 +218,7 @@ from .everyonesmixtape import EveryonesMixtapeIE
|
||||
from .exfm import ExfmIE
|
||||
from .expotv import ExpoTVIE
|
||||
from .extremetube import ExtremeTubeIE
|
||||
from .facebook import (
|
||||
FacebookIE,
|
||||
FacebookPostIE,
|
||||
)
|
||||
from .facebook import FacebookIE
|
||||
from .faz import FazIE
|
||||
from .fc2 import FC2IE
|
||||
from .fczenit import FczenitIE
|
||||
@@ -339,6 +346,7 @@ from .konserthusetplay import KonserthusetPlayIE
|
||||
from .kontrtube import KontrTubeIE
|
||||
from .krasview import KrasViewIE
|
||||
from .ku6 import Ku6IE
|
||||
from .kusi import KUSIIE
|
||||
from .kuwo import (
|
||||
KuwoIE,
|
||||
KuwoAlbumIE,
|
||||
@@ -351,10 +359,9 @@ from .la7 import LA7IE
|
||||
from .laola1tv import Laola1TvIE
|
||||
from .lecture2go import Lecture2GoIE
|
||||
from .lemonde import LemondeIE
|
||||
from .letv import (
|
||||
LetvIE,
|
||||
LetvTvIE,
|
||||
LetvPlaylistIE,
|
||||
from .leeco import (
|
||||
LeIE,
|
||||
LePlaylistIE,
|
||||
LetvCloudIE,
|
||||
)
|
||||
from .libsyn import LibsynIE
|
||||
@@ -383,6 +390,7 @@ from .lynda import (
|
||||
from .m6 import M6IE
|
||||
from .macgamestore import MacGameStoreIE
|
||||
from .mailru import MailRuIE
|
||||
from .makerschannel import MakersChannelIE
|
||||
from .makertv import MakerTVIE
|
||||
from .malemotion import MalemotionIE
|
||||
from .matchtv import MatchTVIE
|
||||
@@ -392,6 +400,7 @@ from .metacritic import MetacriticIE
|
||||
from .mgoon import MgoonIE
|
||||
from .minhateca import MinhatecaIE
|
||||
from .ministrygrid import MinistryGridIE
|
||||
from .minoto import MinotoIE
|
||||
from .miomio import MioMioIE
|
||||
from .mit import TechTVMITIE, MITIE, OCWMITIE
|
||||
from .mitele import MiTeleIE
|
||||
@@ -505,6 +514,7 @@ from .npr import NprIE
|
||||
from .nrk import (
|
||||
NRKIE,
|
||||
NRKPlaylistIE,
|
||||
NRKSkoleIE,
|
||||
NRKTVIE,
|
||||
)
|
||||
from .ntvde import NTVDeIE
|
||||
@@ -589,6 +599,7 @@ from .regiotv import RegioTVIE
|
||||
from .restudy import RestudyIE
|
||||
from .reverbnation import ReverbNationIE
|
||||
from .revision3 import Revision3IE
|
||||
from .rice import RICEIE
|
||||
from .ringtv import RingTVIE
|
||||
from .ro220 import Ro220IE
|
||||
from .rottentomatoes import RottenTomatoesIE
|
||||
@@ -669,7 +680,6 @@ from .southpark import (
|
||||
SouthParkEsIE,
|
||||
SouthParkNlIE
|
||||
)
|
||||
from .space import SpaceIE
|
||||
from .spankbang import SpankBangIE
|
||||
from .spankwire import SpankwireIE
|
||||
from .spiegel import SpiegelIE, SpiegelArticleIE
|
||||
@@ -728,6 +738,7 @@ from .theplatform import (
|
||||
ThePlatformFeedIE,
|
||||
)
|
||||
from .thesixtyone import TheSixtyOneIE
|
||||
from .thestar import TheStarIE
|
||||
from .thisamericanlife import ThisAmericanLifeIE
|
||||
from .thisav import ThisAVIE
|
||||
from .tinypic import TinyPicIE
|
||||
@@ -737,6 +748,7 @@ from .tmz import (
|
||||
TMZArticleIE,
|
||||
)
|
||||
from .tnaflix import (
|
||||
TNAFlixNetworkEmbedIE,
|
||||
TNAFlixIE,
|
||||
EMPFlixIE,
|
||||
MovieFapIE,
|
||||
@@ -773,6 +785,7 @@ from .tv2 import (
|
||||
TV2IE,
|
||||
TV2ArticleIE,
|
||||
)
|
||||
from .tv3 import TV3IE
|
||||
from .tv4 import TV4IE
|
||||
from .tvc import (
|
||||
TVCIE,
|
||||
@@ -812,7 +825,9 @@ from .udn import UDNEmbedIE
|
||||
from .digiteka import DigitekaIE
|
||||
from .unistra import UnistraIE
|
||||
from .urort import UrortIE
|
||||
from .usatoday import USATodayIE
|
||||
from .ustream import UstreamIE, UstreamChannelIE
|
||||
from .ustudio import UstudioIE
|
||||
from .varzesh3 import Varzesh3IE
|
||||
from .vbox7 import Vbox7IE
|
||||
from .veehd import VeeHDIE
|
||||
@@ -826,7 +841,10 @@ from .vgtv import (
|
||||
VGTVIE,
|
||||
)
|
||||
from .vh1 import VH1IE
|
||||
from .vice import ViceIE
|
||||
from .vice import (
|
||||
ViceIE,
|
||||
ViceShowIE,
|
||||
)
|
||||
from .viddler import ViddlerIE
|
||||
from .videodetective import VideoDetectiveIE
|
||||
from .videofyme import VideofyMeIE
|
||||
@@ -853,6 +871,7 @@ from .vimeo import (
|
||||
VimeoChannelIE,
|
||||
VimeoGroupsIE,
|
||||
VimeoLikesIE,
|
||||
VimeoOndemandIE,
|
||||
VimeoReviewIE,
|
||||
VimeoUserIE,
|
||||
VimeoWatchLaterIE,
|
||||
|
@@ -13,24 +13,18 @@ class AlJazeeraIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'The Slum - Episode 1: Deliverance',
|
||||
'description': 'As a birth attendant advocating for family planning, Remy is on the frontline of Tondo\'s battle with overcrowding.',
|
||||
'uploader': 'Al Jazeera English',
|
||||
'uploader_id': '665003303001',
|
||||
'timestamp': 1411116829,
|
||||
'upload_date': '20140919',
|
||||
},
|
||||
'add_ie': ['BrightcoveLegacy'],
|
||||
'add_ie': ['BrightcoveNew'],
|
||||
'skip': 'Not accessible from Travis CI server',
|
||||
}
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/665003303001/default_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
program_name = self._match_id(url)
|
||||
webpage = self._download_webpage(url, program_name)
|
||||
brightcove_id = self._search_regex(
|
||||
r'RenderPagesVideo\(\'(.+?)\'', webpage, 'brightcove id')
|
||||
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': (
|
||||
'brightcove:'
|
||||
'playerKey=AQ~~%2CAAAAmtVJIFk~%2CTVGOQ5ZTwJbeMWnq5d_H4MOM57xfzApc'
|
||||
'&%40videoPlayer={0}'.format(brightcove_id)
|
||||
),
|
||||
'ie_key': 'BrightcoveLegacy',
|
||||
}
|
||||
return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
|
||||
|
@@ -18,7 +18,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
_LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in'
|
||||
_APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
|
||||
_NETRC_MACHINE = 'animeondemand'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'https://www.anime-on-demand.de/anime/161',
|
||||
'info_dict': {
|
||||
'id': '161',
|
||||
@@ -26,7 +26,15 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
'description': 'md5:6681ce3c07c7189d255ac6ab23812d31',
|
||||
},
|
||||
'playlist_mincount': 4,
|
||||
}
|
||||
}, {
|
||||
# Film wording is used instead of Episode
|
||||
'url': 'https://www.anime-on-demand.de/anime/39',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Episodes without titles
|
||||
'url': 'https://www.anime-on-demand.de/anime/162',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
@@ -91,14 +99,22 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
|
||||
entries = []
|
||||
|
||||
for episode_html in re.findall(r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', webpage):
|
||||
m = re.search(
|
||||
r'class="episodebox-title"[^>]+title="Episode (?P<number>\d+) - (?P<title>.+?)"', episode_html)
|
||||
if not m:
|
||||
for num, episode_html in enumerate(re.findall(
|
||||
r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', webpage), 1):
|
||||
episodebox_title = self._search_regex(
|
||||
(r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1',
|
||||
r'class="episodebox-title"[^>]+>(?P<title>.+?)<'),
|
||||
episode_html, 'episodebox title', default=None, group='title')
|
||||
if not episodebox_title:
|
||||
continue
|
||||
|
||||
episode_number = int(m.group('number'))
|
||||
episode_title = m.group('title')
|
||||
episode_number = int(self._search_regex(
|
||||
r'(?:Episode|Film)\s*(\d+)',
|
||||
episodebox_title, 'episode number', default=num))
|
||||
episode_title = self._search_regex(
|
||||
r'(?:Episode|Film)\s*\d+\s*-\s*(.+)',
|
||||
episodebox_title, 'episode title', default=None)
|
||||
|
||||
video_id = 'episode-%d' % episode_number
|
||||
|
||||
common_info = {
|
||||
|
@@ -1,24 +1,11 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class AolIE(InfoExtractor):
|
||||
IE_NAME = 'on.aol.com'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
aol-video:|
|
||||
http://on\.aol\.com/
|
||||
(?:
|
||||
video/.*-|
|
||||
playlist/(?P<playlist_display_id>[^/?#]+?)-(?P<playlist_id>[0-9]+)[?#].*_videoid=
|
||||
)
|
||||
)
|
||||
(?P<id>[0-9]+)
|
||||
(?:$|\?)
|
||||
'''
|
||||
_VALID_URL = r'(?:aol-video:|http://on\.aol\.com/video/.*-)(?P<id>[0-9]+)(?:$|\?)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
|
||||
@@ -29,42 +16,31 @@ class AolIE(InfoExtractor):
|
||||
'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam',
|
||||
},
|
||||
'add_ie': ['FiveMin'],
|
||||
}, {
|
||||
'url': 'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316',
|
||||
'info_dict': {
|
||||
'id': '152147',
|
||||
'title': 'Brace Yourself - Today\'s Weirdest News',
|
||||
},
|
||||
'playlist_mincount': 10,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
playlist_id = mobj.group('playlist_id')
|
||||
if not playlist_id or self._downloader.params.get('noplaylist'):
|
||||
return self.url_result('5min:%s' % video_id)
|
||||
video_id = self._match_id(url)
|
||||
return self.url_result('5min:%s' % video_id)
|
||||
|
||||
self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
title = self._html_search_regex(
|
||||
r'<h1 class="video-title[^"]*">(.+?)</h1>', webpage, 'title')
|
||||
playlist_html = self._search_regex(
|
||||
r"(?s)<ul\s+class='video-related[^']*'>(.*?)</ul>", webpage,
|
||||
'playlist HTML')
|
||||
entries = [{
|
||||
'_type': 'url',
|
||||
'url': 'aol-video:%s' % m.group('id'),
|
||||
'ie_key': 'Aol',
|
||||
} for m in re.finditer(
|
||||
r"<a\s+href='.*videoid=(?P<id>[0-9]+)'\s+class='video-thumb'>",
|
||||
playlist_html)]
|
||||
class AolFeaturesIE(InfoExtractor):
|
||||
IE_NAME = 'features.aol.com'
|
||||
_VALID_URL = r'http://features\.aol\.com/video/(?P<id>[^/?#]+)'
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': playlist_id,
|
||||
'display_id': mobj.group('playlist_display_id'),
|
||||
'title': title,
|
||||
'entries': entries,
|
||||
}
|
||||
_TESTS = [{
|
||||
'url': 'http://features.aol.com/video/behind-secret-second-careers-late-night-talk-show-hosts',
|
||||
'md5': '7db483bb0c09c85e241f84a34238cc75',
|
||||
'info_dict': {
|
||||
'id': '519507715',
|
||||
'ext': 'mp4',
|
||||
'title': 'What To Watch - February 17, 2016',
|
||||
},
|
||||
'add_ie': ['FiveMin'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
return self.url_result(self._search_regex(
|
||||
r'<script type="text/javascript" src="(https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js[^"]+)"',
|
||||
webpage, '5min embed url'), 'FiveMin')
|
||||
|
@@ -121,15 +121,18 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
json_url = compat_parse_qs(
|
||||
compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
|
||||
if json_url:
|
||||
return self._extract_from_json_url(json_url, video_id, lang)
|
||||
# Differend kind of embed URL (e.g.
|
||||
title = self._search_regex(
|
||||
r'<h3[^>]+title=(["\'])(?P<title>.+?)\1',
|
||||
webpage, 'title', default=None, group='title')
|
||||
return self._extract_from_json_url(json_url, video_id, lang, title=title)
|
||||
# Different kind of embed URL (e.g.
|
||||
# http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium)
|
||||
embed_url = self._search_regex(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1',
|
||||
webpage, 'embed url', group='url')
|
||||
return self.url_result(embed_url)
|
||||
|
||||
def _extract_from_json_url(self, json_url, video_id, lang):
|
||||
def _extract_from_json_url(self, json_url, video_id, lang, title=None):
|
||||
info = self._download_json(json_url, video_id)
|
||||
player_info = info['videoJsonPlayer']
|
||||
|
||||
@@ -137,7 +140,7 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
if not upload_date_str:
|
||||
upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
|
||||
|
||||
title = player_info['VTI'].strip()
|
||||
title = (player_info.get('VTI') or title or player_info['VID']).strip()
|
||||
subtitle = player_info.get('VSU', '').strip()
|
||||
if subtitle:
|
||||
title += ' - %s' % subtitle
|
||||
|
@@ -10,9 +10,9 @@ from ..utils import (
|
||||
|
||||
|
||||
class AudiMediaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?audimedia\.tv/(?:en|de)/vid/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?audi-mediacenter\.com/(?:en|de)/audimediatv/(?P<id>[^/?#]+)'
|
||||
_TEST = {
|
||||
'url': 'https://audimedia.tv/en/vid/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test',
|
||||
'url': 'https://www.audi-mediacenter.com/en/audimediatv/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-1467',
|
||||
'md5': '79a8b71c46d49042609795ab59779b66',
|
||||
'info_dict': {
|
||||
'id': '1565',
|
||||
@@ -32,7 +32,10 @@ class AudiMediaIE(InfoExtractor):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
raw_payload = self._search_regex(r'<script[^>]+class="amtv-embed"[^>]+id="([^"]+)"', webpage, 'raw payload')
|
||||
raw_payload = self._search_regex([
|
||||
r'class="amtv-embed"[^>]+id="([^"]+)"',
|
||||
r'class=\\"amtv-embed\\"[^>]+id=\\"([^"]+)\\"',
|
||||
], webpage, 'raw payload')
|
||||
_, stage_mode, video_id, lang = raw_payload.split('-')
|
||||
|
||||
# TODO: handle s and e stage_mode (live streams and ended live streams)
|
||||
@@ -59,13 +62,19 @@ class AudiMediaIE(InfoExtractor):
|
||||
video_version_url = video_version.get('download_url') or video_version.get('stream_url')
|
||||
if not video_version_url:
|
||||
continue
|
||||
formats.append({
|
||||
f = {
|
||||
'url': video_version_url,
|
||||
'width': int_or_none(video_version.get('width')),
|
||||
'height': int_or_none(video_version.get('height')),
|
||||
'abr': int_or_none(video_version.get('audio_bitrate')),
|
||||
'vbr': int_or_none(video_version.get('video_bitrate')),
|
||||
})
|
||||
}
|
||||
bitrate = self._search_regex(r'(\d+)k', video_version_url, 'bitrate', default=None)
|
||||
if bitrate:
|
||||
f.update({
|
||||
'format_id': 'http-%s' % bitrate,
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
66
youtube_dl/extractor/audioboom.py
Normal file
66
youtube_dl/extractor/audioboom.py
Normal file
@@ -0,0 +1,66 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import float_or_none
|
||||
|
||||
|
||||
class AudioBoomIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?audioboom\.com/boos/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'https://audioboom.com/boos/4279833-3-09-2016-czaban-hour-3?t=0',
|
||||
'md5': '63a8d73a055c6ed0f1e51921a10a5a76',
|
||||
'info_dict': {
|
||||
'id': '4279833',
|
||||
'ext': 'mp3',
|
||||
'title': '3/09/2016 Czaban Hour 3',
|
||||
'description': 'Guest: Nate Davis - NFL free agency, Guest: Stan Gans',
|
||||
'duration': 2245.72,
|
||||
'uploader': 'Steve Czaban',
|
||||
'uploader_url': 're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
clip = None
|
||||
|
||||
clip_store = self._parse_json(
|
||||
self._search_regex(
|
||||
r'data-new-clip-store=(["\'])(?P<json>{.*?"clipId"\s*:\s*%s.*?})\1' % video_id,
|
||||
webpage, 'clip store', default='{}', group='json'),
|
||||
video_id, fatal=False)
|
||||
if clip_store:
|
||||
clips = clip_store.get('clips')
|
||||
if clips and isinstance(clips, list) and isinstance(clips[0], dict):
|
||||
clip = clips[0]
|
||||
|
||||
def from_clip(field):
|
||||
if clip:
|
||||
clip.get(field)
|
||||
|
||||
audio_url = from_clip('clipURLPriorToLoading') or self._og_search_property(
|
||||
'audio', webpage, 'audio url')
|
||||
title = from_clip('title') or self._og_search_title(webpage)
|
||||
description = from_clip('description') or self._og_search_description(webpage)
|
||||
|
||||
duration = float_or_none(from_clip('duration') or self._html_search_meta(
|
||||
'weibo:audio:duration', webpage))
|
||||
|
||||
uploader = from_clip('author') or self._og_search_property(
|
||||
'audio:artist', webpage, 'uploader', fatal=False)
|
||||
uploader_url = from_clip('author_url') or self._html_search_meta(
|
||||
'audioboo:channel', webpage, 'uploader url')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': audio_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'uploader': uploader,
|
||||
'uploader_url': uploader_url,
|
||||
}
|
@@ -10,7 +10,6 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
remove_end,
|
||||
unescapeHTML,
|
||||
)
|
||||
from ..compat import (
|
||||
@@ -561,7 +560,7 @@ class BBCIE(BBCCoUkIE):
|
||||
'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460',
|
||||
'info_dict': {
|
||||
'id': '3662a707-0af9-3149-963f-47bea720b460',
|
||||
'title': 'BBC Blogs - Adam Curtis - BUGGER',
|
||||
'title': 'BUGGER',
|
||||
},
|
||||
'playlist_count': 18,
|
||||
}, {
|
||||
@@ -670,9 +669,17 @@ class BBCIE(BBCCoUkIE):
|
||||
'url': 'http://www.bbc.com/sport/0/football/34475836',
|
||||
'info_dict': {
|
||||
'id': '34475836',
|
||||
'title': 'What Liverpool can expect from Klopp',
|
||||
'title': 'Jurgen Klopp: Furious football from a witty and winning coach',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
# school report article with single video
|
||||
'url': 'http://www.bbc.co.uk/schoolreport/35744779',
|
||||
'info_dict': {
|
||||
'id': '35744779',
|
||||
'title': 'School which breaks down barriers in Jerusalem',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
}, {
|
||||
# single video with playlist URL from weather section
|
||||
'url': 'http://www.bbc.com/weather/features/33601775',
|
||||
@@ -735,8 +742,17 @@ class BBCIE(BBCCoUkIE):
|
||||
|
||||
json_ld_info = self._search_json_ld(webpage, playlist_id, default=None)
|
||||
timestamp = json_ld_info.get('timestamp')
|
||||
|
||||
playlist_title = json_ld_info.get('title')
|
||||
playlist_description = json_ld_info.get('description')
|
||||
if not playlist_title:
|
||||
playlist_title = self._og_search_title(
|
||||
webpage, default=None) or self._html_search_regex(
|
||||
r'<title>(.+?)</title>', webpage, 'playlist title', default=None)
|
||||
if playlist_title:
|
||||
playlist_title = re.sub(r'(.+)\s*-\s*BBC.*?$', r'\1', playlist_title).strip()
|
||||
|
||||
playlist_description = json_ld_info.get(
|
||||
'description') or self._og_search_description(webpage, default=None)
|
||||
|
||||
if not timestamp:
|
||||
timestamp = parse_iso8601(self._search_regex(
|
||||
@@ -797,8 +813,6 @@ class BBCIE(BBCCoUkIE):
|
||||
playlist.get('progressiveDownloadUrl'), playlist_id, timestamp))
|
||||
|
||||
if entries:
|
||||
playlist_title = playlist_title or remove_end(self._og_search_title(webpage), ' - BBC News')
|
||||
playlist_description = playlist_description or self._og_search_description(webpage, default=None)
|
||||
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
||||
@@ -829,10 +843,6 @@ class BBCIE(BBCCoUkIE):
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
playlist_title = self._html_search_regex(
|
||||
r'<title>(.*?)(?:\s*-\s*BBC [^ ]+)?</title>', webpage, 'playlist title')
|
||||
playlist_description = self._og_search_description(webpage, default=None)
|
||||
|
||||
def extract_all(pattern):
|
||||
return list(filter(None, map(
|
||||
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
||||
|
@@ -28,10 +28,10 @@ class BleacherReportIE(InfoExtractor):
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
'url': 'http://bleacherreport.com/articles/2586817-aussie-golfers-get-fright-of-their-lives-after-being-chased-by-angry-kangaroo',
|
||||
'md5': 'af5f90dc9c7ba1c19d0a3eac806bbf50',
|
||||
'md5': '6a5cd403418c7b01719248ca97fb0692',
|
||||
'info_dict': {
|
||||
'id': '2586817',
|
||||
'ext': 'mp4',
|
||||
'ext': 'webm',
|
||||
'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo',
|
||||
'timestamp': 1446839961,
|
||||
'uploader': 'Sean Fay',
|
||||
@@ -93,10 +93,14 @@ class BleacherReportCMSIE(AMPIE):
|
||||
'md5': '8c2c12e3af7805152675446c905d159b',
|
||||
'info_dict': {
|
||||
'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
|
||||
'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
60
youtube_dl/extractor/bokecc.py
Normal file
60
youtube_dl/extractor/bokecc.py
Normal file
@@ -0,0 +1,60 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_parse_qs
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class BokeCCBaseIE(InfoExtractor):
|
||||
def _extract_bokecc_formats(self, webpage, video_id, format_id=None):
|
||||
player_params_str = self._html_search_regex(
|
||||
r'<(?:script|embed)[^>]+src="http://p\.bokecc\.com/player\?([^"]+)',
|
||||
webpage, 'player params')
|
||||
|
||||
player_params = compat_parse_qs(player_params_str)
|
||||
|
||||
info_xml = self._download_xml(
|
||||
'http://p.bokecc.com/servlet/playinfo?uid=%s&vid=%s&m=1' % (
|
||||
player_params['siteid'][0], player_params['vid'][0]), video_id)
|
||||
|
||||
formats = [{
|
||||
'format_id': format_id,
|
||||
'url': quality.find('./copy').attrib['playurl'],
|
||||
'preference': int(quality.attrib['value']),
|
||||
} for quality in info_xml.findall('./video/quality')]
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return formats
|
||||
|
||||
|
||||
class BokeCCIE(BokeCCBaseIE):
|
||||
_IE_DESC = 'CC视频'
|
||||
_VALID_URL = r'http://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://union.bokecc.com/playvideo.bo?vid=E44D40C15E65EA30&uid=CD0C5D3C8614B28B',
|
||||
'info_dict': {
|
||||
'id': 'CD0C5D3C8614B28B_E44D40C15E65EA30',
|
||||
'ext': 'flv',
|
||||
'title': 'BokeCC Video',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
qs = compat_parse_qs(re.match(self._VALID_URL, url).group('query'))
|
||||
if not qs.get('vid') or not qs.get('uid'):
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
|
||||
video_id = '%s_%s' % (qs['uid'][0], qs['vid'][0])
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': 'BokeCC Video', # no title provided in the webpage
|
||||
'formats': self._extract_bokecc_formats(webpage, video_id),
|
||||
}
|
28
youtube_dl/extractor/bravotv.py
Normal file
28
youtube_dl/extractor/bravotv.py
Normal file
@@ -0,0 +1,28 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
|
||||
|
||||
class BravoTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+videos/(?P<id>[^/?]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.bravotv.com/last-chance-kitchen/season-5/videos/lck-ep-12-fishy-finale',
|
||||
'md5': 'd60cdf68904e854fac669bd26cccf801',
|
||||
'info_dict': {
|
||||
'id': 'LitrBdX64qLn',
|
||||
'ext': 'mp4',
|
||||
'title': 'Last Chance Kitchen Returns',
|
||||
'description': 'S13: Last Chance Kitchen Returns for Top Chef Season 13',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
account_pid = self._search_regex(r'"account_pid"\s*:\s*"([^"]+)"', webpage, 'account pid')
|
||||
release_pid = self._search_regex(r'"release_pid"\s*:\s*"([^"]+)"', webpage, 'release pid')
|
||||
return self.url_result(smuggle_url(
|
||||
'http://link.theplatform.com/s/%s/%s?mbr=true&switch=progressive' % (account_pid, release_pid),
|
||||
{'force_smil_url': True}), 'ThePlatform', release_pid)
|
@@ -9,10 +9,10 @@ from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urlparse,
|
||||
compat_xml_parse_error,
|
||||
compat_HTTPError,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
@@ -23,16 +23,16 @@ from ..utils import (
|
||||
js_to_json,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class BrightcoveLegacyIE(InfoExtractor):
|
||||
IE_NAME = 'brightcove:legacy'
|
||||
_VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
|
||||
_FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
|
||||
_FEDERATED_URL = 'http://c.brightcove.com/services/viewer/htmlFederated'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -155,8 +155,8 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
# Not all pages define this value
|
||||
if playerKey is not None:
|
||||
params['playerKey'] = playerKey
|
||||
# The three fields hold the id of the video
|
||||
videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID')
|
||||
# These fields hold the id of the video
|
||||
videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList')
|
||||
if videoPlayer is not None:
|
||||
params['@videoPlayer'] = videoPlayer
|
||||
linkBase = find_param('linkBaseURL')
|
||||
@@ -184,8 +184,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
|
||||
@classmethod
|
||||
def _make_brightcove_url(cls, params):
|
||||
data = compat_urllib_parse.urlencode(params)
|
||||
return cls._FEDERATED_URL_TEMPLATE % data
|
||||
return update_url_query(cls._FEDERATED_URL, params)
|
||||
|
||||
@classmethod
|
||||
def _extract_brightcove_url(cls, webpage):
|
||||
@@ -239,7 +238,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
# We set the original url as the default 'Referer' header
|
||||
referer = smuggled_data.get('Referer', url)
|
||||
return self._get_video_info(
|
||||
videoPlayer[0], query_str, query, referer=referer)
|
||||
videoPlayer[0], query, referer=referer)
|
||||
elif 'playerKey' in query:
|
||||
player_key = query['playerKey']
|
||||
return self._get_playlist_info(player_key[0])
|
||||
@@ -248,15 +247,14 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?',
|
||||
expected=True)
|
||||
|
||||
def _get_video_info(self, video_id, query_str, query, referer=None):
|
||||
request_url = self._FEDERATED_URL_TEMPLATE % query_str
|
||||
req = sanitized_Request(request_url)
|
||||
def _get_video_info(self, video_id, query, referer=None):
|
||||
headers = {}
|
||||
linkBase = query.get('linkBaseURL')
|
||||
if linkBase is not None:
|
||||
referer = linkBase[0]
|
||||
if referer is not None:
|
||||
req.add_header('Referer', referer)
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
headers['Referer'] = referer
|
||||
webpage = self._download_webpage(self._FEDERATED_URL, video_id, headers=headers, query=query)
|
||||
|
||||
error_msg = self._html_search_regex(
|
||||
r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage,
|
||||
@@ -355,7 +353,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
|
||||
class BrightcoveNewIE(InfoExtractor):
|
||||
IE_NAME = 'brightcove:new'
|
||||
_VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>(?:ref:)?\d+)'
|
||||
_VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>\d+|ref:[^&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',
|
||||
'md5': 'c8100925723840d4b0d243f7025703be',
|
||||
@@ -391,6 +389,10 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
# ref: prefixed video id
|
||||
'url': 'http://players.brightcove.net/3910869709001/21519b5c-4b3b-4363-accb-bdc8f358f823_default/index.html?videoId=ref:7069442',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# non numeric ref: prefixed video id
|
||||
'url': 'http://players.brightcove.net/710858724001/default_default/index.html?videoId=ref:event-stream-356',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -410,8 +412,8 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
|
||||
# Look for iframe embeds [1]
|
||||
for _, url in re.findall(
|
||||
r'<iframe[^>]+src=(["\'])((?:https?:)//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
|
||||
entries.append(url)
|
||||
r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
|
||||
entries.append(url if url.startswith('http') else 'http:' + url)
|
||||
|
||||
# Look for embed_in_page embeds [2]
|
||||
for video_id, account_id, player_id, embed in re.findall(
|
||||
@@ -420,11 +422,11 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
# According to [4] data-video-id may be prefixed with ref:
|
||||
r'''(?sx)
|
||||
<video[^>]+
|
||||
data-video-id=["\']((?:ref:)?\d+)["\'][^>]*>.*?
|
||||
data-video-id=["\'](\d+|ref:[^"\']+)["\'][^>]*>.*?
|
||||
</video>.*?
|
||||
<script[^>]+
|
||||
src=["\'](?:https?:)?//players\.brightcove\.net/
|
||||
(\d+)/([\da-f-]+)_([^/]+)/index\.min\.js
|
||||
(\d+)/([\da-f-]+)_([^/]+)/index(?:\.min)?\.js
|
||||
''', webpage):
|
||||
entries.append(
|
||||
'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
|
||||
@@ -454,24 +456,33 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
|
||||
webpage, 'policy key', group='pk')
|
||||
|
||||
req = sanitized_Request(
|
||||
'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s'
|
||||
% (account_id, video_id),
|
||||
headers={'Accept': 'application/json;pk=%s' % policy_key})
|
||||
json_data = self._download_json(req, video_id)
|
||||
api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id)
|
||||
try:
|
||||
json_data = self._download_json(api_url, video_id, headers={
|
||||
'Accept': 'application/json;pk=%s' % policy_key
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
json_data = self._parse_json(e.cause.read().decode(), video_id)
|
||||
raise ExtractorError(json_data[0]['message'], expected=True)
|
||||
raise
|
||||
|
||||
title = json_data['name']
|
||||
|
||||
formats = []
|
||||
for source in json_data.get('sources', []):
|
||||
container = source.get('container')
|
||||
source_type = source.get('type')
|
||||
src = source.get('src')
|
||||
if source_type == 'application/x-mpegURL':
|
||||
if source_type == 'application/x-mpegURL' or container == 'M2TS':
|
||||
if not src:
|
||||
continue
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
src, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
src, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
elif source_type == 'application/dash+xml':
|
||||
if not src:
|
||||
continue
|
||||
formats.extend(self._extract_mpd_formats(src, video_id, 'dash', fatal=False))
|
||||
else:
|
||||
streaming_src = source.get('streaming_src')
|
||||
stream_name, app_name = source.get('stream_name'), source.get('app_name')
|
||||
@@ -479,15 +490,23 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
continue
|
||||
tbr = float_or_none(source.get('avg_bitrate'), 1000)
|
||||
height = int_or_none(source.get('height'))
|
||||
width = int_or_none(source.get('width'))
|
||||
f = {
|
||||
'tbr': tbr,
|
||||
'width': int_or_none(source.get('width')),
|
||||
'height': height,
|
||||
'filesize': int_or_none(source.get('size')),
|
||||
'container': source.get('container'),
|
||||
'vcodec': source.get('codec'),
|
||||
'ext': source.get('container').lower(),
|
||||
'container': container,
|
||||
'ext': container.lower(),
|
||||
}
|
||||
if width == 0 and height == 0:
|
||||
f.update({
|
||||
'vcodec': 'none',
|
||||
})
|
||||
else:
|
||||
f.update({
|
||||
'width': width,
|
||||
'height': height,
|
||||
'vcodec': source.get('codec'),
|
||||
})
|
||||
|
||||
def build_format_id(kind):
|
||||
format_id = kind
|
||||
|
@@ -4,12 +4,13 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json
|
||||
|
||||
|
||||
class C56IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www|player)\.)?56\.com/(?:.+?/)?(?:v_|(?:play_album.+-))(?P<textid>.+?)\.(?:html|swf)'
|
||||
IE_NAME = '56.com'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html',
|
||||
'md5': 'e59995ac63d0457783ea05f93f12a866',
|
||||
'info_dict': {
|
||||
@@ -18,12 +19,29 @@ class C56IE(InfoExtractor):
|
||||
'title': '网事知多少 第32期:车怒',
|
||||
'duration': 283.813,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.56.com/u47/v_MTM5NjQ5ODc2.html',
|
||||
'md5': '',
|
||||
'info_dict': {
|
||||
'id': '82247482',
|
||||
'title': '爱的诅咒之杜鹃花开',
|
||||
},
|
||||
'playlist_count': 7,
|
||||
'add_ie': ['Sohu'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
|
||||
text_id = mobj.group('textid')
|
||||
|
||||
webpage = self._download_webpage(url, text_id)
|
||||
sohu_video_info_str = self._search_regex(
|
||||
r'var\s+sohuVideoInfo\s*=\s*({[^}]+});', webpage, 'Sohu video info', default=None)
|
||||
if sohu_video_info_str:
|
||||
sohu_video_info = self._parse_json(
|
||||
sohu_video_info_str, text_id, transform_source=js_to_json)
|
||||
return self.url_result(sohu_video_info['url'], 'Sohu')
|
||||
|
||||
page = self._download_json(
|
||||
'http://vxml.56.com/json/%s/' % text_id, text_id, 'Downloading video info')
|
||||
|
||||
|
@@ -78,7 +78,7 @@ class CBSNewsIE(ThePlatformIE):
|
||||
pid = item.get('media' + format_id)
|
||||
if not pid:
|
||||
continue
|
||||
release_url = 'http://link.theplatform.com/s/dJ5BDC/%s?format=SMIL&mbr=true' % pid
|
||||
release_url = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true' % pid
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % pid)
|
||||
formats.extend(tp_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
|
@@ -21,6 +21,10 @@ class CinemassacreIE(InfoExtractor):
|
||||
'title': '“Angry Video Game Nerd: The Movie” – Trailer',
|
||||
'description': 'md5:fb87405fcb42a331742a0dce2708560b',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
|
||||
@@ -31,14 +35,18 @@ class CinemassacreIE(InfoExtractor):
|
||||
'upload_date': '20131002',
|
||||
'title': 'The Mummy’s Hand (1940)',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
# Youtube embedded video
|
||||
'url': 'http://cinemassacre.com/2006/12/07/chronologically-confused-about-bad-movie-and-video-game-sequel-titles/',
|
||||
'md5': 'df4cf8a1dcedaec79a73d96d83b99023',
|
||||
'md5': 'ec9838a5520ef5409b3e4e42fcb0a3b9',
|
||||
'info_dict': {
|
||||
'id': 'OEVzPCY2T-g',
|
||||
'ext': 'mp4',
|
||||
'ext': 'webm',
|
||||
'title': 'AVGN: Chronologically Confused about Bad Movie and Video Game Sequel Titles',
|
||||
'upload_date': '20061207',
|
||||
'uploader': 'Cinemassacre',
|
||||
@@ -49,12 +57,12 @@ class CinemassacreIE(InfoExtractor):
|
||||
{
|
||||
# Youtube embedded video
|
||||
'url': 'http://cinemassacre.com/2006/09/01/mckids/',
|
||||
'md5': '6eb30961fa795fedc750eac4881ad2e1',
|
||||
'md5': '7393c4e0f54602ad110c793eb7a6513a',
|
||||
'info_dict': {
|
||||
'id': 'FnxsNhuikpo',
|
||||
'ext': 'mp4',
|
||||
'ext': 'webm',
|
||||
'upload_date': '20060901',
|
||||
'uploader': 'Cinemassacre Extras',
|
||||
'uploader': 'Cinemassacre Extra',
|
||||
'description': 'md5:de9b751efa9e45fbaafd9c8a1123ed53',
|
||||
'uploader_id': 'Cinemassacre',
|
||||
'title': 'AVGN: McKids',
|
||||
@@ -69,7 +77,11 @@ class CinemassacreIE(InfoExtractor):
|
||||
'description': 'Let’s Play Mario Kart 64 !! Mario Kart 64 is a classic go-kart racing game released for the Nintendo 64 (N64). Today James & Mike do 4 player Battle Mode with Kyle and Bootsy!',
|
||||
'title': 'Mario Kart 64 (Nintendo 64) James & Mike Mondays',
|
||||
'upload_date': '20150525',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
|
@@ -51,9 +51,7 @@ class CNETIE(ThePlatformIE):
|
||||
uploader = None
|
||||
uploader_id = None
|
||||
|
||||
mpx_account = data['config']['uvpConfig']['default']['mpx_account']
|
||||
|
||||
metadata = self.get_metadata('%s/%s' % (mpx_account, list(vdata['files'].values())[0]), video_id)
|
||||
metadata = self.get_metadata('kYEXFC/%s' % list(vdata['files'].values())[0], video_id)
|
||||
description = vdata.get('description') or metadata.get('description')
|
||||
duration = int_or_none(vdata.get('duration')) or metadata.get('duration')
|
||||
|
||||
@@ -62,7 +60,7 @@ class CNETIE(ThePlatformIE):
|
||||
for (fkey, vid) in vdata['files'].items():
|
||||
if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']:
|
||||
continue
|
||||
release_url = 'http://link.theplatform.com/s/%s/%s?format=SMIL&mbr=true' % (mpx_account, vid)
|
||||
release_url = 'http://link.theplatform.com/s/kYEXFC/%s?mbr=true' % vid
|
||||
if fkey == 'hds':
|
||||
release_url += '&manifest=f4m'
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % fkey)
|
||||
|
@@ -15,13 +15,14 @@ import math
|
||||
from ..compat import (
|
||||
compat_cookiejar,
|
||||
compat_cookies,
|
||||
compat_etree_fromstring,
|
||||
compat_getpass,
|
||||
compat_http_client,
|
||||
compat_os_name,
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse,
|
||||
compat_urlparse,
|
||||
compat_str,
|
||||
compat_etree_fromstring,
|
||||
)
|
||||
from ..utils import (
|
||||
NO_DEFAULT,
|
||||
@@ -47,6 +48,7 @@ from ..utils import (
|
||||
determine_protocol,
|
||||
parse_duration,
|
||||
mimetype2ext,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
@@ -104,7 +106,7 @@ class InfoExtractor(object):
|
||||
* protocol The protocol that will be used for the actual
|
||||
download, lower-case.
|
||||
"http", "https", "rtsp", "rtmp", "rtmpe",
|
||||
"m3u8", or "m3u8_native".
|
||||
"m3u8", "m3u8_native" or "http_dash_segments".
|
||||
* preference Order number of this format. If this field is
|
||||
present and not None, the formats get sorted
|
||||
by this field, regardless of all other values.
|
||||
@@ -157,12 +159,14 @@ class InfoExtractor(object):
|
||||
thumbnail: Full URL to a video thumbnail image.
|
||||
description: Full video description.
|
||||
uploader: Full name of the video uploader.
|
||||
license: License name the video is licensed under.
|
||||
creator: The main artist who created the video.
|
||||
release_date: The date (YYYYMMDD) when the video was released.
|
||||
timestamp: UNIX timestamp of the moment the video became available.
|
||||
upload_date: Video upload date (YYYYMMDD).
|
||||
If not explicitly set, calculated from timestamp.
|
||||
uploader_id: Nickname or id of the video uploader.
|
||||
uploader_url: Full URL to a personal webpage of the video uploader.
|
||||
location: Physical location where the video was filmed.
|
||||
subtitles: The available subtitles as a dictionary in the format
|
||||
{language: subformats}. "subformats" is a list sorted from
|
||||
@@ -342,7 +346,7 @@ class InfoExtractor(object):
|
||||
def IE_NAME(self):
|
||||
return compat_str(type(self).__name__[:-2])
|
||||
|
||||
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
|
||||
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None):
|
||||
""" Returns the response handle """
|
||||
if note is None:
|
||||
self.report_download_webpage(video_id)
|
||||
@@ -351,6 +355,12 @@ class InfoExtractor(object):
|
||||
self.to_screen('%s' % (note,))
|
||||
else:
|
||||
self.to_screen('%s: %s' % (video_id, note))
|
||||
# data, headers and query params will be ignored for `Request` objects
|
||||
if isinstance(url_or_request, compat_str):
|
||||
if query:
|
||||
url_or_request = update_url_query(url_or_request, query)
|
||||
if data or headers:
|
||||
url_or_request = sanitized_Request(url_or_request, data, headers or {})
|
||||
try:
|
||||
return self._downloader.urlopen(url_or_request)
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
@@ -366,13 +376,13 @@ class InfoExtractor(object):
|
||||
self._downloader.report_warning(errmsg)
|
||||
return False
|
||||
|
||||
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None):
|
||||
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers=None, query=None):
|
||||
""" Returns a tuple (page content as string, URL handle) """
|
||||
# Strip hashes from the URL (#1038)
|
||||
if isinstance(url_or_request, (compat_str, str)):
|
||||
url_or_request = url_or_request.partition('#')[0]
|
||||
|
||||
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal)
|
||||
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query)
|
||||
if urlh is False:
|
||||
assert not fatal
|
||||
return False
|
||||
@@ -425,7 +435,7 @@ class InfoExtractor(object):
|
||||
self.to_screen('Saving request to ' + filename)
|
||||
# Working around MAX_PATH limitation on Windows (see
|
||||
# http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
|
||||
if os.name == 'nt':
|
||||
if compat_os_name == 'nt':
|
||||
absfilepath = os.path.abspath(filename)
|
||||
if len(absfilepath) > 259:
|
||||
filename = '\\\\?\\' + absfilepath
|
||||
@@ -459,13 +469,13 @@ class InfoExtractor(object):
|
||||
|
||||
return content
|
||||
|
||||
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None):
|
||||
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None, data=None, headers=None, query=None):
|
||||
""" Returns the data of the page as a string """
|
||||
success = False
|
||||
try_count = 0
|
||||
while success is False:
|
||||
try:
|
||||
res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal, encoding=encoding)
|
||||
res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal, encoding=encoding, data=data, headers=headers, query=query)
|
||||
success = True
|
||||
except compat_http_client.IncompleteRead as e:
|
||||
try_count += 1
|
||||
@@ -480,10 +490,10 @@ class InfoExtractor(object):
|
||||
|
||||
def _download_xml(self, url_or_request, video_id,
|
||||
note='Downloading XML', errnote='Unable to download XML',
|
||||
transform_source=None, fatal=True, encoding=None):
|
||||
transform_source=None, fatal=True, encoding=None, data=None, headers=None, query=None):
|
||||
"""Return the xml as an xml.etree.ElementTree.Element"""
|
||||
xml_string = self._download_webpage(
|
||||
url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding)
|
||||
url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding, data=data, headers=headers, query=query)
|
||||
if xml_string is False:
|
||||
return xml_string
|
||||
if transform_source:
|
||||
@@ -494,10 +504,10 @@ class InfoExtractor(object):
|
||||
note='Downloading JSON metadata',
|
||||
errnote='Unable to download JSON metadata',
|
||||
transform_source=None,
|
||||
fatal=True, encoding=None):
|
||||
fatal=True, encoding=None, data=None, headers=None, query=None):
|
||||
json_string = self._download_webpage(
|
||||
url_or_request, video_id, note, errnote, fatal=fatal,
|
||||
encoding=encoding)
|
||||
encoding=encoding, data=data, headers=headers, query=query)
|
||||
if (not fatal) and json_string is False:
|
||||
return None
|
||||
return self._parse_json(
|
||||
@@ -594,7 +604,7 @@ class InfoExtractor(object):
|
||||
if mobj:
|
||||
break
|
||||
|
||||
if not self._downloader.params.get('no_color') and os.name != 'nt' and sys.stderr.isatty():
|
||||
if not self._downloader.params.get('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
|
||||
_name = '\033[0;34m%s\033[0m' % name
|
||||
else:
|
||||
_name = name
|
||||
@@ -852,6 +862,7 @@ class InfoExtractor(object):
|
||||
proto_preference = 0 if determine_protocol(f) in ['http', 'https'] else -0.1
|
||||
|
||||
if f.get('vcodec') == 'none': # audio only
|
||||
preference -= 50
|
||||
if self._downloader.params.get('prefer_free_formats'):
|
||||
ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus']
|
||||
else:
|
||||
@@ -862,6 +873,8 @@ class InfoExtractor(object):
|
||||
except ValueError:
|
||||
audio_ext_preference = -1
|
||||
else:
|
||||
if f.get('acodec') == 'none': # video only
|
||||
preference -= 40
|
||||
if self._downloader.params.get('prefer_free_formats'):
|
||||
ORDER = ['flv', 'mp4', 'webm']
|
||||
else:
|
||||
@@ -963,6 +976,13 @@ class InfoExtractor(object):
|
||||
if manifest is False:
|
||||
return []
|
||||
|
||||
return self._parse_f4m_formats(
|
||||
manifest, manifest_url, video_id, preference=preference, f4m_id=f4m_id,
|
||||
transform_source=transform_source, fatal=fatal)
|
||||
|
||||
def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None,
|
||||
transform_source=lambda s: fix_xml_ampersands(s).strip(),
|
||||
fatal=True):
|
||||
formats = []
|
||||
manifest_version = '1.0'
|
||||
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media')
|
||||
@@ -988,7 +1008,8 @@ class InfoExtractor(object):
|
||||
# bitrate in f4m downloader
|
||||
if determine_ext(manifest_url) == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
manifest_url, video_id, preference, f4m_id, fatal=fatal))
|
||||
manifest_url, video_id, preference=preference, f4m_id=f4m_id,
|
||||
transform_source=transform_source, fatal=fatal))
|
||||
continue
|
||||
tbr = int_or_none(media_el.attrib.get('bitrate'))
|
||||
formats.append({
|
||||
@@ -1033,11 +1054,21 @@ class InfoExtractor(object):
|
||||
return []
|
||||
m3u8_doc, urlh = res
|
||||
m3u8_url = urlh.geturl()
|
||||
# A Media Playlist Tag MUST NOT appear in a Master Playlist
|
||||
# https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3
|
||||
# The EXT-X-TARGETDURATION tag is REQUIRED for every M3U8 Media Playlists
|
||||
# https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.1
|
||||
if '#EXT-X-TARGETDURATION' in m3u8_doc:
|
||||
|
||||
# We should try extracting formats only from master playlists [1], i.e.
|
||||
# playlists that describe available qualities. On the other hand media
|
||||
# playlists [2] should be returned as is since they contain just the media
|
||||
# without qualities renditions.
|
||||
# Fortunately, master playlist can be easily distinguished from media
|
||||
# playlist based on particular tags availability. As of [1, 2] master
|
||||
# playlist tags MUST NOT appear in a media playist and vice versa.
|
||||
# As of [3] #EXT-X-TARGETDURATION tag is REQUIRED for every media playlist
|
||||
# and MUST NOT appear in master playlist thus we can clearly detect media
|
||||
# playlist with this criterion.
|
||||
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.4
|
||||
# 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3
|
||||
# 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.1
|
||||
if '#EXT-X-TARGETDURATION' in m3u8_doc: # media playlist, return as is
|
||||
return [{
|
||||
'url': m3u8_url,
|
||||
'format_id': m3u8_id,
|
||||
@@ -1084,19 +1115,29 @@ class InfoExtractor(object):
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
}
|
||||
codecs = last_info.get('CODECS')
|
||||
if codecs:
|
||||
# TODO: looks like video codec is not always necessarily goes first
|
||||
va_codecs = codecs.split(',')
|
||||
if va_codecs[0]:
|
||||
f['vcodec'] = va_codecs[0]
|
||||
if len(va_codecs) > 1 and va_codecs[1]:
|
||||
f['acodec'] = va_codecs[1]
|
||||
resolution = last_info.get('RESOLUTION')
|
||||
if resolution:
|
||||
width_str, height_str = resolution.split('x')
|
||||
f['width'] = int(width_str)
|
||||
f['height'] = int(height_str)
|
||||
codecs = last_info.get('CODECS')
|
||||
if codecs:
|
||||
vcodec, acodec = [None] * 2
|
||||
va_codecs = codecs.split(',')
|
||||
if len(va_codecs) == 1:
|
||||
# Audio only entries usually come with single codec and
|
||||
# no resolution. For more robustness we also check it to
|
||||
# be mp4 audio.
|
||||
if not resolution and va_codecs[0].startswith('mp4a'):
|
||||
vcodec, acodec = 'none', va_codecs[0]
|
||||
else:
|
||||
vcodec = va_codecs[0]
|
||||
else:
|
||||
vcodec, acodec = va_codecs[:2]
|
||||
f.update({
|
||||
'acodec': acodec,
|
||||
'vcodec': vcodec,
|
||||
})
|
||||
if last_media is not None:
|
||||
f['m3u8_media'] = last_media
|
||||
last_media = None
|
||||
@@ -1117,8 +1158,8 @@ class InfoExtractor(object):
|
||||
out.append('{%s}%s' % (namespace, c))
|
||||
return '/'.join(out)
|
||||
|
||||
def _extract_smil_formats(self, smil_url, video_id, fatal=True, f4m_params=None):
|
||||
smil = self._download_smil(smil_url, video_id, fatal=fatal)
|
||||
def _extract_smil_formats(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None):
|
||||
smil = self._download_smil(smil_url, video_id, fatal=fatal, transform_source=transform_source)
|
||||
|
||||
if smil is False:
|
||||
assert not fatal
|
||||
@@ -1135,10 +1176,10 @@ class InfoExtractor(object):
|
||||
return {}
|
||||
return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params)
|
||||
|
||||
def _download_smil(self, smil_url, video_id, fatal=True):
|
||||
def _download_smil(self, smil_url, video_id, fatal=True, transform_source=None):
|
||||
return self._download_xml(
|
||||
smil_url, video_id, 'Downloading SMIL file',
|
||||
'Unable to download SMIL file', fatal=fatal)
|
||||
'Unable to download SMIL file', fatal=fatal, transform_source=transform_source)
|
||||
|
||||
def _parse_smil(self, smil, smil_url, video_id, f4m_params=None):
|
||||
namespace = self._parse_smil_namespace(smil)
|
||||
@@ -1424,8 +1465,9 @@ class InfoExtractor(object):
|
||||
continue
|
||||
representation_attrib = adaptation_set.attrib.copy()
|
||||
representation_attrib.update(representation.attrib)
|
||||
mime_type = representation_attrib.get('mimeType')
|
||||
content_type = mime_type.split('/')[0] if mime_type else representation_attrib.get('contentType')
|
||||
# According to page 41 of ISO/IEC 29001-1:2014, @mimeType is mandatory
|
||||
mime_type = representation_attrib['mimeType']
|
||||
content_type = mime_type.split('/')[0]
|
||||
if content_type == 'text':
|
||||
# TODO implement WebVTT downloading
|
||||
pass
|
||||
@@ -1448,6 +1490,7 @@ class InfoExtractor(object):
|
||||
f = {
|
||||
'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
|
||||
'url': base_url,
|
||||
'ext': mimetype2ext(mime_type),
|
||||
'width': int_or_none(representation_attrib.get('width')),
|
||||
'height': int_or_none(representation_attrib.get('height')),
|
||||
'tbr': int_or_none(representation_attrib.get('bandwidth'), 1000),
|
||||
@@ -1600,6 +1643,15 @@ class InfoExtractor(object):
|
||||
def _get_automatic_captions(self, *args, **kwargs):
|
||||
raise NotImplementedError('This method must be implemented by subclasses')
|
||||
|
||||
def mark_watched(self, *args, **kwargs):
|
||||
if (self._downloader.params.get('mark_watched', False) and
|
||||
(self._get_login_info()[0] is not None or
|
||||
self._downloader.params.get('cookiefile') is not None)):
|
||||
self._mark_watched(*args, **kwargs)
|
||||
|
||||
def _mark_watched(self, *args, **kwargs):
|
||||
raise NotImplementedError('This method must be implemented by subclasses')
|
||||
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
|
@@ -54,7 +54,7 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None):
|
||||
def _download_webpage(self, url_or_request, *args, **kwargs):
|
||||
request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
|
||||
else sanitized_Request(url_or_request))
|
||||
# Accept-Language must be set explicitly to accept any language to avoid issues
|
||||
@@ -65,8 +65,7 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
# Crunchyroll to not work in georestriction cases in some browsers that don't place
|
||||
# the locale lang first in header. However allowing any language seems to workaround the issue.
|
||||
request.add_header('Accept-Language', '*')
|
||||
return super(CrunchyrollBaseIE, self)._download_webpage(
|
||||
request, video_id, note, errnote, fatal, tries, timeout, encoding)
|
||||
return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs)
|
||||
|
||||
@staticmethod
|
||||
def _add_skip_wall(url):
|
||||
|
@@ -18,7 +18,7 @@ class DouyuTVIE(InfoExtractor):
|
||||
'display_id': 'iseven',
|
||||
'ext': 'flv',
|
||||
'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'description': 'md5:c93d6692dde6fe33809a46edcbecca44',
|
||||
'description': 'md5:f34981259a03e980a3c6404190a3ed61',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': '7师傅',
|
||||
'uploader_id': '431925',
|
||||
@@ -26,7 +26,7 @@ class DouyuTVIE(InfoExtractor):
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.douyutv.com/85982',
|
||||
'info_dict': {
|
||||
@@ -42,7 +42,24 @@ class DouyuTVIE(InfoExtractor):
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'skip': 'Romm not found',
|
||||
}, {
|
||||
'url': 'http://www.douyutv.com/17732',
|
||||
'info_dict': {
|
||||
'id': '17732',
|
||||
'display_id': '17732',
|
||||
'ext': 'flv',
|
||||
'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'description': 'md5:f34981259a03e980a3c6404190a3ed61',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': '7师傅',
|
||||
'uploader_id': '431925',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -1,6 +1,8 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -8,44 +10,125 @@ from ..utils import int_or_none
|
||||
|
||||
|
||||
class DPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.dplay\.se/[^/]+/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'http://(?P<domain>it\.dplay\.com|www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://it.dplay.com/take-me-out/stagione-1-episodio-25/',
|
||||
'info_dict': {
|
||||
'id': '1255600',
|
||||
'display_id': 'stagione-1-episodio-25',
|
||||
'ext': 'mp4',
|
||||
'title': 'Episodio 25',
|
||||
'description': 'md5:cae5f40ad988811b197d2d27a53227eb',
|
||||
'duration': 2761,
|
||||
'timestamp': 1454701800,
|
||||
'upload_date': '20160205',
|
||||
'creator': 'RTIT',
|
||||
'series': 'Take me out',
|
||||
'season_number': 1,
|
||||
'episode_number': 25,
|
||||
'age_limit': 0,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/',
|
||||
'info_dict': {
|
||||
'id': '3172',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'season-1-svensken-lar-sig-njuta-av-livet',
|
||||
'ext': 'flv',
|
||||
'title': 'Svensken lär sig njuta av livet',
|
||||
'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8',
|
||||
'duration': 2650,
|
||||
'timestamp': 1365454320,
|
||||
'upload_date': '20130408',
|
||||
'creator': 'Kanal 5 (Home)',
|
||||
'series': 'Nugammalt - 77 händelser som format Sverige',
|
||||
'season_number': 1,
|
||||
'episode_number': 1,
|
||||
'age_limit': 0,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.dplay.dk/mig-og-min-mor/season-6-episode-12/',
|
||||
'info_dict': {
|
||||
'id': '70816',
|
||||
'display_id': 'season-6-episode-12',
|
||||
'ext': 'flv',
|
||||
'title': 'Episode 12',
|
||||
'description': 'md5:9c86e51a93f8a4401fc9641ef9894c90',
|
||||
'duration': 2563,
|
||||
'timestamp': 1429696800,
|
||||
'upload_date': '20150422',
|
||||
'creator': 'Kanal 4',
|
||||
'series': 'Mig og min mor',
|
||||
'season_number': 6,
|
||||
'episode_number': 12,
|
||||
'age_limit': 0,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('id')
|
||||
domain = mobj.group('domain')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'data-video-id="(\d+)"', webpage, 'video id')
|
||||
r'data-video-id=["\'](\d+)', webpage, 'video id')
|
||||
|
||||
info = self._download_json(
|
||||
'http://www.dplay.se/api/v2/ajax/videos?video_id=' + video_id,
|
||||
'http://%s/api/v2/ajax/videos?video_id=%s' % (domain, video_id),
|
||||
video_id)['data'][0]
|
||||
|
||||
self._set_cookie(
|
||||
'secure.dplay.se', 'dsc-geo',
|
||||
'{"countryCode":"NL","expiry":%d}' % ((time.time() + 20 * 60) * 1000))
|
||||
# TODO: consider adding support for 'stream_type=hds', it seems to
|
||||
# require setting some cookies
|
||||
manifest_url = self._download_json(
|
||||
'https://secure.dplay.se/secure/api/v2/user/authorization/stream/%s?stream_type=hls' % video_id,
|
||||
video_id, 'Getting manifest url for hls stream')['hls']
|
||||
formats = self._extract_m3u8_formats(
|
||||
manifest_url, video_id, ext='mp4', entry_protocol='m3u8_native')
|
||||
title = info['title']
|
||||
|
||||
PROTOCOLS = ('hls', 'hds')
|
||||
formats = []
|
||||
|
||||
def extract_formats(protocol, manifest_url):
|
||||
if protocol == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
manifest_url, video_id, ext='mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id=protocol, fatal=False))
|
||||
elif protocol == 'hds':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
manifest_url + '&hdcore=3.8.0&plugin=flowplayer-3.8.0.0',
|
||||
video_id, f4m_id=protocol, fatal=False))
|
||||
|
||||
domain_tld = domain.split('.')[-1]
|
||||
if domain_tld in ('se', 'dk'):
|
||||
for protocol in PROTOCOLS:
|
||||
self._set_cookie(
|
||||
'secure.dplay.%s' % domain_tld, 'dsc-geo',
|
||||
json.dumps({
|
||||
'countryCode': domain_tld.upper(),
|
||||
'expiry': (time.time() + 20 * 60) * 1000,
|
||||
}))
|
||||
stream = self._download_json(
|
||||
'https://secure.dplay.%s/secure/api/v2/user/authorization/stream/%s?stream_type=%s'
|
||||
% (domain_tld, video_id, protocol), video_id,
|
||||
'Downloading %s stream JSON' % protocol, fatal=False)
|
||||
if stream and stream.get(protocol):
|
||||
extract_formats(protocol, stream[protocol])
|
||||
else:
|
||||
for protocol in PROTOCOLS:
|
||||
if info.get(protocol):
|
||||
extract_formats(protocol, info[protocol])
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': info['title'],
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'description': info.get('video_metadata_longDescription'),
|
||||
'duration': int_or_none(info.get('video_metadata_length'), scale=1000),
|
||||
'timestamp': int_or_none(info.get('video_publish_date')),
|
||||
'creator': info.get('video_metadata_homeChannel'),
|
||||
'series': info.get('video_metadata_show'),
|
||||
'season_number': int_or_none(info.get('season')),
|
||||
'episode_number': int_or_none(info.get('episode')),
|
||||
'age_limit': int_or_none(info.get('minimum_age')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
85
youtube_dl/extractor/dw.py
Normal file
85
youtube_dl/extractor/dw.py
Normal file
@@ -0,0 +1,85 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..compat import compat_urlparse
|
||||
|
||||
|
||||
class DWIE(InfoExtractor):
|
||||
IE_NAME = 'dw'
|
||||
_VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+av-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# video
|
||||
'url': 'http://www.dw.com/en/intelligent-light/av-19112290',
|
||||
'md5': '7372046e1815c5a534b43f3c3c36e6e9',
|
||||
'info_dict': {
|
||||
'id': '19112290',
|
||||
'ext': 'mp4',
|
||||
'title': 'Intelligent light',
|
||||
'description': 'md5:90e00d5881719f2a6a5827cb74985af1',
|
||||
'upload_date': '20160311',
|
||||
}
|
||||
}, {
|
||||
# audio
|
||||
'url': 'http://www.dw.com/en/worldlink-my-business/av-19111941',
|
||||
'md5': '2814c9a1321c3a51f8a7aeb067a360dd',
|
||||
'info_dict': {
|
||||
'id': '19111941',
|
||||
'ext': 'mp3',
|
||||
'title': 'WorldLink: My business',
|
||||
'description': 'md5:bc9ca6e4e063361e21c920c53af12405',
|
||||
'upload_date': '20160311',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
media_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, media_id)
|
||||
hidden_inputs = self._hidden_inputs(webpage)
|
||||
title = hidden_inputs['media_title']
|
||||
|
||||
formats = []
|
||||
if hidden_inputs.get('player_type') == 'video' and hidden_inputs.get('stream_file') == '1':
|
||||
formats = self._extract_smil_formats(
|
||||
'http://www.dw.com/smil/v-%s' % media_id, media_id,
|
||||
transform_source=lambda s: s.replace(
|
||||
'rtmp://tv-od.dw.de/flash/',
|
||||
'http://tv-download.dw.de/dwtv_video/flv/'))
|
||||
else:
|
||||
formats = [{'url': hidden_inputs['file_name']}]
|
||||
|
||||
return {
|
||||
'id': media_id,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': hidden_inputs.get('preview_image'),
|
||||
'duration': int_or_none(hidden_inputs.get('file_duration')),
|
||||
'upload_date': hidden_inputs.get('display_date'),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class DWArticleIE(InfoExtractor):
|
||||
IE_NAME = 'dw:article'
|
||||
_VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+a-(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.dw.com/en/no-hope-limited-options-for-refugees-in-idomeni/a-19111009',
|
||||
'md5': '8ca657f9d068bbef74d6fc38b97fc869',
|
||||
'info_dict': {
|
||||
'id': '19105868',
|
||||
'ext': 'mp4',
|
||||
'title': 'The harsh life of refugees in Idomeni',
|
||||
'description': 'md5:196015cc7e48ebf474db9399420043c7',
|
||||
'upload_date': '20160310',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
article_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, article_id)
|
||||
hidden_inputs = self._hidden_inputs(webpage)
|
||||
media_id = hidden_inputs['media_id']
|
||||
media_path = self._search_regex(r'href="([^"]+av-%s)"\s+class="overlayLink"' % media_id, webpage, 'media url')
|
||||
media_url = compat_urlparse.urljoin(url, media_path)
|
||||
return self.url_result(media_url, 'DW', media_id)
|
@@ -9,7 +9,7 @@ class ElPaisIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])'
|
||||
IE_DESC = 'El País'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html',
|
||||
'md5': '98406f301f19562170ec071b83433d55',
|
||||
'info_dict': {
|
||||
@@ -19,30 +19,41 @@ class ElPaisIE(InfoExtractor):
|
||||
'description': 'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.',
|
||||
'upload_date': '20140206',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://elcomidista.elpais.com/elcomidista/2016/02/24/articulo/1456340311_668921.html#?id_externo_nwl=newsletter_diaria20160303t',
|
||||
'md5': '3bd5b09509f3519d7d9e763179b013de',
|
||||
'info_dict': {
|
||||
'id': '1456340311_668921',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cómo hacer el mejor café con cafetera italiana',
|
||||
'description': 'Que sí, que las cápsulas son cómodas. Pero si le pides algo más a la vida, quizá deberías aprender a usar bien la cafetera italiana. No tienes más que ver este vídeo y seguir sus siete normas básicas.',
|
||||
'upload_date': '20160303',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
prefix = self._html_search_regex(
|
||||
r'var url_cache = "([^"]+)";', webpage, 'URL prefix')
|
||||
r'var\s+url_cache\s*=\s*"([^"]+)";', webpage, 'URL prefix')
|
||||
video_suffix = self._search_regex(
|
||||
r"URLMediaFile = url_cache \+ '([^']+)'", webpage, 'video URL')
|
||||
r"(?:URLMediaFile|urlVideo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", webpage, 'video URL')
|
||||
video_url = prefix + video_suffix
|
||||
thumbnail_suffix = self._search_regex(
|
||||
r"URLMediaStill = url_cache \+ '([^']+)'", webpage, 'thumbnail URL',
|
||||
fatal=False)
|
||||
r"(?:URLMediaStill|urlFotogramaFijo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'",
|
||||
webpage, 'thumbnail URL', fatal=False)
|
||||
thumbnail = (
|
||||
None if thumbnail_suffix is None
|
||||
else prefix + thumbnail_suffix)
|
||||
title = self._html_search_regex(
|
||||
'<h2 class="entry-header entry-title.*?>(.*?)</h2>',
|
||||
(r"tituloVideo\s*=\s*'([^']+)'", webpage, 'title',
|
||||
r'<h2 class="entry-header entry-title.*?>(.*?)</h2>'),
|
||||
webpage, 'title')
|
||||
date_str = self._search_regex(
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'<p class="date-header date-int updated"\s+title="([^"]+)">',
|
||||
webpage, 'upload date', fatal=False)
|
||||
upload_date = (None if date_str is None else unified_strdate(date_str))
|
||||
webpage, 'upload date', default=None) or self._html_search_meta(
|
||||
'datePublished', webpage, 'timestamp'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -1,21 +1,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
class EngadgetIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://www.engadget.com/
|
||||
(?:video(?:/5min)?/(?P<id>\d+)|
|
||||
[\d/]+/.*?)
|
||||
'''
|
||||
_VALID_URL = r'https?://www.engadget.com/video/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.engadget.com/video/5min/518153925/',
|
||||
'url': 'http://www.engadget.com/video/518153925/',
|
||||
'md5': 'c6820d4828a5064447a4d9fc73f312c9',
|
||||
'info_dict': {
|
||||
'id': '518153925',
|
||||
@@ -27,15 +19,4 @@ class EngadgetIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
if video_id is not None:
|
||||
return self.url_result('5min:%s' % video_id)
|
||||
else:
|
||||
title = url_basename(url)
|
||||
webpage = self._download_webpage(url, title)
|
||||
ids = re.findall(r'<iframe[^>]+?playList=(\d+)', webpage)
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'title': title,
|
||||
'entries': [self.url_result('5min:%s' % vid) for vid in ids]
|
||||
}
|
||||
return self.url_result('5min:%s' % video_id)
|
||||
|
@@ -34,9 +34,12 @@ class FacebookIE(InfoExtractor):
|
||||
video/video\.php|
|
||||
photo\.php|
|
||||
video\.php|
|
||||
video/embed
|
||||
)\?(?:.*?)(?:v|video_id)=|
|
||||
[^/]+/videos/(?:[^/]+/)?
|
||||
video/embed|
|
||||
story\.php
|
||||
)\?(?:.*?)(?:v|video_id|story_fbid)=|
|
||||
[^/]+/videos/(?:[^/]+/)?|
|
||||
[^/]+/posts/|
|
||||
groups/[^/]+/permalink/
|
||||
)|
|
||||
facebook:
|
||||
)
|
||||
@@ -49,6 +52,8 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
_CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
|
||||
|
||||
_VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
|
||||
'md5': '6a40d33c0eccbb1af76cf0485a052659',
|
||||
@@ -80,6 +85,33 @@ class FacebookIE(InfoExtractor):
|
||||
'title': 'When you post epic content on instagram.com/433 8 million followers, this is ...',
|
||||
'uploader': 'Demy de Zeeuw',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
|
||||
'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6',
|
||||
'info_dict': {
|
||||
'id': '544765982287235',
|
||||
'ext': 'mp4',
|
||||
'title': '"What are you doing running in the snow?"',
|
||||
'uploader': 'FailArmy',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://m.facebook.com/story.php?story_fbid=1035862816472149&id=116132035111903',
|
||||
'md5': '1deb90b6ac27f7efcf6d747c8a27f5e3',
|
||||
'info_dict': {
|
||||
'id': '1035862816472149',
|
||||
'ext': 'mp4',
|
||||
'title': 'What the Flock Is Going On In New Zealand Credit: ViralHog',
|
||||
'uploader': 'S. Saint',
|
||||
},
|
||||
}, {
|
||||
'note': 'swf params escaped',
|
||||
'url': 'https://www.facebook.com/barackobama/posts/10153664894881749',
|
||||
'md5': '97ba073838964d12c70566e0085c2b91',
|
||||
'info_dict': {
|
||||
'id': '10153664894881749',
|
||||
'ext': 'mp4',
|
||||
'title': 'Facebook video #10153664894881749',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
||||
'only_matching': True,
|
||||
@@ -92,6 +124,9 @@ class FacebookIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'facebook:544765982287235',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
@@ -160,19 +195,19 @@ class FacebookIE(InfoExtractor):
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
req = sanitized_Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
|
||||
def _extract_from_url(self, url, video_id, fatal_if_no_video=True):
|
||||
req = sanitized_Request(url)
|
||||
req.add_header('User-Agent', self._CHROME_USER_AGENT)
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
video_data = None
|
||||
|
||||
BEFORE = '{swf.addParam(param[0], param[1]);});\n'
|
||||
BEFORE = '{swf.addParam(param[0], param[1]);});'
|
||||
AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
|
||||
m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
|
||||
m = re.search(re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER), webpage)
|
||||
if m:
|
||||
data = dict(json.loads(m.group(1)))
|
||||
swf_params = m.group(1).replace('\\\\', '\\').replace('\\"', '"')
|
||||
data = dict(json.loads(swf_params))
|
||||
params_raw = compat_urllib_parse_unquote(data['params'])
|
||||
video_data = json.loads(params_raw)['video_data']
|
||||
|
||||
@@ -185,13 +220,15 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
if not video_data:
|
||||
server_js_data = self._parse_json(self._search_regex(
|
||||
r'handleServerJS\(({.+})\);', webpage, 'server js data'), video_id)
|
||||
r'handleServerJS\(({.+})\);', webpage, 'server js data', default='{}'), video_id)
|
||||
for item in server_js_data.get('instances', []):
|
||||
if item[1][0] == 'VideoConfig':
|
||||
video_data = video_data_list2dict(item[2][0]['videoData'])
|
||||
break
|
||||
|
||||
if not video_data:
|
||||
if not fatal_if_no_video:
|
||||
return webpage, False
|
||||
m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
|
||||
if m_msg is not None:
|
||||
raise ExtractorError(
|
||||
@@ -208,10 +245,13 @@ class FacebookIE(InfoExtractor):
|
||||
for src_type in ('src', 'src_no_ratelimit'):
|
||||
src = f[0].get('%s_%s' % (quality, src_type))
|
||||
if src:
|
||||
preference = -10 if format_id == 'progressive' else 0
|
||||
if quality == 'hd':
|
||||
preference += 5
|
||||
formats.append({
|
||||
'format_id': '%s_%s_%s' % (format_id, quality, src_type),
|
||||
'url': src,
|
||||
'preference': -10 if format_id == 'progressive' else 0,
|
||||
'preference': preference,
|
||||
})
|
||||
dash_manifest = f[0].get('dash_manifest')
|
||||
if dash_manifest:
|
||||
@@ -234,39 +274,36 @@ class FacebookIE(InfoExtractor):
|
||||
video_title = 'Facebook video #%s' % video_id
|
||||
uploader = clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
|
||||
|
||||
return {
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'formats': formats,
|
||||
'uploader': uploader,
|
||||
}
|
||||
|
||||
|
||||
class FacebookPostIE(InfoExtractor):
|
||||
IE_NAME = 'facebook:post'
|
||||
_VALID_URL = r'https?://(?:\w+\.)?facebook\.com/[^/]+/posts/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
|
||||
'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6',
|
||||
'info_dict': {
|
||||
'id': '544765982287235',
|
||||
'ext': 'mp4',
|
||||
'title': '"What are you doing running in the snow?"',
|
||||
'uploader': 'FailArmy',
|
||||
}
|
||||
}
|
||||
return webpage, info_dict
|
||||
|
||||
def _real_extract(self, url):
|
||||
post_id = self._match_id(url)
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, post_id)
|
||||
real_url = self._VIDEO_PAGE_TEMPLATE % video_id if url.startswith('facebook:') else url
|
||||
webpage, info_dict = self._extract_from_url(real_url, video_id, fatal_if_no_video=False)
|
||||
|
||||
entries = [
|
||||
self.url_result('facebook:%s' % video_id, FacebookIE.ie_key())
|
||||
for video_id in self._parse_json(
|
||||
self._search_regex(
|
||||
r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])',
|
||||
webpage, 'video ids', group='ids'),
|
||||
post_id)]
|
||||
if info_dict:
|
||||
return info_dict
|
||||
|
||||
return self.playlist_result(entries, post_id)
|
||||
if '/posts/' in url:
|
||||
entries = [
|
||||
self.url_result('facebook:%s' % vid, FacebookIE.ie_key())
|
||||
for vid in self._parse_json(
|
||||
self._search_regex(
|
||||
r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])',
|
||||
webpage, 'video ids', group='ids'),
|
||||
video_id)]
|
||||
|
||||
return self.playlist_result(entries, video_id)
|
||||
else:
|
||||
_, info_dict = self._extract_from_url(
|
||||
self._VIDEO_PAGE_TEMPLATE % video_id,
|
||||
video_id, fatal_if_no_video=True)
|
||||
return info_dict
|
||||
|
@@ -1,5 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
@@ -16,12 +18,7 @@ from ..utils import (
|
||||
|
||||
class FiveMinIE(InfoExtractor):
|
||||
IE_NAME = '5min'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=|
|
||||
https?://(?:(?:massively|www)\.)?joystiq\.com/video/|
|
||||
5min:)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
_VALID_URL = r'(?:5min:(?P<id>\d+)(?::(?P<sid>\d+))?|https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?P<query>.*))'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -45,6 +42,7 @@ class FiveMinIE(InfoExtractor):
|
||||
'title': 'How to Make a Next-Level Fruit Salad',
|
||||
'duration': 184,
|
||||
},
|
||||
'skip': 'no longer available',
|
||||
},
|
||||
]
|
||||
_ERRORS = {
|
||||
@@ -91,20 +89,33 @@ class FiveMinIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
sid = mobj.group('sid')
|
||||
|
||||
if mobj.group('query'):
|
||||
qs = compat_parse_qs(mobj.group('query'))
|
||||
if not qs.get('playList'):
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
video_id = qs['playList'][0]
|
||||
if qs.get('sid'):
|
||||
sid = qs['sid'][0]
|
||||
|
||||
embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id
|
||||
embed_page = self._download_webpage(embed_url, video_id,
|
||||
'Downloading embed page')
|
||||
sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
|
||||
query = compat_urllib_parse.urlencode({
|
||||
'func': 'GetResults',
|
||||
'playlist': video_id,
|
||||
'sid': sid,
|
||||
'isPlayerSeed': 'true',
|
||||
'url': embed_url,
|
||||
})
|
||||
if not sid:
|
||||
embed_page = self._download_webpage(embed_url, video_id,
|
||||
'Downloading embed page')
|
||||
sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
|
||||
|
||||
response = self._download_json(
|
||||
'https://syn.5min.com/handlers/SenseHandler.ashx?' + query,
|
||||
'https://syn.5min.com/handlers/SenseHandler.ashx?' +
|
||||
compat_urllib_parse.urlencode({
|
||||
'func': 'GetResults',
|
||||
'playlist': video_id,
|
||||
'sid': sid,
|
||||
'isPlayerSeed': 'true',
|
||||
'url': embed_url,
|
||||
}),
|
||||
video_id)
|
||||
if not response['success']:
|
||||
raise ExtractorError(
|
||||
@@ -118,9 +129,7 @@ class FiveMinIE(InfoExtractor):
|
||||
parsed_video_url = compat_urllib_parse_urlparse(compat_parse_qs(
|
||||
compat_urllib_parse_urlparse(info['EmbededURL']).query)['videoUrl'][0])
|
||||
for rendition in info['Renditions']:
|
||||
if rendition['RenditionType'] == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(rendition['Url'], video_id, m3u8_id='hls'))
|
||||
elif rendition['RenditionType'] == 'aac':
|
||||
if rendition['RenditionType'] == 'aac' or rendition['RenditionType'] == 'm3u8':
|
||||
continue
|
||||
else:
|
||||
rendition_url = compat_urlparse.urlunparse(parsed_video_url._replace(path=replace_extension(parsed_video_url.path.replace('//', '/%s/' % rendition['ID']), rendition['RenditionType'])))
|
||||
|
@@ -36,6 +36,10 @@ class FoxNewsIE(AMPIE):
|
||||
# 'upload_date': '20141204',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com',
|
||||
|
@@ -14,7 +14,7 @@ class FreespeechIE(InfoExtractor):
|
||||
'url': 'https://www.freespeech.org/video/obama-romney-campaign-colorado-ahead-debate-0',
|
||||
'info_dict': {
|
||||
'id': 'poKsVCZ64uU',
|
||||
'ext': 'mp4',
|
||||
'ext': 'webm',
|
||||
'title': 'Obama, Romney Campaign in Colorado Ahead of Debate',
|
||||
'description': 'Obama, Romney Campaign in Colorado Ahead of Debate',
|
||||
'uploader': 'freespeechtv',
|
||||
|
@@ -2,42 +2,27 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class GameInformerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?gameinformer\.com/(?:[^/]+/)*(?P<id>.+)\.aspx'
|
||||
_TEST = {
|
||||
'url': 'http://www.gameinformer.com/b/features/archive/2015/09/26/replay-animal-crossing.aspx',
|
||||
'md5': '292f26da1ab4beb4c9099f1304d2b071',
|
||||
'info_dict': {
|
||||
'id': '4515472681001',
|
||||
'ext': 'm3u8',
|
||||
'ext': 'mp4',
|
||||
'title': 'Replay - Animal Crossing',
|
||||
'description': 'md5:2e211891b215c85d061adc7a4dd2d930',
|
||||
'timestamp': 1443457610706,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
'timestamp': 1443457610,
|
||||
'upload_date': '20150928',
|
||||
'uploader_id': '694940074001',
|
||||
},
|
||||
}
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/694940074001/default_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
bc_api_url = self._search_regex(r"getVideo\('([^']+)'", webpage, 'brightcove api url')
|
||||
json_data = self._download_json(
|
||||
bc_api_url + '&video_fields=id,name,shortDescription,publishedDate,videoStillURL,length,IOSRenditions',
|
||||
display_id)
|
||||
|
||||
return {
|
||||
'id': compat_str(json_data['id']),
|
||||
'display_id': display_id,
|
||||
'url': json_data['IOSRenditions'][0]['url'],
|
||||
'title': json_data['name'],
|
||||
'description': json_data.get('shortDescription'),
|
||||
'timestamp': int_or_none(json_data.get('publishedDate')),
|
||||
'duration': int_or_none(json_data.get('length')),
|
||||
}
|
||||
brightcove_id = self._search_regex(r"getVideo\('[^']+video_id=(\d+)", webpage, 'brightcove id')
|
||||
return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
|
||||
|
@@ -47,6 +47,7 @@ from .senateisvp import SenateISVPIE
|
||||
from .svt import SVTIE
|
||||
from .pornhub import PornHubIE
|
||||
from .xhamster import XHamsterEmbedIE
|
||||
from .tnaflix import TNAFlixNetworkEmbedIE
|
||||
from .vimeo import VimeoIE
|
||||
from .dailymotion import DailymotionCloudIE
|
||||
from .onionstudios import OnionStudiosIE
|
||||
@@ -1241,28 +1242,34 @@ class GenericIE(InfoExtractor):
|
||||
full_response = self._request_webpage(request, video_id)
|
||||
head_response = full_response
|
||||
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
|
||||
}
|
||||
|
||||
# Check for direct link to a video
|
||||
content_type = head_response.headers.get('Content-Type', '')
|
||||
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>.+)$', content_type)
|
||||
if m:
|
||||
upload_date = unified_strdate(
|
||||
head_response.headers.get('Last-Modified'))
|
||||
formats = []
|
||||
if m.group('format_id').endswith('mpegurl'):
|
||||
format_id = m.group('format_id')
|
||||
if format_id.endswith('mpegurl'):
|
||||
formats = self._extract_m3u8_formats(url, video_id, 'mp4')
|
||||
elif format_id == 'f4m':
|
||||
formats = self._extract_f4m_formats(url, video_id)
|
||||
else:
|
||||
formats = [{
|
||||
'format_id': m.group('format_id'),
|
||||
'url': url,
|
||||
'vcodec': 'none' if m.group('type') == 'audio' else None
|
||||
}]
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
|
||||
info_dict.update({
|
||||
'direct': True,
|
||||
'formats': formats,
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
})
|
||||
return info_dict
|
||||
|
||||
if not self._downloader.params.get('test', False) and not is_intentional:
|
||||
force = self._downloader.params.get('force_generic_extractor', False)
|
||||
@@ -1290,13 +1297,12 @@ class GenericIE(InfoExtractor):
|
||||
'URL could be a direct video link, returning it as such.')
|
||||
upload_date = unified_strdate(
|
||||
head_response.headers.get('Last-Modified'))
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
|
||||
info_dict.update({
|
||||
'direct': True,
|
||||
'url': url,
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
})
|
||||
return info_dict
|
||||
|
||||
webpage = self._webpage_read_content(
|
||||
full_response, url, video_id, prefix=first_bytes)
|
||||
@@ -1313,12 +1319,12 @@ class GenericIE(InfoExtractor):
|
||||
elif doc.tag == '{http://xspf.org/ns/0/}playlist':
|
||||
return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
|
||||
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
|
||||
'formats': self._parse_mpd_formats(
|
||||
doc, video_id, mpd_base_url=url.rpartition('/')[0]),
|
||||
}
|
||||
info_dict['formats'] = self._parse_mpd_formats(
|
||||
doc, video_id, mpd_base_url=url.rpartition('/')[0])
|
||||
return info_dict
|
||||
elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
|
||||
info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
|
||||
return info_dict
|
||||
except compat_xml_parse_error:
|
||||
pass
|
||||
|
||||
@@ -1633,6 +1639,11 @@ class GenericIE(InfoExtractor):
|
||||
if xhamster_urls:
|
||||
return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
|
||||
|
||||
# Look for embedded TNAFlixNetwork player
|
||||
tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
|
||||
if tnaflix_urls:
|
||||
return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
|
||||
|
||||
# Look for embedded Tvigle player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
|
||||
@@ -1979,6 +1990,8 @@ class GenericIE(InfoExtractor):
|
||||
entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
|
||||
elif ext == 'mpd':
|
||||
entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
|
||||
elif ext == 'f4m':
|
||||
entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
|
||||
else:
|
||||
entry_info_dict['url'] = video_url
|
||||
|
||||
|
@@ -10,8 +10,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class GoogleDriveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28})'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})'
|
||||
_TESTS = [{
|
||||
'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
|
||||
'md5': '881f7700aec4f538571fa1e0eed4a7b6',
|
||||
'info_dict': {
|
||||
@@ -20,7 +20,11 @@ class GoogleDriveIE(InfoExtractor):
|
||||
'title': 'Big Buck Bunny.mp4',
|
||||
'duration': 46,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
# video id is longer than 28 characters
|
||||
'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_FORMATS_EXT = {
|
||||
'5': 'flv',
|
||||
'6': 'flv',
|
||||
@@ -43,7 +47,7 @@ class GoogleDriveIE(InfoExtractor):
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})',
|
||||
r'<iframe[^>]+src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28,})',
|
||||
webpage)
|
||||
if mobj:
|
||||
return 'https://drive.google.com/file/d/%s' % mobj.group('id')
|
||||
|
@@ -42,7 +42,7 @@ class ImdbIE(InfoExtractor):
|
||||
for f_url, f_name in extra_formats]
|
||||
format_pages.append(player_page)
|
||||
|
||||
quality = qualities(['SD', '480p', '720p'])
|
||||
quality = qualities(('SD', '480p', '720p', '1080p'))
|
||||
formats = []
|
||||
for format_page in format_pages:
|
||||
json_data = self._search_regex(
|
||||
|
@@ -73,7 +73,7 @@ class IndavideoEmbedIE(InfoExtractor):
|
||||
'url': self._proto_relative_url(thumbnail)
|
||||
} for thumbnail in video.get('thumbnails', [])]
|
||||
|
||||
tags = [tag['title'] for tag in video.get('tags', [])]
|
||||
tags = [tag['title'] for tag in video.get('tags') or []]
|
||||
|
||||
return {
|
||||
'id': video.get('id') or video_id,
|
||||
|
@@ -4,15 +4,12 @@ from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_unquote,
|
||||
compat_parse_qs,
|
||||
)
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import determine_ext
|
||||
from .bokecc import BokeCCBaseIE
|
||||
|
||||
|
||||
class InfoQIE(InfoExtractor):
|
||||
class InfoQIE(BokeCCBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?infoq\.com/(?:[^/]+/)+(?P<id>[^/]+)'
|
||||
|
||||
_TESTS = [{
|
||||
@@ -38,26 +35,6 @@ class InfoQIE(InfoExtractor):
|
||||
},
|
||||
}]
|
||||
|
||||
def _extract_bokecc_videos(self, webpage, video_id):
|
||||
# TODO: bokecc.com is a Chinese video cloud platform
|
||||
# It should have an independent extractor but I don't have other
|
||||
# examples using bokecc
|
||||
player_params_str = self._html_search_regex(
|
||||
r'<script[^>]+src="http://p\.bokecc\.com/player\?([^"]+)',
|
||||
webpage, 'player params', default=None)
|
||||
|
||||
player_params = compat_parse_qs(player_params_str)
|
||||
|
||||
info_xml = self._download_xml(
|
||||
'http://p.bokecc.com/servlet/playinfo?uid=%s&vid=%s&m=1' % (
|
||||
player_params['siteid'][0], player_params['vid'][0]), video_id)
|
||||
|
||||
return [{
|
||||
'format_id': 'bokecc',
|
||||
'url': quality.find('./copy').attrib['playurl'],
|
||||
'preference': int(quality.attrib['value']),
|
||||
} for quality in info_xml.findall('./video/quality')]
|
||||
|
||||
def _extract_rtmp_videos(self, webpage):
|
||||
# The server URL is hardcoded
|
||||
video_url = 'rtmpe://video.infoq.com/cfx/st/'
|
||||
@@ -101,7 +78,7 @@ class InfoQIE(InfoExtractor):
|
||||
|
||||
if '/cn/' in url:
|
||||
# for China videos, HTTP video URL exists but always fails with 403
|
||||
formats = self._extract_bokecc_videos(webpage, video_id)
|
||||
formats = self._extract_bokecc_formats(webpage, video_id)
|
||||
else:
|
||||
formats = self._extract_rtmp_videos(webpage) + self._extract_http_videos(webpage)
|
||||
|
||||
|
@@ -18,6 +18,7 @@ from ..compat import (
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
decode_packed_codes,
|
||||
ExtractorError,
|
||||
ohdave_rsa_encrypt,
|
||||
remove_start,
|
||||
@@ -126,43 +127,11 @@ class IqiyiSDK(object):
|
||||
|
||||
|
||||
class IqiyiSDKInterpreter(object):
|
||||
BASE62_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
|
||||
|
||||
def __init__(self, sdk_code):
|
||||
self.sdk_code = sdk_code
|
||||
|
||||
@classmethod
|
||||
def base62(cls, num):
|
||||
if num == 0:
|
||||
return '0'
|
||||
ret = ''
|
||||
while num:
|
||||
ret = cls.BASE62_TABLE[num % 62] + ret
|
||||
num = num // 62
|
||||
return ret
|
||||
|
||||
def decode_eval_codes(self):
|
||||
self.sdk_code = self.sdk_code[5:-3]
|
||||
|
||||
mobj = re.search(
|
||||
r"'([^']+)',62,(\d+),'([^']+)'\.split\('\|'\),[^,]+,{}",
|
||||
self.sdk_code)
|
||||
obfucasted_code, count, symbols = mobj.groups()
|
||||
count = int(count)
|
||||
symbols = symbols.split('|')
|
||||
symbol_table = {}
|
||||
|
||||
while count:
|
||||
count -= 1
|
||||
b62count = self.base62(count)
|
||||
symbol_table[b62count] = symbols[count] or b62count
|
||||
|
||||
self.sdk_code = re.sub(
|
||||
r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
|
||||
obfucasted_code)
|
||||
|
||||
def run(self, target, ip, timestamp):
|
||||
self.decode_eval_codes()
|
||||
self.sdk_code = decode_packed_codes(self.sdk_code)
|
||||
|
||||
functions = re.findall(r'input=([a-zA-Z0-9]+)\(input', self.sdk_code)
|
||||
|
||||
@@ -529,10 +498,10 @@ class IqiyiIE(InfoExtractor):
|
||||
raw_data = self._download_json(api_url, video_id)
|
||||
return raw_data
|
||||
|
||||
def get_enc_key(self, swf_url, video_id):
|
||||
def get_enc_key(self, video_id):
|
||||
# TODO: automatic key extraction
|
||||
# last update at 2016-01-22 for Zombie::bite
|
||||
enc_key = '6ab6d0280511493ba85594779759d4ed'
|
||||
enc_key = '8ed797d224d043e7ac23d95b70227d32'
|
||||
return enc_key
|
||||
|
||||
def _extract_playlist(self, webpage):
|
||||
@@ -582,11 +551,9 @@ class IqiyiIE(InfoExtractor):
|
||||
r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid')
|
||||
video_id = self._search_regex(
|
||||
r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id')
|
||||
swf_url = self._search_regex(
|
||||
r'(http://[^\'"]+MainPlayer[^.]+\.swf)', webpage, 'swf player URL')
|
||||
_uuid = uuid.uuid4().hex
|
||||
|
||||
enc_key = self.get_enc_key(swf_url, video_id)
|
||||
enc_key = self.get_enc_key(video_id)
|
||||
|
||||
raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid)
|
||||
|
||||
|
@@ -30,7 +30,7 @@ class JeuxVideoIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, title)
|
||||
title = self._html_search_meta('name', webpage) or self._og_search_title(webpage)
|
||||
config_url = self._html_search_regex(
|
||||
r'data-src="(/contenu/medias/video.php.*?)"',
|
||||
r'data-src(?:set-video)?="(/contenu/medias/video.php.*?)"',
|
||||
webpage, 'config URL')
|
||||
config_url = 'http://www.jeuxvideo.com' + config_url
|
||||
|
||||
|
@@ -7,7 +7,46 @@ from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class JWPlatformIE(InfoExtractor):
|
||||
class JWPlatformBaseIE(InfoExtractor):
|
||||
def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True):
|
||||
video_data = jwplayer_data['playlist'][0]
|
||||
subtitles = {}
|
||||
for track in video_data['tracks']:
|
||||
if track['kind'] == 'captions':
|
||||
subtitles[track['label']] = [{'url': self._proto_relative_url(track['file'])}]
|
||||
|
||||
formats = []
|
||||
for source in video_data['sources']:
|
||||
source_url = self._proto_relative_url(source['file'])
|
||||
source_type = source.get('type') or ''
|
||||
if source_type in ('application/vnd.apple.mpegurl', 'hls'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, video_id, 'mp4', 'm3u8_native', fatal=False))
|
||||
elif source_type.startswith('audio'):
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
else:
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
'width': int_or_none(source.get('width')),
|
||||
'height': int_or_none(source.get('height')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_data['title'] if require_title else video_data.get('title'),
|
||||
'description': video_data.get('description'),
|
||||
'thumbnail': self._proto_relative_url(video_data.get('image')),
|
||||
'timestamp': int_or_none(video_data.get('pubdate')),
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class JWPlatformIE(JWPlatformBaseIE):
|
||||
_VALID_URL = r'(?:https?://content\.jwplatform\.com/(?:feeds|players|jw6)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
|
||||
_TEST = {
|
||||
'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
|
||||
@@ -33,38 +72,4 @@ class JWPlatformIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
json_data = self._download_json('http://content.jwplatform.com/feeds/%s.json' % video_id, video_id)
|
||||
video_data = json_data['playlist'][0]
|
||||
subtitles = {}
|
||||
for track in video_data['tracks']:
|
||||
if track['kind'] == 'captions':
|
||||
subtitles[track['label']] = [{'url': self._proto_relative_url(track['file'])}]
|
||||
|
||||
formats = []
|
||||
for source in video_data['sources']:
|
||||
source_url = self._proto_relative_url(source['file'])
|
||||
source_type = source.get('type') or ''
|
||||
if source_type == 'application/vnd.apple.mpegurl':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, video_id, 'mp4', 'm3u8_native', fatal=False))
|
||||
elif source_type.startswith('audio'):
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
else:
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
'width': int_or_none(source.get('width')),
|
||||
'height': int_or_none(source.get('height')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_data['title'],
|
||||
'description': video_data.get('description'),
|
||||
'thumbnail': self._proto_relative_url(video_data.get('image')),
|
||||
'timestamp': int_or_none(video_data.get('pubdate')),
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
}
|
||||
return self._parse_jwplayer_data(json_data, video_id)
|
||||
|
@@ -8,6 +8,7 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urlparse,
|
||||
compat_parse_qs,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
@@ -20,21 +21,17 @@ from ..utils import (
|
||||
class KalturaIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
kaltura:(?P<partner_id_s>\d+):(?P<id_s>[0-9a-z_]+)|
|
||||
kaltura:(?P<partner_id>\d+):(?P<id>[0-9a-z_]+)|
|
||||
https?://
|
||||
(:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/
|
||||
(?:
|
||||
(?:
|
||||
# flash player
|
||||
index\.php/kwidget/
|
||||
(?:[^/]+/)*?wid/_(?P<partner_id>\d+)/
|
||||
(?:[^/]+/)*?entry_id/(?P<id>[0-9a-z_]+)|
|
||||
index\.php/kwidget|
|
||||
# html5 player
|
||||
html5/html5lib/
|
||||
(?:[^/]+/)*?entry_id/(?P<id_html5>[0-9a-z_]+)
|
||||
.*\?.*\bwid=_(?P<partner_id_html5>\d+)
|
||||
html5/html5lib/[^/]+/mwEmbedFrame\.php
|
||||
)
|
||||
)
|
||||
)(?:/(?P<path>[^?]+))?(?:\?(?P<query>.*))?
|
||||
)
|
||||
'''
|
||||
_API_BASE = 'http://cdnapi.kaltura.com/api_v3/index.php?'
|
||||
@@ -127,10 +124,41 @@ class KalturaIE(InfoExtractor):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
partner_id = mobj.group('partner_id_s') or mobj.group('partner_id') or mobj.group('partner_id_html5')
|
||||
entry_id = mobj.group('id_s') or mobj.group('id') or mobj.group('id_html5')
|
||||
|
||||
info, flavor_assets = self._get_video_info(entry_id, partner_id)
|
||||
partner_id, entry_id = mobj.group('partner_id', 'id')
|
||||
ks = None
|
||||
if partner_id and entry_id:
|
||||
info, flavor_assets = self._get_video_info(entry_id, partner_id)
|
||||
else:
|
||||
path, query = mobj.group('path', 'query')
|
||||
if not path and not query:
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
params = {}
|
||||
if query:
|
||||
params = compat_parse_qs(query)
|
||||
if path:
|
||||
splitted_path = path.split('/')
|
||||
params.update(dict((zip(splitted_path[::2], [[v] for v in splitted_path[1::2]]))))
|
||||
if 'wid' in params:
|
||||
partner_id = params['wid'][0][1:]
|
||||
elif 'p' in params:
|
||||
partner_id = params['p'][0]
|
||||
else:
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
if 'entry_id' in params:
|
||||
entry_id = params['entry_id'][0]
|
||||
info, flavor_assets = self._get_video_info(entry_id, partner_id)
|
||||
elif 'uiconf_id' in params and 'flashvars[referenceId]' in params:
|
||||
reference_id = params['flashvars[referenceId]'][0]
|
||||
webpage = self._download_webpage(url, reference_id)
|
||||
entry_data = self._parse_json(self._search_regex(
|
||||
r'window\.kalturaIframePackageData\s*=\s*({.*});',
|
||||
webpage, 'kalturaIframePackageData'),
|
||||
reference_id)['entryResult']
|
||||
info, flavor_assets = entry_data['meta'], entry_data['contextData']['flavorAssets']
|
||||
entry_id = info['id']
|
||||
else:
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
ks = params.get('flashvars[ks]', [None])[0]
|
||||
|
||||
source_url = smuggled_data.get('source_url')
|
||||
if source_url:
|
||||
@@ -140,14 +168,19 @@ class KalturaIE(InfoExtractor):
|
||||
else:
|
||||
referrer = None
|
||||
|
||||
def sign_url(unsigned_url):
|
||||
if ks:
|
||||
unsigned_url += '/ks/%s' % ks
|
||||
if referrer:
|
||||
unsigned_url += '?referrer=%s' % referrer
|
||||
return unsigned_url
|
||||
|
||||
formats = []
|
||||
for f in flavor_assets:
|
||||
# Continue if asset is not ready
|
||||
if f['status'] != 2:
|
||||
continue
|
||||
video_url = '%s/flavorId/%s' % (info['dataUrl'], f['id'])
|
||||
if referrer:
|
||||
video_url += '?referrer=%s' % referrer
|
||||
video_url = sign_url('%s/flavorId/%s' % (info['dataUrl'], f['id']))
|
||||
formats.append({
|
||||
'format_id': '%(fileExt)s-%(bitrate)s' % f,
|
||||
'ext': f.get('fileExt'),
|
||||
@@ -160,9 +193,7 @@ class KalturaIE(InfoExtractor):
|
||||
'width': int_or_none(f.get('width')),
|
||||
'url': video_url,
|
||||
})
|
||||
m3u8_url = info['dataUrl'].replace('format/url', 'format/applehttp')
|
||||
if referrer:
|
||||
m3u8_url += '?referrer=%s' % referrer
|
||||
m3u8_url = sign_url(info['dataUrl'].replace('format/url', 'format/applehttp'))
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, entry_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
|
||||
|
@@ -14,10 +14,10 @@ class KhanAcademyIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.khanacademy.org/video/one-time-pad',
|
||||
'md5': '7021db7f2d47d4fff89b13177cb1e8f4',
|
||||
'md5': '7b391cce85e758fb94f763ddc1bbb979',
|
||||
'info_dict': {
|
||||
'id': 'one-time-pad',
|
||||
'ext': 'mp4',
|
||||
'ext': 'webm',
|
||||
'title': 'The one-time pad',
|
||||
'description': 'The perfect cipher',
|
||||
'duration': 176,
|
||||
|
99
youtube_dl/extractor/kusi.py
Normal file
99
youtube_dl/extractor/kusi.py
Normal file
@@ -0,0 +1,99 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import random
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote_plus
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
timeconvert,
|
||||
update_url_query,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class KUSIIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?kusi\.com/(?P<path>story/.+|video\?clipId=(?P<clipId>\d+))'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.kusi.com/story/31183873/turko-files-case-closed-put-on-hold',
|
||||
'md5': 'f926e7684294cf8cb7bdf8858e1b3988',
|
||||
'info_dict': {
|
||||
'id': '12203019',
|
||||
'ext': 'mp4',
|
||||
'title': 'Turko Files: Case Closed! & Put On Hold!',
|
||||
'duration': 231.0,
|
||||
'upload_date': '20160210',
|
||||
'timestamp': 1455087571,
|
||||
'thumbnail': 're:^https?://.*\.jpg$'
|
||||
},
|
||||
}, {
|
||||
'url': 'http://kusi.com/video?clipId=12203019',
|
||||
'info_dict': {
|
||||
'id': '12203019',
|
||||
'ext': 'mp4',
|
||||
'title': 'Turko Files: Case Closed! & Put On Hold!',
|
||||
'duration': 231.0,
|
||||
'upload_date': '20160210',
|
||||
'timestamp': 1455087571,
|
||||
'thumbnail': 're:^https?://.*\.jpg$'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # Same as previous one
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
clip_id = mobj.group('clipId')
|
||||
video_id = clip_id or mobj.group('path')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if clip_id is None:
|
||||
video_id = clip_id = self._html_search_regex(
|
||||
r'"clipId"\s*,\s*"(\d+)"', webpage, 'clip id')
|
||||
|
||||
affiliate_id = self._search_regex(
|
||||
r'affiliateId\s*:\s*\'([^\']+)\'', webpage, 'affiliate id')
|
||||
|
||||
# See __Packages/worldnow/model/GalleryModel.as of WNGallery.swf
|
||||
xml_url = update_url_query('http://www.kusi.com/build.asp', {
|
||||
'buildtype': 'buildfeaturexmlrequest',
|
||||
'featureType': 'Clip',
|
||||
'featureid': clip_id,
|
||||
'affiliateno': affiliate_id,
|
||||
'clientgroupid': '1',
|
||||
'rnd': int(round(random.random() * 1000000)),
|
||||
})
|
||||
|
||||
doc = self._download_xml(xml_url, video_id)
|
||||
|
||||
video_title = xpath_text(doc, 'HEADLINE', fatal=True)
|
||||
duration = float_or_none(xpath_text(doc, 'DURATION'), scale=1000)
|
||||
description = xpath_text(doc, 'ABSTRACT')
|
||||
thumbnail = xpath_text(doc, './THUMBNAILIMAGE/FILENAME')
|
||||
createtion_time = timeconvert(xpath_text(doc, 'rfc822creationdate'))
|
||||
|
||||
quality_options = doc.find('{http://search.yahoo.com/mrss/}group').findall('{http://search.yahoo.com/mrss/}content')
|
||||
formats = []
|
||||
for quality in quality_options:
|
||||
formats.append({
|
||||
'url': compat_urllib_parse_unquote_plus(quality.attrib['url']),
|
||||
'height': int_or_none(quality.attrib.get('height')),
|
||||
'width': int_or_none(quality.attrib.get('width')),
|
||||
'vbr': float_or_none(quality.attrib.get('bitratebits'), scale=1000),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': createtion_time,
|
||||
}
|
@@ -23,7 +23,7 @@ class KuwoBaseIE(InfoExtractor):
|
||||
{'format': 'aac', 'ext': 'aac', 'abr': 48, 'preference': 10}
|
||||
]
|
||||
|
||||
def _get_formats(self, song_id):
|
||||
def _get_formats(self, song_id, tolerate_ip_deny=False):
|
||||
formats = []
|
||||
for file_format in self._FORMATS:
|
||||
song_url = self._download_webpage(
|
||||
@@ -32,7 +32,7 @@ class KuwoBaseIE(InfoExtractor):
|
||||
song_id, note='Download %s url info' % file_format['format'],
|
||||
)
|
||||
|
||||
if song_url == 'IPDeny':
|
||||
if song_url == 'IPDeny' and not tolerate_ip_deny:
|
||||
raise ExtractorError('This song is blocked in this region', expected=True)
|
||||
|
||||
if song_url.startswith('http://') or song_url.startswith('https://'):
|
||||
@@ -43,7 +43,12 @@ class KuwoBaseIE(InfoExtractor):
|
||||
'preference': file_format['preference'],
|
||||
'abr': file_format.get('abr'),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
# XXX _sort_formats fails if there are not formats, while it's not the
|
||||
# desired behavior if 'IPDeny' is ignored
|
||||
# This check can be removed if https://github.com/rg3/youtube-dl/pull/8051 is merged
|
||||
if not tolerate_ip_deny:
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
|
||||
@@ -68,6 +73,7 @@ class KuwoIE(KuwoBaseIE):
|
||||
'id': '6446136',
|
||||
'ext': 'mp3',
|
||||
'title': '心',
|
||||
'description': 'md5:b2ab6295d014005bfc607525bfc1e38a',
|
||||
'creator': 'IU',
|
||||
'upload_date': '20150518',
|
||||
},
|
||||
@@ -287,10 +293,16 @@ class KuwoMvIE(KuwoBaseIE):
|
||||
'url': 'http://www.kuwo.cn/mv/6480076/',
|
||||
'info_dict': {
|
||||
'id': '6480076',
|
||||
'ext': 'mkv',
|
||||
'title': '我们家MV',
|
||||
'ext': 'mp4',
|
||||
'title': 'My HouseMV',
|
||||
'creator': '2PM',
|
||||
},
|
||||
# In this video, music URLs (anti.s) are blocked outside China and
|
||||
# USA, while the MV URL (mvurl) is available globally, so force the MV
|
||||
# URL for consistent results in different countries
|
||||
'params': {
|
||||
'format': 'mv',
|
||||
},
|
||||
}
|
||||
_FORMATS = KuwoBaseIE._FORMATS + [
|
||||
{'format': 'mkv', 'ext': 'mkv', 'preference': 250},
|
||||
@@ -312,7 +324,17 @@ class KuwoMvIE(KuwoBaseIE):
|
||||
else:
|
||||
raise ExtractorError('Unable to find song or singer names')
|
||||
|
||||
formats = self._get_formats(song_id)
|
||||
formats = self._get_formats(song_id, tolerate_ip_deny=True)
|
||||
|
||||
mv_url = self._download_webpage(
|
||||
'http://www.kuwo.cn/yy/st/mvurl?rid=MUSIC_%s' % song_id,
|
||||
song_id, note='Download %s MV URL' % song_id)
|
||||
formats.append({
|
||||
'url': mv_url,
|
||||
'format_id': 'mv',
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': song_id,
|
||||
|
@@ -1,36 +1,39 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import datetime
|
||||
import hashlib
|
||||
import re
|
||||
import time
|
||||
import base64
|
||||
import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_ord,
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
encode_data_uri,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
encode_data_uri,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
class LetvIE(InfoExtractor):
|
||||
class LeIE(InfoExtractor):
|
||||
IE_DESC = '乐视网'
|
||||
_VALID_URL = r'http://www\.letv\.com/ptv/vplay/(?P<id>\d+).html'
|
||||
_VALID_URL = r'http://www\.le\.com/ptv/vplay/(?P<id>\d+)\.html'
|
||||
|
||||
_URL_TEMPLATE = 'http://www.le.com/ptv/vplay/%s.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.letv.com/ptv/vplay/22005890.html',
|
||||
'url': 'http://www.le.com/ptv/vplay/22005890.html',
|
||||
'md5': 'edadcfe5406976f42f9f266057ee5e40',
|
||||
'info_dict': {
|
||||
'id': '22005890',
|
||||
@@ -42,7 +45,7 @@ class LetvIE(InfoExtractor):
|
||||
'hls_prefer_native': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.letv.com/ptv/vplay/1415246.html',
|
||||
'url': 'http://www.le.com/ptv/vplay/1415246.html',
|
||||
'info_dict': {
|
||||
'id': '1415246',
|
||||
'ext': 'mp4',
|
||||
@@ -54,7 +57,7 @@ class LetvIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
'note': 'This video is available only in Mainland China, thus a proxy is needed',
|
||||
'url': 'http://www.letv.com/ptv/vplay/1118082.html',
|
||||
'url': 'http://www.le.com/ptv/vplay/1118082.html',
|
||||
'md5': '2424c74948a62e5f31988438979c5ad1',
|
||||
'info_dict': {
|
||||
'id': '1118082',
|
||||
@@ -94,17 +97,16 @@ class LetvIE(InfoExtractor):
|
||||
return encrypted_data
|
||||
encrypted_data = encrypted_data[5:]
|
||||
|
||||
_loc4_ = bytearray()
|
||||
while encrypted_data:
|
||||
b = compat_ord(encrypted_data[0])
|
||||
_loc4_.extend([b // 16, b & 0x0f])
|
||||
encrypted_data = encrypted_data[1:]
|
||||
_loc4_ = bytearray(2 * len(encrypted_data))
|
||||
for idx, val in enumerate(encrypted_data):
|
||||
b = compat_ord(val)
|
||||
_loc4_[2 * idx] = b // 16
|
||||
_loc4_[2 * idx + 1] = b % 16
|
||||
idx = len(_loc4_) - 11
|
||||
_loc4_ = _loc4_[idx:] + _loc4_[:idx]
|
||||
_loc7_ = bytearray()
|
||||
while _loc4_:
|
||||
_loc7_.append(_loc4_[0] * 16 + _loc4_[1])
|
||||
_loc4_ = _loc4_[2:]
|
||||
_loc7_ = bytearray(len(encrypted_data))
|
||||
for i in range(len(encrypted_data)):
|
||||
_loc7_[i] = _loc4_[2 * i] * 16 + _loc4_[2 * i + 1]
|
||||
|
||||
return bytes(_loc7_)
|
||||
|
||||
@@ -117,10 +119,10 @@ class LetvIE(InfoExtractor):
|
||||
'splatid': 101,
|
||||
'format': 1,
|
||||
'tkey': self.calc_time_key(int(time.time())),
|
||||
'domain': 'www.letv.com'
|
||||
'domain': 'www.le.com'
|
||||
}
|
||||
play_json_req = sanitized_Request(
|
||||
'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params)
|
||||
'http://api.le.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params)
|
||||
)
|
||||
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
|
||||
if cn_verification_proxy:
|
||||
@@ -193,26 +195,51 @@ class LetvIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class LetvTvIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www.letv.com/tv/(?P<id>\d+).html'
|
||||
class LePlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'http://[a-z]+\.le\.com/[a-z]+/(?P<id>[a-z0-9_]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.letv.com/tv/46177.html',
|
||||
'url': 'http://www.le.com/tv/46177.html',
|
||||
'info_dict': {
|
||||
'id': '46177',
|
||||
'title': '美人天下',
|
||||
'description': 'md5:395666ff41b44080396e59570dbac01c'
|
||||
},
|
||||
'playlist_count': 35
|
||||
}, {
|
||||
'url': 'http://tv.le.com/izt/wuzetian/index.html',
|
||||
'info_dict': {
|
||||
'id': 'wuzetian',
|
||||
'title': '武媚娘传奇',
|
||||
'description': 'md5:e12499475ab3d50219e5bba00b3cb248'
|
||||
},
|
||||
# This playlist contains some extra videos other than the drama itself
|
||||
'playlist_mincount': 96
|
||||
}, {
|
||||
'url': 'http://tv.le.com/pzt/lswjzzjc/index.shtml',
|
||||
# This series is moved to http://www.le.com/tv/10005297.html
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.le.com/comic/92063.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://list.le.com/listn/c1009_sc532002_d2_p1_o1.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if LeIE.suitable(url) else super(LePlaylistIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
page = self._download_webpage(url, playlist_id)
|
||||
|
||||
media_urls = list(set(re.findall(
|
||||
r'http://www.letv.com/ptv/vplay/\d+.html', page)))
|
||||
entries = [self.url_result(media_url, ie='Letv')
|
||||
for media_url in media_urls]
|
||||
# Currently old domain names are still used in playlists
|
||||
media_ids = orderedSet(re.findall(
|
||||
r'<a[^>]+href="http://www\.letv\.com/ptv/vplay/(\d+)\.html', page))
|
||||
entries = [self.url_result(LeIE._URL_TEMPLATE % media_id, ie='Le')
|
||||
for media_id in media_ids]
|
||||
|
||||
title = self._html_search_meta('keywords', page,
|
||||
fatal=False).split(',')[0]
|
||||
@@ -222,31 +249,9 @@ class LetvTvIE(InfoExtractor):
|
||||
playlist_description=description)
|
||||
|
||||
|
||||
class LetvPlaylistIE(LetvTvIE):
|
||||
_VALID_URL = r'http://tv.letv.com/[a-z]+/(?P<id>[a-z]+)/index.s?html'
|
||||
_TESTS = [{
|
||||
'url': 'http://tv.letv.com/izt/wuzetian/index.html',
|
||||
'info_dict': {
|
||||
'id': 'wuzetian',
|
||||
'title': '武媚娘传奇',
|
||||
'description': 'md5:e12499475ab3d50219e5bba00b3cb248'
|
||||
},
|
||||
# This playlist contains some extra videos other than the drama itself
|
||||
'playlist_mincount': 96
|
||||
}, {
|
||||
'url': 'http://tv.letv.com/pzt/lswjzzjc/index.shtml',
|
||||
'info_dict': {
|
||||
'id': 'lswjzzjc',
|
||||
# The title should be "劲舞青春", but I can't find a simple way to
|
||||
# determine the playlist title
|
||||
'title': '乐视午间自制剧场',
|
||||
'description': 'md5:b1eef244f45589a7b5b1af9ff25a4489'
|
||||
},
|
||||
'playlist_mincount': 7
|
||||
}]
|
||||
|
||||
|
||||
class LetvCloudIE(InfoExtractor):
|
||||
# Most of *.letv.com is changed to *.le.com on 2016/01/02
|
||||
# but yuntv.letv.com is kept, so also keep the extractor name
|
||||
IE_DESC = '乐视云'
|
||||
_VALID_URL = r'https?://yuntv\.letv\.com/bcloud.html\?.+'
|
||||
|
||||
@@ -327,7 +332,7 @@ class LetvCloudIE(InfoExtractor):
|
||||
formats.append({
|
||||
'url': url,
|
||||
'ext': determine_ext(decoded_url),
|
||||
'format_id': int_or_none(play_url.get('vtype')),
|
||||
'format_id': str_or_none(play_url.get('vtype')),
|
||||
'format_note': str_or_none(play_url.get('definition')),
|
||||
'width': int_or_none(play_url.get('vwidth')),
|
||||
'height': int_or_none(play_url.get('vheight')),
|
@@ -20,18 +20,18 @@ class LifeNewsIE(InfoExtractor):
|
||||
_VALID_URL = r'http://lifenews\.ru/(?:mobile/)?(?P<section>news|video)/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://lifenews.ru/news/126342',
|
||||
'md5': 'e1b50a5c5fb98a6a544250f2e0db570a',
|
||||
# single video embedded via video/source
|
||||
'url': 'http://lifenews.ru/news/98736',
|
||||
'md5': '77c95eaefaca216e32a76a343ad89d23',
|
||||
'info_dict': {
|
||||
'id': '126342',
|
||||
'id': '98736',
|
||||
'ext': 'mp4',
|
||||
'title': 'МВД разыскивает мужчин, оставивших в IKEA сумку с автоматом',
|
||||
'description': 'Камеры наблюдения гипермаркета зафиксировали троих мужчин, спрятавших оружейный арсенал в камере хранения.',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'upload_date': '20140130',
|
||||
'title': 'Мужчина нашел дома архив оборонного завода',
|
||||
'description': 'md5:3b06b1b39b5e2bea548e403d99b8bf26',
|
||||
'upload_date': '20120805',
|
||||
}
|
||||
}, {
|
||||
# video in <iframe>
|
||||
# single video embedded via iframe
|
||||
'url': 'http://lifenews.ru/news/152125',
|
||||
'md5': '77d19a6f0886cd76bdbf44b4d971a273',
|
||||
'info_dict': {
|
||||
@@ -42,15 +42,33 @@ class LifeNewsIE(InfoExtractor):
|
||||
'upload_date': '20150402',
|
||||
}
|
||||
}, {
|
||||
# two videos embedded via iframe
|
||||
'url': 'http://lifenews.ru/news/153461',
|
||||
'md5': '9b6ef8bc0ffa25aebc8bdb40d89ab795',
|
||||
'info_dict': {
|
||||
'id': '153461',
|
||||
'ext': 'mp4',
|
||||
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве',
|
||||
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
|
||||
'upload_date': '20150505',
|
||||
}
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': '9b6ef8bc0ffa25aebc8bdb40d89ab795',
|
||||
'info_dict': {
|
||||
'id': '153461-video1',
|
||||
'ext': 'mp4',
|
||||
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 1)',
|
||||
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
|
||||
'upload_date': '20150505',
|
||||
},
|
||||
}, {
|
||||
'md5': 'ebb3bf3b1ce40e878d0d628e93eb0322',
|
||||
'info_dict': {
|
||||
'id': '153461-video2',
|
||||
'ext': 'mp4',
|
||||
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 2)',
|
||||
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
|
||||
'upload_date': '20150505',
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
'url': 'http://lifenews.ru/video/13035',
|
||||
'only_matching': True,
|
||||
@@ -65,10 +83,14 @@ class LifeNewsIE(InfoExtractor):
|
||||
'http://lifenews.ru/%s/%s' % (section, video_id),
|
||||
video_id, 'Downloading page')
|
||||
|
||||
videos = re.findall(r'<video.*?poster="(?P<poster>[^"]+)".*?src="(?P<video>[^"]+)".*?></video>', webpage)
|
||||
iframe_link = self._html_search_regex(
|
||||
'<iframe[^>]+src=["\']([^"\']+)["\']', webpage, 'iframe link', default=None)
|
||||
if not videos and not iframe_link:
|
||||
video_urls = re.findall(
|
||||
r'<video[^>]+><source[^>]+src=["\'](.+?)["\']', webpage)
|
||||
|
||||
iframe_links = re.findall(
|
||||
r'<iframe[^>]+src=["\']((?:https?:)?//embed\.life\.ru/embed/.+?)["\']',
|
||||
webpage)
|
||||
|
||||
if not video_urls and not iframe_links:
|
||||
raise ExtractorError('No media links available for %s' % video_id)
|
||||
|
||||
title = remove_end(
|
||||
@@ -95,31 +117,44 @@ class LifeNewsIE(InfoExtractor):
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
|
||||
def make_entry(video_id, media, video_number=None):
|
||||
def make_entry(video_id, video_url, index=None):
|
||||
cur_info = dict(common_info)
|
||||
cur_info.update({
|
||||
'id': video_id,
|
||||
'url': media[1],
|
||||
'thumbnail': media[0],
|
||||
'title': title if video_number is None else '%s-video%s' % (title, video_number),
|
||||
'id': video_id if not index else '%s-video%s' % (video_id, index),
|
||||
'url': video_url,
|
||||
'title': title if not index else '%s (Видео %s)' % (title, index),
|
||||
})
|
||||
return cur_info
|
||||
|
||||
if iframe_link:
|
||||
iframe_link = self._proto_relative_url(iframe_link, 'http:')
|
||||
cur_info = dict(common_info)
|
||||
cur_info.update({
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': iframe_link,
|
||||
})
|
||||
def make_video_entry(video_id, video_url, index=None):
|
||||
video_url = compat_urlparse.urljoin(url, video_url)
|
||||
return make_entry(video_id, video_url, index)
|
||||
|
||||
def make_iframe_entry(video_id, video_url, index=None):
|
||||
video_url = self._proto_relative_url(video_url, 'http:')
|
||||
cur_info = make_entry(video_id, video_url, index)
|
||||
cur_info['_type'] = 'url_transparent'
|
||||
return cur_info
|
||||
|
||||
if len(videos) == 1:
|
||||
return make_entry(video_id, videos[0])
|
||||
else:
|
||||
return [make_entry(video_id, media, video_number + 1) for video_number, media in enumerate(videos)]
|
||||
if len(video_urls) == 1 and not iframe_links:
|
||||
return make_video_entry(video_id, video_urls[0])
|
||||
|
||||
if len(iframe_links) == 1 and not video_urls:
|
||||
return make_iframe_entry(video_id, iframe_links[0])
|
||||
|
||||
entries = []
|
||||
|
||||
if video_urls:
|
||||
for num, video_url in enumerate(video_urls, 1):
|
||||
entries.append(make_video_entry(video_id, video_url, num))
|
||||
|
||||
if iframe_links:
|
||||
for num, iframe_link in enumerate(iframe_links, len(video_urls) + 1):
|
||||
entries.append(make_iframe_entry(video_id, iframe_link, num))
|
||||
|
||||
playlist = common_info.copy()
|
||||
playlist.update(self.playlist_result(entries, video_id, title, description))
|
||||
return playlist
|
||||
|
||||
|
||||
class LifeEmbedIE(InfoExtractor):
|
||||
|
@@ -14,6 +14,7 @@ from ..utils import (
|
||||
xpath_with_ns,
|
||||
xpath_text,
|
||||
orderedSet,
|
||||
update_url_query,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
parse_iso8601,
|
||||
@@ -64,7 +65,7 @@ class LivestreamIE(InfoExtractor):
|
||||
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
|
||||
base_ele = find_xpath_attr(
|
||||
smil, self._xpath_ns('.//meta', namespace), 'name', 'httpBase')
|
||||
base = base_ele.get('content') if base_ele else 'http://livestreamvod-f.akamaihd.net/'
|
||||
base = base_ele.get('content') if base_ele is not None else 'http://livestreamvod-f.akamaihd.net/'
|
||||
|
||||
formats = []
|
||||
video_nodes = smil.findall(self._xpath_ns('.//video', namespace))
|
||||
@@ -72,7 +73,10 @@ class LivestreamIE(InfoExtractor):
|
||||
for vn in video_nodes:
|
||||
tbr = int_or_none(vn.attrib.get('system-bitrate'), 1000)
|
||||
furl = (
|
||||
'%s%s?v=3.0.3&fp=WIN%%2014,0,0,145' % (base, vn.attrib['src']))
|
||||
update_url_query(compat_urlparse.urljoin(base, vn.attrib['src']), {
|
||||
'v': '3.0.3',
|
||||
'fp': 'WIN% 14,0,0,145',
|
||||
}))
|
||||
if 'clipBegin' in vn.attrib:
|
||||
furl += '&ssek=' + vn.attrib['clipBegin']
|
||||
formats.append({
|
||||
|
40
youtube_dl/extractor/makerschannel.py
Normal file
40
youtube_dl/extractor/makerschannel.py
Normal file
@@ -0,0 +1,40 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class MakersChannelIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?makerschannel\.com/.*(?P<id_type>video|production)_id=(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://makerschannel.com/en/zoomin/community-highlights?video_id=849',
|
||||
'md5': '624a512c6969236b5967bf9286345ad1',
|
||||
'info_dict': {
|
||||
'id': '849',
|
||||
'ext': 'mp4',
|
||||
'title': 'Landing a bus on a plane is an epic win',
|
||||
'uploader': 'ZoomIn',
|
||||
'description': 'md5:cd9cca2ea7b69b78be81d07020c97139',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
id_type, url_id = re.match(self._VALID_URL, url).groups()
|
||||
webpage = self._download_webpage(url, url_id)
|
||||
video_data = self._html_search_regex(r'<div([^>]+data-%s-id="%s"[^>]+)>' % (id_type, url_id), webpage, 'video data')
|
||||
|
||||
def extract_data_val(attr, fatal=False):
|
||||
return self._html_search_regex(r'data-%s\s*=\s*"([^"]+)"' % attr, video_data, attr, fatal=fatal)
|
||||
minoto_id = self._search_regex(r'/id/([a-zA-Z0-9]+)', extract_data_val('video-src', True), 'minoto id')
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'minoto:%s' % minoto_id,
|
||||
'id': extract_data_val('video-id', True),
|
||||
'title': extract_data_val('title', True),
|
||||
'description': extract_data_val('description'),
|
||||
'thumbnail': extract_data_val('image'),
|
||||
'uploader': extract_data_val('channel'),
|
||||
}
|
@@ -14,7 +14,7 @@ from ..utils import (
|
||||
|
||||
class MDRIE(InfoExtractor):
|
||||
IE_DESC = 'MDR.DE and KiKA'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z]+(?P<id>\d+)(?:_.+?)?\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z]+-?(?P<id>\d+)(?:_.+?)?\.html'
|
||||
|
||||
_TESTS = [{
|
||||
# MDR regularly deletes its videos
|
||||
@@ -60,6 +60,9 @@ class MDRIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.mdr.de/mediathek/mdr-videos/a/video-1334.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -68,8 +71,8 @@ class MDRIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
data_url = self._search_regex(
|
||||
r'dataURL\s*:\s*(["\'])(?P<url>/.+/(?:video|audio)[0-9]+-avCustom\.xml)\1',
|
||||
webpage, 'data url', group='url')
|
||||
r'(?:dataURL|playerXml(?:["\'])?)\s*:\s*(["\'])(?P<url>\\?/.+/(?:video|audio)-?[0-9]+-avCustom\.xml)\1',
|
||||
webpage, 'data url', default=None, group='url').replace('\/', '/')
|
||||
|
||||
doc = self._download_xml(
|
||||
compat_urlparse.urljoin(url, data_url), video_id)
|
||||
|
56
youtube_dl/extractor/minoto.py
Normal file
56
youtube_dl/extractor/minoto.py
Normal file
@@ -0,0 +1,56 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class MinotoIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:minoto:|https?://(?:play|iframe|embed)\.minoto-video\.com/(?P<player_id>[0-9]+)/)(?P<id>[a-zA-Z0-9]+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
player_id = mobj.group('player_id') or '1'
|
||||
video_id = mobj.group('id')
|
||||
video_data = self._download_json('http://play.minoto-video.com/%s/%s.js' % (player_id, video_id), video_id)
|
||||
video_metadata = video_data['video-metadata']
|
||||
formats = []
|
||||
for fmt in video_data['video-files']:
|
||||
fmt_url = fmt.get('url')
|
||||
if not fmt_url:
|
||||
continue
|
||||
container = fmt.get('container')
|
||||
if container == 'hls':
|
||||
formats.extend(fmt_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
else:
|
||||
fmt_profile = fmt.get('profile') or {}
|
||||
f = {
|
||||
'format_id': fmt_profile.get('name-short'),
|
||||
'format_note': fmt_profile.get('name'),
|
||||
'url': fmt_url,
|
||||
'container': container,
|
||||
'tbr': int_or_none(fmt.get('bitrate')),
|
||||
'filesize': int_or_none(fmt.get('filesize')),
|
||||
'width': int_or_none(fmt.get('width')),
|
||||
'height': int_or_none(fmt.get('height')),
|
||||
}
|
||||
codecs = fmt.get('codecs')
|
||||
if codecs:
|
||||
codecs = codecs.split(',')
|
||||
if len(codecs) == 2:
|
||||
f.update({
|
||||
'vcodec': codecs[0],
|
||||
'acodec': codecs[1],
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_metadata['title'],
|
||||
'description': video_metadata.get('description'),
|
||||
'thumbnail': video_metadata.get('video-poster', {}).get('url'),
|
||||
'formats': formats,
|
||||
}
|
@@ -99,7 +99,7 @@ class OCWMITIE(InfoExtractor):
|
||||
'url': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/',
|
||||
'info_dict': {
|
||||
'id': 'EObHWIEKGjA',
|
||||
'ext': 'mp4',
|
||||
'ext': 'webm',
|
||||
'title': 'Lecture 7: Multiple Discrete Random Variables: Expectations, Conditioning, Independence',
|
||||
'description': 'In this lecture, the professor discussed multiple random variables, expectations, and binomial distribution.',
|
||||
'upload_date': '20121109',
|
||||
|
@@ -7,6 +7,7 @@ from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
parse_count,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
@@ -85,8 +86,8 @@ class MixcloudIE(InfoExtractor):
|
||||
uploader_id = self._search_regex(
|
||||
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
|
||||
description = self._og_search_description(webpage)
|
||||
like_count = str_to_int(self._search_regex(
|
||||
r'\bbutton-favorite\b[^>]+m-ajax-toggle-count="([^"]+)"',
|
||||
like_count = parse_count(self._search_regex(
|
||||
r'\bbutton-favorite[^>]+>.*?<span[^>]+class=["\']toggle-number[^>]+>\s*([^<]+)',
|
||||
webpage, 'like count', fatal=False))
|
||||
view_count = str_to_int(self._search_regex(
|
||||
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
|
||||
|
@@ -5,6 +5,7 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
)
|
||||
@@ -12,55 +13,62 @@ from ..utils import (
|
||||
|
||||
class MotherlessIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/)?(?P<id>[A-Z0-9]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://motherless.com/AC3FFE1',
|
||||
'md5': '310f62e325a9fafe64f68c0bccb6e75f',
|
||||
'info_dict': {
|
||||
'id': 'AC3FFE1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fucked in the ass while playing PS3',
|
||||
'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
|
||||
'upload_date': '20100913',
|
||||
'uploader_id': 'famouslyfuckedup',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'age_limit': 18,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://motherless.com/532291B',
|
||||
'md5': 'bc59a6b47d1f958e61fbd38a4d31b131',
|
||||
'info_dict': {
|
||||
'id': '532291B',
|
||||
'ext': 'mp4',
|
||||
'title': 'Amazing girl playing the omegle game, PERFECT!',
|
||||
'categories': ['Amateur', 'webcam', 'omegle', 'pink', 'young', 'masturbate', 'teen', 'game', 'hairy'],
|
||||
'upload_date': '20140622',
|
||||
'uploader_id': 'Sulivana7x',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'age_limit': 18,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://motherless.com/g/cosplay/633979F',
|
||||
'md5': '0b2a43f447a49c3e649c93ad1fafa4a0',
|
||||
'info_dict': {
|
||||
'id': '633979F',
|
||||
'ext': 'mp4',
|
||||
'title': 'Turtlette',
|
||||
'categories': ['superheroine heroine superher'],
|
||||
'upload_date': '20140827',
|
||||
'uploader_id': 'shade0230',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'age_limit': 18,
|
||||
}
|
||||
_TESTS = [{
|
||||
'url': 'http://motherless.com/AC3FFE1',
|
||||
'md5': '310f62e325a9fafe64f68c0bccb6e75f',
|
||||
'info_dict': {
|
||||
'id': 'AC3FFE1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fucked in the ass while playing PS3',
|
||||
'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
|
||||
'upload_date': '20100913',
|
||||
'uploader_id': 'famouslyfuckedup',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'age_limit': 18,
|
||||
}
|
||||
]
|
||||
}, {
|
||||
'url': 'http://motherless.com/532291B',
|
||||
'md5': 'bc59a6b47d1f958e61fbd38a4d31b131',
|
||||
'info_dict': {
|
||||
'id': '532291B',
|
||||
'ext': 'mp4',
|
||||
'title': 'Amazing girl playing the omegle game, PERFECT!',
|
||||
'categories': ['Amateur', 'webcam', 'omegle', 'pink', 'young', 'masturbate', 'teen',
|
||||
'game', 'hairy'],
|
||||
'upload_date': '20140622',
|
||||
'uploader_id': 'Sulivana7x',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'age_limit': 18,
|
||||
},
|
||||
'skip': '404',
|
||||
}, {
|
||||
'url': 'http://motherless.com/g/cosplay/633979F',
|
||||
'md5': '0b2a43f447a49c3e649c93ad1fafa4a0',
|
||||
'info_dict': {
|
||||
'id': '633979F',
|
||||
'ext': 'mp4',
|
||||
'title': 'Turtlette',
|
||||
'categories': ['superheroine heroine superher'],
|
||||
'upload_date': '20140827',
|
||||
'uploader_id': 'shade0230',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}, {
|
||||
# no keywords
|
||||
'url': 'http://motherless.com/8B4BBC1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if any(p in webpage for p in (
|
||||
'<title>404 - MOTHERLESS.COM<',
|
||||
">The page you're looking for cannot be found.<")):
|
||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
|
||||
video_url = self._html_search_regex(
|
||||
@@ -86,7 +94,7 @@ class MotherlessIE(InfoExtractor):
|
||||
r'"thumb-member-username">\s+<a href="/m/([^"]+)"',
|
||||
webpage, 'uploader_id')
|
||||
|
||||
categories = self._html_search_meta('keywords', webpage)
|
||||
categories = self._html_search_meta('keywords', webpage, default=None)
|
||||
if categories:
|
||||
categories = [cat.strip() for cat in categories.split(',')]
|
||||
|
||||
|
@@ -48,7 +48,7 @@ class NationalGeographicIE(InfoExtractor):
|
||||
theplatform_id = url_basename(content.attrib.get('url'))
|
||||
|
||||
return self.url_result(smuggle_url(
|
||||
'http://link.theplatform.com/s/ngs/%s?format=SMIL&formats=MPEG4&manifest=f4m' % theplatform_id,
|
||||
'http://link.theplatform.com/s/ngs/%s?formats=MPEG4&manifest=f4m' % theplatform_id,
|
||||
# For some reason, the normal links don't work and we must force
|
||||
# the use of f4m
|
||||
{'force_smil_url': True}))
|
||||
|
@@ -1,18 +1,26 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import functools
|
||||
import os.path
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
int_or_none,
|
||||
OnDemandPagedList,
|
||||
parse_duration,
|
||||
remove_start,
|
||||
xpath_text,
|
||||
xpath_attr,
|
||||
)
|
||||
|
||||
|
||||
class NBAIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?P<path>(?:[^/]+/)?video/(?P<id>[^?]*?))/?(?:/index\.html)?(?:\?.*)?$'
|
||||
_VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?P<path>(?:[^/]+/)+(?P<id>[^?]*?))/?(?:/index\.html)?(?:\?.*)?$'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
|
||||
'md5': '9e7729d3010a9c71506fd1248f74e4f4',
|
||||
@@ -44,14 +52,101 @@ class NBAIE(InfoExtractor):
|
||||
'timestamp': 1432134543,
|
||||
'upload_date': '20150520',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.nba.com/clippers/news/doc-rivers-were-not-trading-blake',
|
||||
'info_dict': {
|
||||
'id': '1455672027478-Doc_Feb16_720',
|
||||
'ext': 'mp4',
|
||||
'title': 'Practice: Doc Rivers - 2/16/16',
|
||||
'description': 'Head Coach Doc Rivers addresses the media following practice.',
|
||||
'upload_date': '20160217',
|
||||
'timestamp': 1455672000,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#',
|
||||
'info_dict': {
|
||||
'id': 'timberwolves',
|
||||
'title': 'Shootaround Access - Dec. 12 | Andrew Wiggins',
|
||||
},
|
||||
'playlist_count': 30,
|
||||
'params': {
|
||||
# Download the whole playlist takes too long time
|
||||
'playlist_items': '1-30',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#',
|
||||
'info_dict': {
|
||||
'id': 'Wigginsmp4',
|
||||
'ext': 'mp4',
|
||||
'title': 'Shootaround Access - Dec. 12 | Andrew Wiggins',
|
||||
'description': 'Wolves rookie Andrew Wiggins addresses the media after Friday\'s shootaround.',
|
||||
'upload_date': '20141212',
|
||||
'timestamp': 1418418600,
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': True,
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
_PAGE_SIZE = 30
|
||||
|
||||
def _fetch_page(self, team, video_id, page):
|
||||
search_url = 'http://searchapp2.nba.com/nba-search/query.jsp?' + compat_urllib_parse.urlencode({
|
||||
'type': 'teamvideo',
|
||||
'start': page * self._PAGE_SIZE + 1,
|
||||
'npp': (page + 1) * self._PAGE_SIZE + 1,
|
||||
'sort': 'recent',
|
||||
'output': 'json',
|
||||
'site': team,
|
||||
})
|
||||
results = self._download_json(
|
||||
search_url, video_id, note='Download page %d of playlist data' % page)['results'][0]
|
||||
for item in results:
|
||||
yield self.url_result(compat_urlparse.urljoin('http://www.nba.com/', item['url']))
|
||||
|
||||
def _extract_playlist(self, orig_path, video_id, webpage):
|
||||
team = orig_path.split('/')[0]
|
||||
|
||||
if self._downloader.params.get('noplaylist'):
|
||||
self.to_screen('Downloading just video because of --no-playlist')
|
||||
video_path = self._search_regex(
|
||||
r'nbaVideoCore\.firstVideo\s*=\s*\'([^\']+)\';', webpage, 'video path')
|
||||
video_url = 'http://www.nba.com/%s/video/%s' % (team, video_path)
|
||||
return self.url_result(video_url)
|
||||
|
||||
self.to_screen('Downloading playlist - add --no-playlist to just download video')
|
||||
playlist_title = self._og_search_title(webpage, fatal=False)
|
||||
entries = OnDemandPagedList(
|
||||
functools.partial(self._fetch_page, team, video_id),
|
||||
self._PAGE_SIZE, use_cache=True)
|
||||
|
||||
return self.playlist_result(entries, team, playlist_title)
|
||||
|
||||
def _real_extract(self, url):
|
||||
path, video_id = re.match(self._VALID_URL, url).groups()
|
||||
orig_path = path
|
||||
if path.startswith('nba/'):
|
||||
path = path[3:]
|
||||
|
||||
if 'video/' not in path:
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
path = remove_start(self._search_regex(r'data-videoid="([^"]+)"', webpage, 'video id'), '/')
|
||||
|
||||
if path == '{{id}}':
|
||||
return self._extract_playlist(orig_path, video_id, webpage)
|
||||
|
||||
# See prepareContentId() of pkgCvp.js
|
||||
if path.startswith('video/teams'):
|
||||
path = 'video/channels/proxy/' + path[6:]
|
||||
|
||||
video_info = self._download_xml('http://www.nba.com/%s.xml' % path, video_id)
|
||||
video_id = xpath_text(video_info, 'slug')
|
||||
video_id = os.path.splitext(xpath_text(video_info, 'slug'))[0]
|
||||
title = xpath_text(video_info, 'headline')
|
||||
description = xpath_text(video_info, 'description')
|
||||
duration = parse_duration(xpath_text(video_info, 'length'))
|
||||
|
@@ -3,13 +3,16 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
lowercase_escape,
|
||||
smuggle_url,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
int_or_none,
|
||||
HEADRequest,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
@@ -131,10 +134,10 @@ class NBCSportsIE(InfoExtractor):
|
||||
NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer')
|
||||
|
||||
|
||||
class NBCNewsIE(InfoExtractor):
|
||||
class NBCNewsIE(ThePlatformIE):
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
|
||||
(?:video/.+?/(?P<id>\d+)|
|
||||
(?:watch|feature|nightly-news)/[^/]+/(?P<title>.+))
|
||||
([^/]+/)*(?P<display_id>[^/?]+))
|
||||
'''
|
||||
|
||||
_TESTS = [
|
||||
@@ -149,15 +152,14 @@ class NBCNewsIE(InfoExtractor):
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.nbcnews.com/feature/edward-snowden-interview/how-twitter-reacted-snowden-interview-n117236',
|
||||
'md5': 'b2421750c9f260783721d898f4c42063',
|
||||
'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
|
||||
'md5': 'af1adfa51312291a017720403826bb64',
|
||||
'info_dict': {
|
||||
'id': 'I1wpAI_zmhsQ',
|
||||
'id': '269389891880',
|
||||
'ext': 'mp4',
|
||||
'title': 'How Twitter Reacted To The Snowden Interview',
|
||||
'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
},
|
||||
{
|
||||
'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156',
|
||||
@@ -168,17 +170,29 @@ class NBCNewsIE(InfoExtractor):
|
||||
'title': 'FULL EPISODE: Family Business',
|
||||
'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
|
||||
},
|
||||
'skip': 'This page is unavailable.',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
|
||||
'md5': 'b5dda8cddd8650baa0dcb616dd2cf60d',
|
||||
'md5': '73135a2e0ef819107bbb55a5a9b2a802',
|
||||
'info_dict': {
|
||||
'id': 'sekXqyTVnmN3',
|
||||
'id': '394064451844',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
|
||||
'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
|
||||
'md5': 'a49e173825e5fcd15c13fc297fced39d',
|
||||
'info_dict': {
|
||||
'id': '529953347624',
|
||||
'ext': 'mp4',
|
||||
'title': 'Volkswagen U.S. Chief: We \'Totally Screwed Up\'',
|
||||
'description': 'md5:d22d1281a24f22ea0880741bb4dd6301',
|
||||
},
|
||||
'expected_warnings': ['http-6000 is not available']
|
||||
},
|
||||
{
|
||||
'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952',
|
||||
'only_matching': True,
|
||||
@@ -202,49 +216,80 @@ class NBCNewsIE(InfoExtractor):
|
||||
}
|
||||
else:
|
||||
# "feature" and "nightly-news" pages use theplatform.com
|
||||
title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, title)
|
||||
display_id = mobj.group('display_id')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
info = None
|
||||
bootstrap_json = self._search_regex(
|
||||
r'var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$',
|
||||
webpage, 'bootstrap json', flags=re.MULTILINE)
|
||||
bootstrap = self._parse_json(bootstrap_json, video_id)
|
||||
info = bootstrap['results'][0]['video']
|
||||
mpxid = info['mpxId']
|
||||
r'(?m)var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$',
|
||||
webpage, 'bootstrap json', default=None)
|
||||
if bootstrap_json:
|
||||
bootstrap = self._parse_json(bootstrap_json, display_id)
|
||||
info = bootstrap['results'][0]['video']
|
||||
else:
|
||||
player_instance_json = self._search_regex(
|
||||
r'videoObj\s*:\s*({.+})', webpage, 'player instance')
|
||||
info = self._parse_json(player_instance_json, display_id)
|
||||
video_id = info['mpxId']
|
||||
title = info['title']
|
||||
|
||||
base_urls = [
|
||||
info['fallbackPlaylistUrl'],
|
||||
info['associatedPlaylistUrl'],
|
||||
]
|
||||
subtitles = {}
|
||||
caption_links = info.get('captionLinks')
|
||||
if caption_links:
|
||||
for (sub_key, sub_ext) in (('smpte-tt', 'ttml'), ('web-vtt', 'vtt'), ('srt', 'srt')):
|
||||
sub_url = caption_links.get(sub_key)
|
||||
if sub_url:
|
||||
subtitles.setdefault('en', []).append({
|
||||
'url': sub_url,
|
||||
'ext': sub_ext,
|
||||
})
|
||||
|
||||
for base_url in base_urls:
|
||||
if not base_url:
|
||||
formats = []
|
||||
for video_asset in info['videoAssets']:
|
||||
video_url = video_asset.get('publicUrl')
|
||||
if not video_url:
|
||||
continue
|
||||
playlist_url = base_url + '?form=MPXNBCNewsAPI'
|
||||
|
||||
try:
|
||||
all_videos = self._download_json(playlist_url, title)
|
||||
except ExtractorError as ee:
|
||||
if isinstance(ee.cause, compat_HTTPError):
|
||||
continue
|
||||
raise
|
||||
|
||||
if not all_videos or 'videos' not in all_videos:
|
||||
container = video_asset.get('format')
|
||||
asset_type = video_asset.get('assetType') or ''
|
||||
if container == 'ISM' or asset_type == 'FireTV-Once':
|
||||
continue
|
||||
|
||||
try:
|
||||
info = next(v for v in all_videos['videos'] if v['mpxId'] == mpxid)
|
||||
break
|
||||
except StopIteration:
|
||||
continue
|
||||
|
||||
if info is None:
|
||||
raise ExtractorError('Could not find video in playlists')
|
||||
elif asset_type == 'OnceURL':
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
video_url, video_id)
|
||||
formats.extend(tp_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
else:
|
||||
tbr = int_or_none(video_asset.get('bitRate'), 1000)
|
||||
format_id = 'http%s' % ('-%d' % tbr if tbr else '')
|
||||
video_url = update_url_query(
|
||||
video_url, {'format': 'redirect'})
|
||||
# resolve the url so that we can check availability and detect the correct extension
|
||||
head = self._request_webpage(
|
||||
HEADRequest(video_url), video_id,
|
||||
'Checking %s url' % format_id,
|
||||
'%s is not available' % format_id,
|
||||
fatal=False)
|
||||
if head:
|
||||
video_url = head.geturl()
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': video_url,
|
||||
'width': int_or_none(video_asset.get('width')),
|
||||
'height': int_or_none(video_asset.get('height')),
|
||||
'tbr': tbr,
|
||||
'container': video_asset.get('format'),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'_type': 'url',
|
||||
# We get the best quality video
|
||||
'url': info['videoAssets'][-1]['publicUrl'],
|
||||
'ie_key': 'ThePlatform',
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': info.get('description'),
|
||||
'thumbnail': info.get('description'),
|
||||
'thumbnail': info.get('thumbnail'),
|
||||
'duration': int_or_none(info.get('duration')),
|
||||
'timestamp': parse_iso8601(info.get('pubDate')),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
|
@@ -2,10 +2,15 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..compat import (
|
||||
compat_urllib_parse_unquote,
|
||||
compat_xpath,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
find_xpath_attr,
|
||||
xpath_text,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
@@ -45,18 +50,33 @@ class NozIE(InfoExtractor):
|
||||
duration = int_or_none(xpath_text(
|
||||
doc, './/article/movie/file/duration'))
|
||||
formats = []
|
||||
for qnode in doc.findall('.//article/movie/file/qualities/qual'):
|
||||
video_node = qnode.find('./html_urls/video_url[@format="video/mp4"]')
|
||||
if video_node is None:
|
||||
continue # auto
|
||||
formats.append({
|
||||
'url': video_node.text,
|
||||
'format_name': xpath_text(qnode, './name'),
|
||||
'format_id': xpath_text(qnode, './id'),
|
||||
'height': int_or_none(xpath_text(qnode, './height')),
|
||||
'width': int_or_none(xpath_text(qnode, './width')),
|
||||
'tbr': int_or_none(xpath_text(qnode, './bitrate'), scale=1000),
|
||||
})
|
||||
for qnode in doc.findall(compat_xpath('.//article/movie/file/qualities/qual')):
|
||||
http_url_ele = find_xpath_attr(
|
||||
qnode, './html_urls/video_url', 'format', 'video/mp4')
|
||||
http_url = http_url_ele.text if http_url_ele is not None else None
|
||||
if http_url:
|
||||
formats.append({
|
||||
'url': http_url,
|
||||
'format_name': xpath_text(qnode, './name'),
|
||||
'format_id': '%s-%s' % ('http', xpath_text(qnode, './id')),
|
||||
'height': int_or_none(xpath_text(qnode, './height')),
|
||||
'width': int_or_none(xpath_text(qnode, './width')),
|
||||
'tbr': int_or_none(xpath_text(qnode, './bitrate'), scale=1000),
|
||||
})
|
||||
else:
|
||||
f4m_url = xpath_text(qnode, 'url_hd2')
|
||||
if f4m_url:
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
update_url_query(f4m_url, {'hdcore': '3.4.0'}),
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
m3u8_url_ele = find_xpath_attr(
|
||||
qnode, './html_urls/video_url',
|
||||
'format', 'application/vnd.apple.mpegurl')
|
||||
m3u8_url = m3u8_url_ele.text if m3u8_url_ele is not None else None
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
@@ -4,7 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
@@ -87,7 +90,7 @@ class NRKIE(InfoExtractor):
|
||||
|
||||
|
||||
class NRKPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video)(?:[^/]+/)+(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P<id>[^/]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
|
||||
@@ -126,6 +129,37 @@ class NRKPlaylistIE(InfoExtractor):
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
|
||||
class NRKSkoleIE(InfoExtractor):
|
||||
IE_DESC = 'NRK Skole'
|
||||
_VALID_URL = r'https?://(?:www\.)?nrk\.no/skole/klippdetalj?.*\btopic=(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://nrk.no/skole/klippdetalj?topic=nrk:klipp/616532',
|
||||
'md5': '04cd85877cc1913bce73c5d28a47e00f',
|
||||
'info_dict': {
|
||||
'id': '6021',
|
||||
'ext': 'flv',
|
||||
'title': 'Genetikk og eneggede tvillinger',
|
||||
'description': 'md5:3aca25dcf38ec30f0363428d2b265f8d',
|
||||
'duration': 399,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.nrk.no/skole/klippdetalj?topic=nrk%3Aklipp%2F616532#embed',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.nrk.no/skole/klippdetalj?topic=urn:x-mediadb:21379',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = compat_urllib_parse_unquote(self._match_id(url))
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
nrk_id = self._search_regex(r'data-nrk-id=["\'](\d+)', webpage, 'nrk id')
|
||||
return self.url_result('nrk:%s' % nrk_id)
|
||||
|
||||
|
||||
class NRKTVIE(InfoExtractor):
|
||||
IE_DESC = 'NRK TV and NRK Radio'
|
||||
_VALID_URL = r'(?P<baseurl>https?://(?:tv|radio)\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
|
||||
|
37
youtube_dl/extractor/once.py
Normal file
37
youtube_dl/extractor/once.py
Normal file
@@ -0,0 +1,37 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class OnceIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://once\.unicornmedia\.com/now/[^/]+/[^/]+/(?P<domain_id>[^/]+)/(?P<application_id>[^/]+)/(?:[^/]+/)?(?P<media_item_id>[^/]+)/content\.(?:once|m3u8|mp4)'
|
||||
ADAPTIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/master/playlist/%s/%s/%s/content.m3u8'
|
||||
PROGRESSIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/media/progressive/%s/%s/%s/%s/content.mp4'
|
||||
|
||||
def _extract_once_formats(self, url):
|
||||
domain_id, application_id, media_item_id = re.match(
|
||||
OnceIE._VALID_URL, url).groups()
|
||||
adaptive_formats = self._extract_m3u8_formats(
|
||||
self.ADAPTIVE_URL_TEMPLATE % (
|
||||
domain_id, application_id, media_item_id),
|
||||
media_item_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
formats = []
|
||||
formats.extend(adaptive_formats)
|
||||
for adaptive_format in adaptive_formats:
|
||||
rendition_id = self._search_regex(
|
||||
r'/now/media/playlist/[^/]+/[^/]+/([^/]+)',
|
||||
adaptive_format['url'], 'redition id', default=None)
|
||||
if rendition_id:
|
||||
progressive_format = adaptive_format.copy()
|
||||
progressive_format.update({
|
||||
'url': self.PROGRESSIVE_URL_TEMPLATE % (
|
||||
domain_id, application_id, rendition_id, media_item_id),
|
||||
'format_id': adaptive_format['format_id'].replace(
|
||||
'hls', 'http'),
|
||||
'protocol': 'http',
|
||||
})
|
||||
formats.append(progressive_format)
|
||||
return formats
|
@@ -12,14 +12,14 @@ class PyvideoIE(InfoExtractor):
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
|
||||
'md5': 'de317418c8bc76b1fd8633e4f32acbc6',
|
||||
'md5': '520915673e53a5c5d487c36e0c4d85b5',
|
||||
'info_dict': {
|
||||
'id': '24_4WWkSmNo',
|
||||
'ext': 'mp4',
|
||||
'ext': 'webm',
|
||||
'title': 'Become a logging expert in 30 minutes',
|
||||
'description': 'md5:9665350d466c67fb5b1598de379021f7',
|
||||
'upload_date': '20130320',
|
||||
'uploader': 'NextDayVideo',
|
||||
'uploader': 'Next Day Video',
|
||||
'uploader_id': 'NextDayVideo',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
|
@@ -19,7 +19,7 @@ class Revision3IE(InfoExtractor):
|
||||
'url': 'http://www.revision3.com/technobuffalo/5-google-predictions-for-2016',
|
||||
'md5': 'd94a72d85d0a829766de4deb8daaf7df',
|
||||
'info_dict': {
|
||||
'id': '73034',
|
||||
'id': '71089',
|
||||
'display_id': 'technobuffalo/5-google-predictions-for-2016',
|
||||
'ext': 'webm',
|
||||
'title': '5 Google Predictions for 2016',
|
||||
@@ -31,6 +31,7 @@ class Revision3IE(InfoExtractor):
|
||||
'uploader_id': 'technobuffalo',
|
||||
}
|
||||
}, {
|
||||
# Show
|
||||
'url': 'http://testtube.com/brainstuff',
|
||||
'info_dict': {
|
||||
'id': '251',
|
||||
@@ -41,7 +42,7 @@ class Revision3IE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://testtube.com/dnews/5-weird-ways-plants-can-eat-animals?utm_source=FB&utm_medium=DNews&utm_campaign=DNewsSocial',
|
||||
'info_dict': {
|
||||
'id': '60163',
|
||||
'id': '58227',
|
||||
'display_id': 'dnews/5-weird-ways-plants-can-eat-animals',
|
||||
'duration': 275,
|
||||
'ext': 'webm',
|
||||
@@ -52,18 +53,72 @@ class Revision3IE(InfoExtractor):
|
||||
'uploader': 'DNews',
|
||||
'uploader_id': 'dnews',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://testtube.com/tt-editors-picks/the-israel-palestine-conflict-explained-in-ten-min',
|
||||
'info_dict': {
|
||||
'id': '71618',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'tt-editors-picks/the-israel-palestine-conflict-explained-in-ten-min',
|
||||
'title': 'The Israel-Palestine Conflict Explained in Ten Minutes',
|
||||
'description': 'If you\'d like to learn about the struggle between Israelis and Palestinians, this video is a great place to start',
|
||||
'uploader': 'Editors\' Picks',
|
||||
'uploader_id': 'tt-editors-picks',
|
||||
'timestamp': 1453309200,
|
||||
'upload_date': '20160120',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
# Tag
|
||||
'url': 'http://testtube.com/tech-news',
|
||||
'info_dict': {
|
||||
'id': '21018',
|
||||
'title': 'tech news',
|
||||
},
|
||||
'playlist_mincount': 9,
|
||||
}]
|
||||
_PAGE_DATA_TEMPLATE = 'http://www.%s/apiProxy/ddn/%s?domain=%s'
|
||||
_API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62'
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, display_id = re.match(self._VALID_URL, url).groups()
|
||||
site = domain.split('.')[0]
|
||||
page_info = self._download_json(
|
||||
self._PAGE_DATA_TEMPLATE % (domain, display_id, domain), display_id)
|
||||
|
||||
if page_info['data']['type'] == 'episode':
|
||||
episode_data = page_info['data']
|
||||
video_id = compat_str(episode_data['video']['data']['id'])
|
||||
page_data = page_info['data']
|
||||
page_type = page_data['type']
|
||||
if page_type in ('episode', 'embed'):
|
||||
show_data = page_data['show']['data']
|
||||
page_id = compat_str(page_data['id'])
|
||||
video_id = compat_str(page_data['video']['data']['id'])
|
||||
|
||||
preference = qualities(['mini', 'small', 'medium', 'large'])
|
||||
thumbnails = [{
|
||||
'url': image_url,
|
||||
'id': image_id,
|
||||
'preference': preference(image_id)
|
||||
} for image_id, image_url in page_data.get('images', {}).items()]
|
||||
|
||||
info = {
|
||||
'id': page_id,
|
||||
'display_id': display_id,
|
||||
'title': unescapeHTML(page_data['name']),
|
||||
'description': unescapeHTML(page_data.get('summary')),
|
||||
'timestamp': parse_iso8601(page_data.get('publishTime'), ' '),
|
||||
'author': page_data.get('author'),
|
||||
'uploader': show_data.get('name'),
|
||||
'uploader_id': show_data.get('slug'),
|
||||
'thumbnails': thumbnails,
|
||||
'extractor_key': site,
|
||||
}
|
||||
|
||||
if page_type == 'embed':
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': page_data['video']['data']['embed'],
|
||||
})
|
||||
return info
|
||||
|
||||
video_data = self._download_json(
|
||||
'http://revision3.com/api/getPlaylist.json?api_key=%s&codecs=h264,vp8,theora&video_id=%s' % (self._API_KEY, video_id),
|
||||
video_id)['items'][0]
|
||||
@@ -84,36 +139,30 @@ class Revision3IE(InfoExtractor):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
preference = qualities(['mini', 'small', 'medium', 'large'])
|
||||
thumbnails = [{
|
||||
'url': image_url,
|
||||
'id': image_id,
|
||||
'preference': preference(image_id)
|
||||
} for image_id, image_url in video_data.get('images', {}).items()]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
info.update({
|
||||
'title': unescapeHTML(video_data['title']),
|
||||
'description': unescapeHTML(video_data.get('summary')),
|
||||
'timestamp': parse_iso8601(episode_data.get('publishTime'), ' '),
|
||||
'author': episode_data.get('author'),
|
||||
'uploader': video_data.get('show', {}).get('name'),
|
||||
'uploader_id': video_data.get('show', {}).get('slug'),
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
}
|
||||
})
|
||||
return info
|
||||
else:
|
||||
show_data = page_info['show']['data']
|
||||
list_data = page_info[page_type]['data']
|
||||
episodes_data = page_info['episodes']['data']
|
||||
num_episodes = page_info['meta']['totalEpisodes']
|
||||
processed_episodes = 0
|
||||
entries = []
|
||||
page_num = 1
|
||||
while True:
|
||||
entries.extend([self.url_result(
|
||||
'http://%s/%s/%s' % (domain, display_id, episode['slug'])) for episode in episodes_data])
|
||||
entries.extend([{
|
||||
'_type': 'url',
|
||||
'url': 'http://%s%s' % (domain, episode['path']),
|
||||
'id': compat_str(episode['id']),
|
||||
'ie_key': 'Revision3',
|
||||
'extractor_key': site,
|
||||
} for episode in episodes_data])
|
||||
processed_episodes += len(episodes_data)
|
||||
if processed_episodes == num_episodes:
|
||||
break
|
||||
@@ -123,5 +172,5 @@ class Revision3IE(InfoExtractor):
|
||||
display_id)['episodes']['data']
|
||||
|
||||
return self.playlist_result(
|
||||
entries, compat_str(show_data['id']),
|
||||
show_data.get('name'), show_data.get('summary'))
|
||||
entries, compat_str(list_data['id']),
|
||||
list_data.get('name'), list_data.get('summary'))
|
||||
|
116
youtube_dl/extractor/rice.py
Normal file
116
youtube_dl/extractor/rice.py
Normal file
@@ -0,0 +1,116 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_parse_qs
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
xpath_element,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class RICEIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://mediahub\.rice\.edu/app/[Pp]ortal/video\.aspx\?(?P<query>.+)'
|
||||
_TEST = {
|
||||
'url': 'https://mediahub.rice.edu/app/Portal/video.aspx?PortalID=25ffd62c-3d01-4b29-8c70-7c94270efb3e&DestinationID=66bc9434-03bd-4725-b47e-c659d8d809db&ContentID=YEWIvbhb40aqdjMD1ALSqw',
|
||||
'md5': '9b83b4a2eead4912dc3b7fac7c449b6a',
|
||||
'info_dict': {
|
||||
'id': 'YEWIvbhb40aqdjMD1ALSqw',
|
||||
'ext': 'mp4',
|
||||
'title': 'Active Learning in Archeology',
|
||||
'upload_date': '20140616',
|
||||
'timestamp': 1402926346,
|
||||
}
|
||||
}
|
||||
_NS = 'http://schemas.datacontract.org/2004/07/ensembleVideo.Data.Service.Contracts.Models.Player.Config'
|
||||
|
||||
def _real_extract(self, url):
|
||||
qs = compat_parse_qs(re.match(self._VALID_URL, url).group('query'))
|
||||
if not qs.get('PortalID') or not qs.get('DestinationID') or not qs.get('ContentID'):
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
|
||||
portal_id = qs['PortalID'][0]
|
||||
playlist_id = qs['DestinationID'][0]
|
||||
content_id = qs['ContentID'][0]
|
||||
|
||||
content_data = self._download_xml('https://mediahub.rice.edu/api/portal/GetContentTitle', content_id, query={
|
||||
'portalId': portal_id,
|
||||
'playlistId': playlist_id,
|
||||
'contentId': content_id
|
||||
})
|
||||
metadata = xpath_element(content_data, './/metaData', fatal=True)
|
||||
title = xpath_text(metadata, 'primaryTitle', fatal=True)
|
||||
encodings = xpath_element(content_data, './/encodings', fatal=True)
|
||||
player_data = self._download_xml('https://mediahub.rice.edu/api/player/GetPlayerConfig', content_id, query={
|
||||
'temporaryLinkId': xpath_text(encodings, 'temporaryLinkId', fatal=True),
|
||||
'contentId': content_id,
|
||||
})
|
||||
|
||||
common_fmt = {}
|
||||
dimensions = xpath_text(encodings, 'dimensions')
|
||||
if dimensions:
|
||||
wh = dimensions.split('x')
|
||||
if len(wh) == 2:
|
||||
common_fmt.update({
|
||||
'width': int_or_none(wh[0]),
|
||||
'height': int_or_none(wh[1]),
|
||||
})
|
||||
|
||||
formats = []
|
||||
rtsp_path = xpath_text(player_data, self._xpath_ns('RtspPath', self._NS))
|
||||
if rtsp_path:
|
||||
fmt = {
|
||||
'url': rtsp_path,
|
||||
'format_id': 'rtsp',
|
||||
}
|
||||
fmt.update(common_fmt)
|
||||
formats.append(fmt)
|
||||
for source in player_data.findall(self._xpath_ns('.//Source', self._NS)):
|
||||
video_url = xpath_text(source, self._xpath_ns('File', self._NS))
|
||||
if not video_url:
|
||||
continue
|
||||
if '.m3u8' in video_url:
|
||||
formats.extend(self._extract_m3u8_formats(video_url, content_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
fmt = {
|
||||
'url': video_url,
|
||||
'format_id': video_url.split(':')[0],
|
||||
}
|
||||
fmt.update(common_fmt)
|
||||
rtmp = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', video_url)
|
||||
if rtmp:
|
||||
fmt.update({
|
||||
'url': rtmp.group('url'),
|
||||
'play_path': rtmp.group('playpath'),
|
||||
'app': rtmp.group('app'),
|
||||
'ext': 'flv',
|
||||
})
|
||||
formats.append(fmt)
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = []
|
||||
for content_asset in content_data.findall('.//contentAssets'):
|
||||
asset_type = xpath_text(content_asset, 'type')
|
||||
if asset_type == 'image':
|
||||
image_url = xpath_text(content_asset, 'httpPath')
|
||||
if not image_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'id': xpath_text(content_asset, 'ID'),
|
||||
'url': image_url,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': content_id,
|
||||
'title': title,
|
||||
'description': xpath_text(metadata, 'abstract'),
|
||||
'duration': int_or_none(xpath_text(metadata, 'duration')),
|
||||
'timestamp': parse_iso8601(xpath_text(metadata, 'dateUpdated')),
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
}
|
@@ -10,6 +10,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
remove_end,
|
||||
remove_start,
|
||||
sanitized_Request,
|
||||
std_headers,
|
||||
struct_unpack,
|
||||
@@ -178,14 +179,14 @@ class RTVEInfantilIE(InfoExtractor):
|
||||
class RTVELiveIE(InfoExtractor):
|
||||
IE_NAME = 'rtve.es:live'
|
||||
IE_DESC = 'RTVE.es live streams'
|
||||
_VALID_URL = r'http://www\.rtve\.es/(?:deportes/directo|noticias|television)/(?P<id>[a-zA-Z0-9-]+)'
|
||||
_VALID_URL = r'http://www\.rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.rtve.es/noticias/directo-la-1/',
|
||||
'url': 'http://www.rtve.es/directo/la-1/',
|
||||
'info_dict': {
|
||||
'id': 'directo-la-1',
|
||||
'ext': 'flv',
|
||||
'title': 're:^La 1 de TVE [0-9]{4}-[0-9]{2}-[0-9]{2}Z[0-9]{6}$',
|
||||
'id': 'la-1',
|
||||
'ext': 'mp4',
|
||||
'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2}Z[0-9]{6}$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'live stream',
|
||||
@@ -198,23 +199,20 @@ class RTVELiveIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
player_url = self._search_regex(
|
||||
r'<param name="movie" value="([^"]+)"/>', webpage, 'player URL')
|
||||
title = remove_end(self._og_search_title(webpage), ' en directo')
|
||||
title = remove_end(self._og_search_title(webpage), ' en directo en RTVE.es')
|
||||
title = remove_start(title, 'Estoy viendo ')
|
||||
title += ' ' + time.strftime('%Y-%m-%dZ%H%M%S', start_time)
|
||||
|
||||
vidplayer_id = self._search_regex(
|
||||
r' id="vidplayer([0-9]+)"', webpage, 'internal video ID')
|
||||
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % vidplayer_id
|
||||
r'playerId=player([0-9]+)', webpage, 'internal video ID')
|
||||
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/amonet/videos/%s.png' % vidplayer_id
|
||||
png = self._download_webpage(png_url, video_id, 'Downloading url information')
|
||||
video_url = _decrypt_url(png)
|
||||
m3u8_url = _decrypt_url(png)
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'ext': 'flv',
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'app': 'rtve-live-live?ovpfv=2.1.2',
|
||||
'player_url': player_url,
|
||||
'rtmp_live': True,
|
||||
'formats': formats,
|
||||
'is_live': True,
|
||||
}
|
||||
|
@@ -4,14 +4,13 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .brightcove import BrightcoveLegacyIE
|
||||
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
smuggle_url,
|
||||
std_headers,
|
||||
urlencode_postdata,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
@@ -20,28 +19,30 @@ class SafariBaseIE(InfoExtractor):
|
||||
_SUCCESSFUL_LOGIN_REGEX = r'<a href="/accounts/logout/"[^>]*>Sign Out</a>'
|
||||
_NETRC_MACHINE = 'safari'
|
||||
|
||||
_API_BASE = 'https://www.safaribooksonline.com/api/v1/book'
|
||||
_API_BASE = 'https://www.safaribooksonline.com/api/v1'
|
||||
_API_FORMAT = 'json'
|
||||
|
||||
LOGGED_IN = False
|
||||
|
||||
def _real_initialize(self):
|
||||
# We only need to log in once for courses or individual videos
|
||||
if not self.LOGGED_IN:
|
||||
self._login()
|
||||
SafariBaseIE.LOGGED_IN = True
|
||||
self._login()
|
||||
|
||||
def _login(self):
|
||||
# We only need to log in once for courses or individual videos
|
||||
if self.LOGGED_IN:
|
||||
return
|
||||
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
self.raise_login_required('safaribooksonline.com account is required')
|
||||
return
|
||||
|
||||
headers = std_headers
|
||||
headers = std_headers.copy()
|
||||
if 'Referer' not in headers:
|
||||
headers['Referer'] = self._LOGIN_URL
|
||||
login_page_request = sanitized_Request(self._LOGIN_URL, headers=headers)
|
||||
|
||||
login_page = self._download_webpage(
|
||||
self._LOGIN_URL, None,
|
||||
login_page_request, None,
|
||||
'Downloading login form')
|
||||
|
||||
csrf = self._html_search_regex(
|
||||
@@ -66,6 +67,8 @@ class SafariBaseIE(InfoExtractor):
|
||||
'Login failed; make sure your credentials are correct and try again.',
|
||||
expected=True)
|
||||
|
||||
SafariBaseIE.LOGGED_IN = True
|
||||
|
||||
self.to_screen('Login successful')
|
||||
|
||||
|
||||
@@ -85,13 +88,15 @@ class SafariIE(SafariBaseIE):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
|
||||
'md5': '5b0c4cc1b3c1ba15dda7344085aa5592',
|
||||
'md5': 'dcc5a425e79f2564148652616af1f2a3',
|
||||
'info_dict': {
|
||||
'id': '2842601850001',
|
||||
'id': '0_qbqx90ic',
|
||||
'ext': 'mp4',
|
||||
'title': 'Introduction',
|
||||
'title': 'Introduction to Hadoop Fundamentals LiveLessons',
|
||||
'timestamp': 1437758058,
|
||||
'upload_date': '20150724',
|
||||
'uploader_id': 'stork',
|
||||
},
|
||||
'skip': 'Requires safaribooksonline account credentials',
|
||||
}, {
|
||||
'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
|
||||
'only_matching': True,
|
||||
@@ -106,15 +111,30 @@ class SafariIE(SafariBaseIE):
|
||||
course_id = mobj.group('course_id')
|
||||
part = mobj.group('part')
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'%s/%s/chapter-content/%s.html' % (self._API_BASE, course_id, part),
|
||||
part)
|
||||
webpage = self._download_webpage(url, '%s/%s' % (course_id, part))
|
||||
reference_id = self._search_regex(r'data-reference-id="([^"]+)"', webpage, 'kaltura reference id')
|
||||
partner_id = self._search_regex(r'data-partner-id="([^"]+)"', webpage, 'kaltura widget id')
|
||||
ui_id = self._search_regex(r'data-ui-id="([^"]+)"', webpage, 'kaltura uiconf id')
|
||||
|
||||
bc_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
|
||||
if not bc_url:
|
||||
raise ExtractorError('Could not extract Brightcove URL from %s' % url, expected=True)
|
||||
query = {
|
||||
'wid': '_%s' % partner_id,
|
||||
'uiconf_id': ui_id,
|
||||
'flashvars[referenceId]': reference_id,
|
||||
}
|
||||
|
||||
return self.url_result(smuggle_url(bc_url, {'Referer': url}), 'BrightcoveLegacy')
|
||||
if self.LOGGED_IN:
|
||||
kaltura_session = self._download_json(
|
||||
'%s/player/kaltura_session/?reference_id=%s' % (self._API_BASE, reference_id),
|
||||
course_id, 'Downloading kaltura session JSON',
|
||||
'Unable to download kaltura session JSON', fatal=False)
|
||||
if kaltura_session:
|
||||
session = kaltura_session.get('session')
|
||||
if session:
|
||||
query['flashvars[ks]'] = session
|
||||
|
||||
return self.url_result(update_url_query(
|
||||
'https://cdnapisec.kaltura.com/html5/html5lib/v2.37.1/mwEmbedFrame.php', query),
|
||||
'Kaltura')
|
||||
|
||||
|
||||
class SafariCourseIE(SafariBaseIE):
|
||||
@@ -140,7 +160,7 @@ class SafariCourseIE(SafariBaseIE):
|
||||
course_id = self._match_id(url)
|
||||
|
||||
course_json = self._download_json(
|
||||
'%s/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT),
|
||||
'%s/book/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT),
|
||||
course_id, 'Downloading course JSON')
|
||||
|
||||
if 'chapters' not in course_json:
|
||||
|
@@ -2,6 +2,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class SBSIE(InfoExtractor):
|
||||
@@ -31,21 +35,28 @@ class SBSIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
player_params = self._download_json(
|
||||
'http://www.sbs.com.au/api/video_pdkvars/id/%s?form=json' % video_id, video_id)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://www.sbs.com.au/ondemand/video/single/%s?context=web' % video_id, video_id)
|
||||
|
||||
player_params = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)var\s+playerParams\s*=\s*({.+?});', webpage, 'playerParams'),
|
||||
video_id)
|
||||
error = player_params.get('error')
|
||||
if error:
|
||||
error_message = 'Sorry, The video you are looking for does not exist.'
|
||||
video_data = error.get('results') or {}
|
||||
error_code = error.get('errorCode')
|
||||
if error_code == 'ComingSoon':
|
||||
error_message = '%s is not yet available.' % video_data.get('title', '')
|
||||
elif error_code in ('Forbidden', 'intranetAccessOnly'):
|
||||
error_message = 'Sorry, This video cannot be accessed via this website'
|
||||
elif error_code == 'Expired':
|
||||
error_message = 'Sorry, %s is no longer available.' % video_data.get('title', '')
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
|
||||
|
||||
urls = player_params['releaseUrls']
|
||||
theplatform_url = (urls.get('progressive') or urls.get('standard') or
|
||||
urls.get('html') or player_params['relatedItemsURL'])
|
||||
theplatform_url = (urls.get('progressive') or urls.get('html') or
|
||||
urls.get('standard') or player_params['relatedItemsURL'])
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': theplatform_url,
|
||||
'url': smuggle_url(theplatform_url, {'force_smil_url': True}),
|
||||
}
|
||||
|
@@ -70,25 +70,27 @@ class ScreenwaveMediaIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for source in sources:
|
||||
if source['type'] == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(source['file'], video_id, ext='mp4'))
|
||||
file_ = source.get('file')
|
||||
if not file_:
|
||||
continue
|
||||
if source.get('type') == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(file_, video_id, ext='mp4'))
|
||||
else:
|
||||
file_ = source.get('file')
|
||||
if not file_:
|
||||
continue
|
||||
format_label = source.get('label')
|
||||
format_id = self._search_regex(
|
||||
r'_(.+?)\.[^.]+$', file_, 'format id', default=None)
|
||||
if not self._is_valid_url(file_, video_id, format_id or 'video'):
|
||||
continue
|
||||
format_label = source.get('label')
|
||||
height = int_or_none(self._search_regex(
|
||||
r'^(\d+)[pP]', format_label, 'height', default=None))
|
||||
formats.append({
|
||||
'url': source['file'],
|
||||
'url': file_,
|
||||
'format_id': format_id,
|
||||
'format': format_label,
|
||||
'ext': source.get('type'),
|
||||
'height': height,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -1,7 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
@@ -14,7 +12,7 @@ class SexuIE(InfoExtractor):
|
||||
'id': '961791',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:4d05a19a5fc049a63dbbaf05fb71d91b',
|
||||
'description': 'md5:c5ed8625eb386855d5a7967bd7b77a54',
|
||||
'description': 'md5:2b75327061310a3afb3fbd7d09e2e403',
|
||||
'categories': list, # NSFW
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
'age_limit': 18,
|
||||
@@ -25,13 +23,18 @@ class SexuIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
quality_arr = self._search_regex(
|
||||
r'sources:\s*\[([^\]]+)\]', webpage, 'forrmat string')
|
||||
jwvideo = self._parse_json(
|
||||
self._search_regex(r'\.setup\(\s*({.+?})\s*\);', webpage, 'jwvideo'),
|
||||
video_id)
|
||||
|
||||
sources = jwvideo['sources']
|
||||
|
||||
formats = [{
|
||||
'url': fmt[0].replace('\\', ''),
|
||||
'format_id': fmt[1],
|
||||
'height': int(fmt[1][:3]),
|
||||
} for fmt in re.findall(r'"file":"([^"]+)","label":"([^"]+)"', quality_arr)]
|
||||
'url': source['file'].replace('\\', ''),
|
||||
'format_id': source.get('label'),
|
||||
'height': self._search_regex(
|
||||
r'^(\d+)[pP]', source.get('label', ''), 'height', default=None),
|
||||
} for source in sources if source.get('file')]
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._html_search_regex(
|
||||
@@ -40,9 +43,7 @@ class SexuIE(InfoExtractor):
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, 'description')
|
||||
|
||||
thumbnail = self._html_search_regex(
|
||||
r'image:\s*"([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
thumbnail = jwvideo.get('image')
|
||||
|
||||
categories_str = self._html_search_meta(
|
||||
'keywords', webpage, 'categories')
|
||||
|
@@ -1,38 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .brightcove import BrightcoveLegacyIE
|
||||
from ..utils import RegexNotFoundError, ExtractorError
|
||||
|
||||
|
||||
class SpaceIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www|m)\.)?space\.com/\d+-(?P<title>[^/\.\?]*?)-video\.html'
|
||||
_TEST = {
|
||||
'add_ie': ['BrightcoveLegacy'],
|
||||
'url': 'http://www.space.com/23373-huge-martian-landforms-detail-revealed-by-european-probe-video.html',
|
||||
'info_dict': {
|
||||
'id': '2780937028001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Huge Martian Landforms\' Detail Revealed By European Probe | Video',
|
||||
'description': 'md5:db81cf7f3122f95ed234b631a6ea1e61',
|
||||
'uploader': 'TechMedia Networks',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, title)
|
||||
try:
|
||||
# Some videos require the playerKey field, which isn't define in
|
||||
# the BrightcoveExperience object
|
||||
brightcove_url = self._og_search_video_url(webpage)
|
||||
except RegexNotFoundError:
|
||||
# Other videos works fine with the info from the object
|
||||
brightcove_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
|
||||
if brightcove_url is None:
|
||||
raise ExtractorError(
|
||||
'The webpage does not contain a video', expected=True)
|
||||
return self.url_result(brightcove_url, BrightcoveLegacyIE.ie_key())
|
@@ -19,20 +19,25 @@ class SVTBaseIE(InfoExtractor):
|
||||
video_info = info['video']
|
||||
formats = []
|
||||
for vr in video_info['videoReferences']:
|
||||
player_type = vr.get('playerType')
|
||||
vurl = vr['url']
|
||||
ext = determine_ext(vurl)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
vurl, video_id,
|
||||
ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=vr.get('playerType')))
|
||||
m3u8_id=player_type, fatal=False))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
vurl + '?hdcore=3.3.0', video_id,
|
||||
f4m_id=vr.get('playerType')))
|
||||
f4m_id=player_type, fatal=False))
|
||||
elif ext == 'mpd':
|
||||
if player_type == 'dashhbbtv':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
vurl, video_id, mpd_id=player_type, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': vr.get('playerType'),
|
||||
'format_id': player_type,
|
||||
'url': vurl,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
@@ -73,7 +73,7 @@ class TEDIE(InfoExtractor):
|
||||
'add_ie': ['Youtube'],
|
||||
'info_dict': {
|
||||
'id': '_ZG8HBuDjgc',
|
||||
'ext': 'mp4',
|
||||
'ext': 'webm',
|
||||
'title': 'Douglas Adams: Parrots the Universe and Everything',
|
||||
'description': 'md5:01ad1e199c49ac640cb1196c0e9016af',
|
||||
'uploader': 'University of California Television (UCTV)',
|
||||
|
@@ -48,6 +48,6 @@ class TF1IE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
wat_id = self._html_search_regex(
|
||||
r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})\1',
|
||||
r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})(?:#.*?)?\1',
|
||||
webpage, 'wat id', group='id')
|
||||
return self.url_result('wat:%s' % wat_id, 'Wat')
|
||||
|
@@ -8,13 +8,12 @@ import binascii
|
||||
import hashlib
|
||||
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .once import OnceIE
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
@@ -22,36 +21,35 @@ from ..utils import (
|
||||
unsmuggle_url,
|
||||
xpath_with_ns,
|
||||
mimetype2ext,
|
||||
find_xpath_attr,
|
||||
)
|
||||
|
||||
default_ns = 'http://www.w3.org/2005/SMIL21/Language'
|
||||
_x = lambda p: xpath_with_ns(p, {'smil': default_ns})
|
||||
|
||||
|
||||
class ThePlatformBaseIE(InfoExtractor):
|
||||
class ThePlatformBaseIE(OnceIE):
|
||||
def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
|
||||
meta = self._download_xml(smil_url, video_id, note=note)
|
||||
try:
|
||||
error_msg = next(
|
||||
n.attrib['abstract']
|
||||
for n in meta.findall(_x('.//smil:ref'))
|
||||
if n.attrib.get('title') == 'Geographic Restriction' or n.attrib.get('title') == 'Expired')
|
||||
except StopIteration:
|
||||
pass
|
||||
else:
|
||||
raise ExtractorError(error_msg, expected=True)
|
||||
meta = self._download_xml(smil_url, video_id, note=note, query={'format': 'SMIL'})
|
||||
error_element = find_xpath_attr(
|
||||
meta, _x('.//smil:ref'), 'src',
|
||||
'http://link.theplatform.com/s/errorFiles/Unavailable.mp4')
|
||||
if error_element is not None:
|
||||
raise ExtractorError(error_element.attrib['abstract'], expected=True)
|
||||
|
||||
formats = self._parse_smil_formats(
|
||||
smil_formats = self._parse_smil_formats(
|
||||
meta, smil_url, video_id, namespace=default_ns,
|
||||
# the parameters are from syfy.com, other sites may use others,
|
||||
# they also work for nbc.com
|
||||
f4m_params={'g': 'UXWGVKRWHFSP', 'hdcore': '3.0.3'},
|
||||
transform_rtmp_url=lambda streamer, src: (streamer, 'mp4:' + src))
|
||||
|
||||
for _format in formats:
|
||||
ext = determine_ext(_format['url'])
|
||||
if ext == 'once':
|
||||
_format['ext'] = 'mp4'
|
||||
formats = []
|
||||
for _format in smil_formats:
|
||||
if OnceIE.suitable(_format['url']):
|
||||
formats.extend(self._extract_once_formats(_format['url']))
|
||||
else:
|
||||
formats.append(_format)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
@@ -128,7 +126,7 @@ class ThePlatformIE(ThePlatformBaseIE):
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://player.theplatform.com/p/2E2eJC/nbcNewsOffsite?guid=tdy_or_siri_150701',
|
||||
'md5': '734f3790fb5fc4903da391beeebc4836',
|
||||
'md5': 'fb96bb3d85118930a5b055783a3bd992',
|
||||
'info_dict': {
|
||||
'id': 'tdy_or_siri_150701',
|
||||
'ext': 'mp4',
|
||||
@@ -138,7 +136,6 @@ class ThePlatformIE(ThePlatformBaseIE):
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'timestamp': 1435752600,
|
||||
'upload_date': '20150701',
|
||||
'categories': ['Today/Shows/Orange Room', 'Today/Sections/Money', 'Today/Topics/Tech', "Today/Topics/Editor's picks"],
|
||||
},
|
||||
}, {
|
||||
# From http://www.nbc.com/the-blacklist/video/sir-crispin-crandall/2928790?onid=137781#vc137781=1
|
||||
@@ -216,7 +213,7 @@ class ThePlatformIE(ThePlatformBaseIE):
|
||||
webpage, 'smil url', group='url')
|
||||
path = self._search_regex(
|
||||
r'link\.theplatform\.com/s/((?:[^/?#&]+/)+[^/?#&]+)', smil_url, 'path')
|
||||
smil_url += '?' if '?' not in smil_url else '&' + 'formats=m3u,mpeg4&format=SMIL'
|
||||
smil_url += '?' if '?' not in smil_url else '&' + 'formats=m3u,mpeg4'
|
||||
elif mobj.group('config'):
|
||||
config_url = url + '&form=json'
|
||||
config_url = config_url.replace('swf/', 'config/')
|
||||
@@ -226,9 +223,9 @@ class ThePlatformIE(ThePlatformBaseIE):
|
||||
release_url = config['releaseUrl']
|
||||
else:
|
||||
release_url = 'http://link.theplatform.com/s/%s?mbr=true' % path
|
||||
smil_url = release_url + '&format=SMIL&formats=MPEG4&manifest=f4m'
|
||||
smil_url = release_url + '&formats=MPEG4&manifest=f4m'
|
||||
else:
|
||||
smil_url = 'http://link.theplatform.com/s/%s/meta.smil?format=smil&mbr=true' % path
|
||||
smil_url = 'http://link.theplatform.com/s/%s?mbr=true' % path
|
||||
|
||||
sig = smuggled_data.get('sig')
|
||||
if sig:
|
||||
@@ -253,7 +250,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
|
||||
_TEST = {
|
||||
# From http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=n_hardball_5biden_140207
|
||||
'url': 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207',
|
||||
'md5': '22d2b84f058d3586efcd99e57d59d314',
|
||||
'md5': '6e32495b5073ab414471b615c5ded394',
|
||||
'info_dict': {
|
||||
'id': 'n_hardball_5biden_140207',
|
||||
'ext': 'mp4',
|
||||
@@ -283,7 +280,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
|
||||
first_video_id = None
|
||||
duration = None
|
||||
for item in entry['media$content']:
|
||||
smil_url = item['plfile$url'] + '&format=SMIL&mbr=true'
|
||||
smil_url = item['plfile$url'] + '&mbr=true'
|
||||
cur_video_id = ThePlatformIE._match_id(smil_url)
|
||||
if first_video_id is None:
|
||||
first_video_id = cur_video_id
|
||||
|
31
youtube_dl/extractor/thestar.py
Normal file
31
youtube_dl/extractor/thestar.py
Normal file
@@ -0,0 +1,31 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .brightcove import BrightcoveLegacyIE
|
||||
from ..compat import compat_parse_qs
|
||||
|
||||
|
||||
class TheStarIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?thestar\.com/(?:[^/]+/)*(?P<id>.+)\.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.thestar.com/life/2016/02/01/mankind-why-this-woman-started-a-men-s-skincare-line.html',
|
||||
'md5': '2c62dd4db2027e35579fefb97a8b6554',
|
||||
'info_dict': {
|
||||
'id': '4732393888001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Mankind: Why this woman started a men\'s skin care line',
|
||||
'description': 'Robert Cribb talks to Young Lee, the founder of Uncle Peter\'s MAN.',
|
||||
'uploader_id': '794267642001',
|
||||
'timestamp': 1454353482,
|
||||
'upload_date': '20160201',
|
||||
}
|
||||
}
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/794267642001/default_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
|
||||
brightcove_id = compat_parse_qs(brightcove_legacy_url)['@videoPlayer'][0]
|
||||
return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
|
@@ -4,12 +4,12 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .brightcove import BrightcoveLegacyIE
|
||||
from ..compat import compat_urlparse
|
||||
from ..compat import compat_parse_qs
|
||||
|
||||
|
||||
class TlcDeIE(InfoExtractor):
|
||||
IE_NAME = 'tlc.de'
|
||||
_VALID_URL = r'http://www\.tlc\.de/sendungen/[^/]+/videos/(?P<title>[^/?]+)'
|
||||
_VALID_URL = r'http://www\.tlc\.de/(?:[^/]+/)*videos/(?P<title>[^/?#]+)?(?:.*#(?P<id>\d+))?'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001',
|
||||
@@ -17,32 +17,23 @@ class TlcDeIE(InfoExtractor):
|
||||
'id': '3235167922001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Breaking Amish: Die Welt da draußen',
|
||||
'uploader': 'Discovery Networks - Germany',
|
||||
'description': (
|
||||
'Vier Amische und eine Mennonitin wagen in New York'
|
||||
' den Sprung in ein komplett anderes Leben. Begleitet sie auf'
|
||||
' ihrem spannenden Weg.'),
|
||||
'timestamp': 1396598084,
|
||||
'upload_date': '20140404',
|
||||
'uploader_id': '1659832546',
|
||||
},
|
||||
}
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1659832546/default_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, title)
|
||||
iframe_url = self._search_regex(
|
||||
'<iframe src="(http://www\.tlc\.de/wp-content/.+?)"', webpage,
|
||||
'iframe url')
|
||||
# Otherwise we don't get the correct 'BrightcoveExperience' element,
|
||||
# example: http://www.tlc.de/sendungen/cake-boss/videos/cake-boss-cannoli-drama/
|
||||
iframe_url = iframe_url.replace('.htm?', '.php?')
|
||||
url_fragment = compat_urlparse.urlparse(url).fragment
|
||||
if url_fragment:
|
||||
# Since the fragment is not send to the server, we always get the same iframe
|
||||
iframe_url = re.sub(r'playlist=(\d+)', 'playlist=%s' % url_fragment, iframe_url)
|
||||
iframe = self._download_webpage(iframe_url, title)
|
||||
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': BrightcoveLegacyIE._extract_brightcove_url(iframe),
|
||||
'ie': BrightcoveLegacyIE.ie_key(),
|
||||
}
|
||||
brightcove_id = mobj.group('id')
|
||||
if not brightcove_id:
|
||||
title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, title)
|
||||
brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
|
||||
brightcove_id = compat_parse_qs(brightcove_legacy_url)['@videoPlayer'][0]
|
||||
return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
|
||||
|
@@ -71,7 +71,7 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
display_id = mobj.group('display_id') if 'display_id' in mobj.groupdict() else video_id
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
@@ -117,7 +117,7 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
|
||||
title = self._html_search_regex(
|
||||
self._TITLE_REGEX, webpage, 'title') if self._TITLE_REGEX else self._og_search_title(webpage)
|
||||
|
||||
age_limit = self._rta_search(webpage)
|
||||
age_limit = self._rta_search(webpage) or 18
|
||||
|
||||
duration = parse_duration(self._html_search_meta(
|
||||
'duration', webpage, 'duration', default=None))
|
||||
@@ -152,6 +152,36 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class TNAFlixNetworkEmbedIE(TNAFlixNetworkBaseIE):
|
||||
_VALID_URL = r'https?://player\.(?:tna|emp)flix\.com/video/(?P<id>\d+)'
|
||||
|
||||
_TITLE_REGEX = r'<title>([^<]+)</title>'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://player.tnaflix.com/video/6538',
|
||||
'info_dict': {
|
||||
'id': '6538',
|
||||
'display_id': '6538',
|
||||
'ext': 'mp4',
|
||||
'title': 'Educational xxx video',
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://player.empflix.com/video/33051',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [url for _, url in re.findall(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.(?:tna|emp)flix\.com/video/\d+)\1',
|
||||
webpage)]
|
||||
|
||||
|
||||
class TNAFlixIE(TNAFlixNetworkBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?tnaflix\.com/[^/]+/(?P<display_id>[^/]+)/video(?P<id>\d+)'
|
||||
|
||||
|
33
youtube_dl/extractor/tv3.py
Normal file
33
youtube_dl/extractor/tv3.py
Normal file
@@ -0,0 +1,33 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class TV3IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tv3\.co\.nz/(?P<id>[^/]+)/tabid/\d+/articleID/\d+/MCat/\d+/Default\.aspx'
|
||||
_TEST = {
|
||||
'url': 'http://www.tv3.co.nz/MOTORSPORT-SRS-SsangYong-Hampton-Downs-Round-3/tabid/3692/articleID/121615/MCat/2915/Default.aspx',
|
||||
'info_dict': {
|
||||
'id': '4659127992001',
|
||||
'ext': 'mp4',
|
||||
'title': 'CRC Motorsport: SRS SsangYong Hampton Downs Round 3 - S2015 Ep3',
|
||||
'description': 'SsangYong Racing Series returns for Round 3 with drivers from New Zealand and Australia taking to the grid at Hampton Downs raceway.',
|
||||
'uploader_id': '3812193411001',
|
||||
'upload_date': '20151213',
|
||||
'timestamp': 1449975272,
|
||||
},
|
||||
'expected_warnings': [
|
||||
'Failed to download MPD manifest'
|
||||
],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/3812193411001/default_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
brightcove_id = self._search_regex(r'<param\s*name="@videoPlayer"\s*value="(\d+)"', webpage, 'brightcove id')
|
||||
return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
|
@@ -17,6 +17,7 @@ from ..utils import (
|
||||
encode_dict,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
@@ -251,6 +252,7 @@ class TwitchVodIE(TwitchItemBaseIE):
|
||||
self._USHER_BASE, item_id,
|
||||
compat_urllib_parse.urlencode({
|
||||
'allow_source': 'true',
|
||||
'allow_audio_only': 'true',
|
||||
'allow_spectre': 'true',
|
||||
'player': 'twitchweb',
|
||||
'nauth': access_token['token'],
|
||||
@@ -281,17 +283,37 @@ class TwitchPlaylistBaseIE(TwitchBaseIE):
|
||||
entries = []
|
||||
offset = 0
|
||||
limit = self._PAGE_LIMIT
|
||||
broken_paging_detected = False
|
||||
counter_override = None
|
||||
for counter in itertools.count(1):
|
||||
response = self._download_json(
|
||||
self._PLAYLIST_URL % (channel_id, offset, limit),
|
||||
channel_id, 'Downloading %s videos JSON page %d' % (self._PLAYLIST_TYPE, counter))
|
||||
channel_id,
|
||||
'Downloading %s videos JSON page %s'
|
||||
% (self._PLAYLIST_TYPE, counter_override or counter))
|
||||
page_entries = self._extract_playlist_page(response)
|
||||
if not page_entries:
|
||||
break
|
||||
total = int_or_none(response.get('_total'))
|
||||
# Since the beginning of March 2016 twitch's paging mechanism
|
||||
# is completely broken on the twitch side. It simply ignores
|
||||
# a limit and returns the whole offset number of videos.
|
||||
# Working around by just requesting all videos at once.
|
||||
# Upd: pagination bug was fixed by twitch on 15.03.2016.
|
||||
if not broken_paging_detected and total and len(page_entries) > limit:
|
||||
self.report_warning(
|
||||
'Twitch pagination is broken on twitch side, requesting all videos at once',
|
||||
channel_id)
|
||||
broken_paging_detected = True
|
||||
offset = total
|
||||
counter_override = '(all at once)'
|
||||
continue
|
||||
entries.extend(page_entries)
|
||||
if broken_paging_detected or total and len(page_entries) >= total:
|
||||
break
|
||||
offset += limit
|
||||
return self.playlist_result(
|
||||
[self.url_result(entry) for entry in set(entries)],
|
||||
[self.url_result(entry) for entry in orderedSet(entries)],
|
||||
channel_id, channel_name)
|
||||
|
||||
def _extract_playlist_page(self, response):
|
||||
@@ -411,6 +433,7 @@ class TwitchStreamIE(TwitchBaseIE):
|
||||
|
||||
query = {
|
||||
'allow_source': 'true',
|
||||
'allow_audio_only': 'true',
|
||||
'p': random.randint(1000000, 10000000),
|
||||
'player': 'twitchweb',
|
||||
'segment_preference': '4',
|
||||
|
@@ -10,7 +10,6 @@ from ..utils import (
|
||||
remove_end,
|
||||
int_or_none,
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -22,7 +21,7 @@ class TwitterBaseIE(InfoExtractor):
|
||||
|
||||
class TwitterCardIE(TwitterBaseIE):
|
||||
IE_NAME = 'twitter:card'
|
||||
_VALID_URL = r'https?://(?:www\.)?twitter\.com/i/cards/tfw/v1/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?twitter\.com/i/(?:cards/tfw/v1|videos/tweet)/(?P<id>\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
|
||||
@@ -30,7 +29,7 @@ class TwitterCardIE(TwitterBaseIE):
|
||||
'info_dict': {
|
||||
'id': '560070183650213889',
|
||||
'ext': 'mp4',
|
||||
'title': 'TwitterCard',
|
||||
'title': 'Twitter Card',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 30.033,
|
||||
}
|
||||
@@ -41,7 +40,7 @@ class TwitterCardIE(TwitterBaseIE):
|
||||
'info_dict': {
|
||||
'id': '623160978427936768',
|
||||
'ext': 'mp4',
|
||||
'title': 'TwitterCard',
|
||||
'title': 'Twitter Card',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'duration': 80.155,
|
||||
},
|
||||
@@ -72,63 +71,102 @@ class TwitterCardIE(TwitterBaseIE):
|
||||
'title': 'Vine by ArsenalTerje',
|
||||
},
|
||||
'add_ie': ['Vine'],
|
||||
}
|
||||
}, {
|
||||
'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
|
||||
'md5': '3846d0a07109b5ab622425449b59049d',
|
||||
'info_dict': {
|
||||
'id': '705235433198714880',
|
||||
'ext': 'mp4',
|
||||
'title': 'Twitter web player',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
# Different formats served for different User-Agents
|
||||
USER_AGENTS = [
|
||||
'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)', # mp4
|
||||
'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0', # webm
|
||||
]
|
||||
|
||||
config = None
|
||||
formats = []
|
||||
for user_agent in USER_AGENTS:
|
||||
request = sanitized_Request(url)
|
||||
request.add_header('User-Agent', user_agent)
|
||||
webpage = self._download_webpage(request, video_id)
|
||||
duration = None
|
||||
|
||||
iframe_url = self._html_search_regex(
|
||||
r'<iframe[^>]+src="((?:https?:)?//(?:www.youtube.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"',
|
||||
webpage, 'video iframe', default=None)
|
||||
if iframe_url:
|
||||
return self.url_result(iframe_url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
config = self._parse_json(self._html_search_regex(
|
||||
r'data-player-config="([^"]+)"', webpage, 'data player config'),
|
||||
video_id)
|
||||
if 'playlist' not in config:
|
||||
if 'vmapUrl' in config:
|
||||
formats.append({
|
||||
'url': self._get_vmap_video_url(config['vmapUrl'], video_id),
|
||||
})
|
||||
break # same video regardless of UA
|
||||
continue
|
||||
iframe_url = self._html_search_regex(
|
||||
r'<iframe[^>]+src="((?:https?:)?//(?:www.youtube.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"',
|
||||
webpage, 'video iframe', default=None)
|
||||
if iframe_url:
|
||||
return self.url_result(iframe_url)
|
||||
|
||||
video_url = config['playlist'][0]['source']
|
||||
config = self._parse_json(self._html_search_regex(
|
||||
r'data-(?:player-)?config="([^"]+)"', webpage, 'data player config'),
|
||||
video_id)
|
||||
|
||||
def _search_dimensions_in_video_url(a_format, video_url):
|
||||
m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
|
||||
if m:
|
||||
a_format.update({
|
||||
'width': int(m.group('width')),
|
||||
'height': int(m.group('height')),
|
||||
})
|
||||
|
||||
playlist = config.get('playlist')
|
||||
if playlist:
|
||||
video_url = playlist[0]['source']
|
||||
|
||||
f = {
|
||||
'url': video_url,
|
||||
}
|
||||
|
||||
m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
|
||||
if m:
|
||||
f.update({
|
||||
'width': int(m.group('width')),
|
||||
'height': int(m.group('height')),
|
||||
})
|
||||
_search_dimensions_in_video_url(f, video_url)
|
||||
|
||||
formats.append(f)
|
||||
|
||||
vmap_url = config.get('vmapUrl') or config.get('vmap_url')
|
||||
if vmap_url:
|
||||
formats.append({
|
||||
'url': self._get_vmap_video_url(vmap_url, video_id),
|
||||
})
|
||||
|
||||
media_info = None
|
||||
|
||||
for entity in config.get('status', {}).get('entities', []):
|
||||
if 'mediaInfo' in entity:
|
||||
media_info = entity['mediaInfo']
|
||||
|
||||
if media_info:
|
||||
for media_variant in media_info['variants']:
|
||||
media_url = media_variant['url']
|
||||
if media_url.endswith('.m3u8'):
|
||||
formats.extend(self._extract_m3u8_formats(media_url, video_id, ext='mp4', m3u8_id='hls'))
|
||||
elif media_url.endswith('.mpd'):
|
||||
formats.extend(self._extract_mpd_formats(media_url, video_id, mpd_id='dash'))
|
||||
else:
|
||||
vbr = int_or_none(media_variant.get('bitRate'), scale=1000)
|
||||
a_format = {
|
||||
'url': media_url,
|
||||
'format_id': 'http-%d' % vbr if vbr else 'http',
|
||||
'vbr': vbr,
|
||||
}
|
||||
# Reported bitRate may be zero
|
||||
if not a_format['vbr']:
|
||||
del a_format['vbr']
|
||||
|
||||
_search_dimensions_in_video_url(a_format, media_url)
|
||||
|
||||
formats.append(a_format)
|
||||
|
||||
duration = float_or_none(media_info.get('duration', {}).get('nanos'), scale=1e9)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = config.get('posterImageUrl')
|
||||
duration = float_or_none(config.get('duration'))
|
||||
title = self._search_regex(r'<title>([^<]+)</title>', webpage, 'title')
|
||||
thumbnail = config.get('posterImageUrl') or config.get('image_src')
|
||||
duration = float_or_none(config.get('duration')) or duration
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': 'TwitterCard',
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
@@ -142,7 +180,6 @@ class TwitterIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://twitter.com/freethenipple/status/643211948184596480',
|
||||
# MD5 checksums are different in different places
|
||||
'info_dict': {
|
||||
'id': '643211948184596480',
|
||||
'ext': 'mp4',
|
||||
@@ -153,6 +190,9 @@ class TwitterIE(InfoExtractor):
|
||||
'uploader': 'FREE THE NIPPLE',
|
||||
'uploader_id': 'freethenipple',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
}, {
|
||||
'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
|
||||
'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
|
||||
@@ -177,6 +217,36 @@ class TwitterIE(InfoExtractor):
|
||||
'uploader_id': 'starwars',
|
||||
'uploader': 'Star Wars',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
|
||||
'info_dict': {
|
||||
'id': '705235433198714880',
|
||||
'ext': 'mp4',
|
||||
'title': 'Brent Yarina - Khalil Iverson\'s missed highlight dunk. And made highlight dunk. In one highlight.',
|
||||
'description': 'Brent Yarina on Twitter: "Khalil Iverson\'s missed highlight dunk. And made highlight dunk. In one highlight."',
|
||||
'uploader_id': 'BTNBrentYarina',
|
||||
'uploader': 'Brent Yarina',
|
||||
},
|
||||
'params': {
|
||||
# The same video as https://twitter.com/i/videos/tweet/705235433198714880
|
||||
# Test case of TwitterCardIE
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
|
||||
'md5': '',
|
||||
'info_dict': {
|
||||
'id': '700207533655363584',
|
||||
'ext': 'mp4',
|
||||
'title': 'jay - BEAT PROD: @suhmeduh #Damndaniel',
|
||||
'description': 'jay on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'uploader': 'jay',
|
||||
'uploader_id': 'jaydingeer',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -234,6 +304,15 @@ class TwitterIE(InfoExtractor):
|
||||
})
|
||||
return info
|
||||
|
||||
if 'class="PlayableMedia' in webpage:
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'TwitterCard',
|
||||
'url': '%s//twitter.com/i/videos/tweet/%s' % (self.http_scheme(), twid),
|
||||
})
|
||||
|
||||
return info
|
||||
|
||||
raise ExtractorError('There\'s no video in this tweet.')
|
||||
|
||||
|
||||
|
@@ -144,7 +144,8 @@ class UdemyIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, lecture_id)
|
||||
|
||||
course_id = self._search_regex(
|
||||
r'data-course-id=["\'](\d+)', webpage, 'course id')
|
||||
(r'data-course-id=["\'](\d+)', r'"id"\s*:\s*(\d+)'),
|
||||
webpage, 'course id')
|
||||
|
||||
try:
|
||||
lecture = self._download_lecture(course_id, lecture_id)
|
||||
|
48
youtube_dl/extractor/usatoday.py
Normal file
48
youtube_dl/extractor/usatoday.py
Normal file
@@ -0,0 +1,48 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
get_element_by_attribute,
|
||||
parse_duration,
|
||||
update_url_query,
|
||||
ExtractorError,
|
||||
)
|
||||
from ..compat import compat_str
|
||||
|
||||
|
||||
class USATodayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?usatoday\.com/(?:[^/]+/)*(?P<id>[^?/#]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.usatoday.com/media/cinematic/video/81729424/us-france-warn-syrian-regime-ahead-of-new-peace-talks/',
|
||||
'md5': '4d40974481fa3475f8bccfd20c5361f8',
|
||||
'info_dict': {
|
||||
'id': '81729424',
|
||||
'ext': 'mp4',
|
||||
'title': 'US, France warn Syrian regime ahead of new peace talks',
|
||||
'timestamp': 1457891045,
|
||||
'description': 'md5:7e50464fdf2126b0f533748d3c78d58f',
|
||||
'uploader_id': '29906170001',
|
||||
'upload_date': '20160313',
|
||||
}
|
||||
}
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/29906170001/38a9eecc-bdd8-42a3-ba14-95397e48b3f8_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(update_url_query(url, {'ajax': 'true'}), display_id)
|
||||
ui_video_data = get_element_by_attribute('class', 'ui-video-data', webpage)
|
||||
if not ui_video_data:
|
||||
raise ExtractorError('no video on the webpage', expected=True)
|
||||
video_data = self._parse_json(ui_video_data, display_id)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': self.BRIGHTCOVE_URL_TEMPLATE % video_data['brightcove_id'],
|
||||
'id': compat_str(video_data['id']),
|
||||
'title': video_data['title'],
|
||||
'thumbnail': video_data.get('thumbnail'),
|
||||
'description': video_data.get('description'),
|
||||
'duration': parse_duration(video_data.get('length')),
|
||||
'ie_key': 'BrightcoveNew',
|
||||
}
|
67
youtube_dl/extractor/ustudio.py
Normal file
67
youtube_dl/extractor/ustudio.py
Normal file
@@ -0,0 +1,67 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class UstudioIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www|v1)\.)?ustudio\.com/video/(?P<id>[^/]+)/(?P<display_id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'http://ustudio.com/video/Uxu2my9bgSph/san_francisco_golden_gate_bridge',
|
||||
'md5': '58bbfca62125378742df01fc2abbdef6',
|
||||
'info_dict': {
|
||||
'id': 'Uxu2my9bgSph',
|
||||
'display_id': 'san_francisco_golden_gate_bridge',
|
||||
'ext': 'mp4',
|
||||
'title': 'San Francisco: Golden Gate Bridge',
|
||||
'description': 'md5:23925500697f2c6d4830e387ba51a9be',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'upload_date': '20111107',
|
||||
'uploader': 'Tony Farley',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
config = self._download_xml(
|
||||
'http://v1.ustudio.com/embed/%s/ustudio/config.xml' % video_id,
|
||||
display_id)
|
||||
|
||||
def extract(kind):
|
||||
return [{
|
||||
'url': item.attrib['url'],
|
||||
'width': int_or_none(item.get('width')),
|
||||
'height': int_or_none(item.get('height')),
|
||||
} for item in config.findall('./qualities/quality/%s' % kind) if item.get('url')]
|
||||
|
||||
formats = extract('video')
|
||||
self._sort_formats(formats)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'(?s)Uploaded by\s*.+?\s*on\s*<span>([^<]+)</span>',
|
||||
webpage, 'upload date', fatal=False))
|
||||
uploader = self._search_regex(
|
||||
r'Uploaded by\s*<a[^>]*>([^<]+)<',
|
||||
webpage, 'uploader', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnails': extract('image'),
|
||||
'upload_date': upload_date,
|
||||
'uploader': uploader,
|
||||
'formats': formats,
|
||||
}
|
@@ -20,6 +20,7 @@ class VGTVIE(XstreamIE):
|
||||
'aftenbladet.no/tv': 'satv',
|
||||
'fvn.no/fvntv': 'fvntv',
|
||||
'aftenposten.no/webtv': 'aptv',
|
||||
'ap.vgtv.no/webtv': 'aptv',
|
||||
}
|
||||
|
||||
_APP_NAME_TO_VENDOR = {
|
||||
@@ -35,7 +36,7 @@ class VGTVIE(XstreamIE):
|
||||
(?P<host>
|
||||
%s
|
||||
)
|
||||
/
|
||||
/?
|
||||
(?:
|
||||
\#!/(?:video|live)/|
|
||||
embed?.*id=
|
||||
@@ -107,19 +108,27 @@ class VGTVIE(XstreamIE):
|
||||
'md5': 'fd828cd29774a729bf4d4425fe192972',
|
||||
'info_dict': {
|
||||
'id': '21039',
|
||||
'ext': 'mov',
|
||||
'ext': 'mp4',
|
||||
'title': 'TRAILER: «SWEATSHOP» - I can´t take any more',
|
||||
'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',
|
||||
'duration': 66,
|
||||
'timestamp': 1417002452,
|
||||
'upload_date': '20141126',
|
||||
'view_count': int,
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.bt.no/tv/#!/video/100250/norling-dette-er-forskjellen-paa-1-divisjon-og-eliteserien',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://ap.vgtv.no/webtv#!/video/111084/de-nye-bysyklene-lettere-bedre-gir-stoerre-hjul-og-feste-til-mobil',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -144,8 +153,6 @@ class VGTVIE(XstreamIE):
|
||||
if len(video_id) == 5:
|
||||
if appname == 'bttv':
|
||||
info = self._extract_video_info('btno', video_id)
|
||||
elif appname == 'aptv':
|
||||
info = self._extract_video_info('ap', video_id)
|
||||
|
||||
streams = data['streamUrls']
|
||||
stream_type = data.get('streamType')
|
||||
|
@@ -1,31 +1,37 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class ViceIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)+(?P<id>.+)'
|
||||
_VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?videos?/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
|
||||
'info_dict': {
|
||||
'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
|
||||
'ext': 'mp4',
|
||||
'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
|
||||
'duration': 725.983,
|
||||
},
|
||||
'params': {
|
||||
# Requires ffmpeg (m3u8 manifest)
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'http://www.vice.com/video/cowboy-capitalists-part-1',
|
||||
'info_dict': {
|
||||
'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
|
||||
'ext': 'mp4',
|
||||
'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
|
||||
'duration': 725.983,
|
||||
},
|
||||
'params': {
|
||||
# Requires ffmpeg (m3u8 manifest)
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.vice.com/ru/video/big-night-out-ibiza-clive-martin-229',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://munchies.vice.com/en/videos/watch-the-trailer-for-our-new-series-the-pizza-show',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -38,3 +44,35 @@ class ViceIE(InfoExtractor):
|
||||
except ExtractorError:
|
||||
raise ExtractorError('The page doesn\'t contain a video', expected=True)
|
||||
return self.url_result(ooyala_url, ie='Ooyala')
|
||||
|
||||
|
||||
class ViceShowIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?show/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'https://munchies.vice.com/en/show/fuck-thats-delicious-2',
|
||||
'info_dict': {
|
||||
'id': 'fuck-thats-delicious-2',
|
||||
'title': "Fuck, That's Delicious",
|
||||
'description': 'Follow the culinary adventures of rapper Action Bronson during his ongoing world tour.',
|
||||
},
|
||||
'playlist_count': 17,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, show_id)
|
||||
|
||||
entries = [
|
||||
self.url_result(video_url, ViceIE.ie_key())
|
||||
for video_url, _ in re.findall(
|
||||
r'<h2[^>]+class="article-title"[^>]+data-id="\d+"[^>]*>\s*<a[^>]+href="(%s.*?)"'
|
||||
% ViceIE._VALID_URL, webpage)]
|
||||
|
||||
title = self._search_regex(
|
||||
r'<title>(.+?)</title>', webpage, 'title', default=None)
|
||||
if title:
|
||||
title = re.sub(r'(.+)\s*\|\s*.+$', r'\1', title).strip()
|
||||
description = self._html_search_meta('description', webpage, 'description')
|
||||
|
||||
return self.playlist_result(entries, show_id, title, description)
|
||||
|
@@ -4,11 +4,13 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import sanitized_Request
|
||||
from ..utils import (
|
||||
decode_packed_codes,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
class VideoMegaIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'(?:videomega:|https?://(?:www\.)?videomega\.tv/(?:(?:view|iframe|cdn)\.php)?\?ref=)(?P<id>[A-Za-z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://videomega.tv/cdn.php?ref=AOSQBJYKIDDIKYJBQSOA',
|
||||
@@ -42,8 +44,10 @@ class VideoMegaIE(InfoExtractor):
|
||||
r'(?:^[Vv]ideo[Mm]ega\.tv\s-\s*|\s*-\svideomega\.tv$)', '', title)
|
||||
thumbnail = self._search_regex(
|
||||
r'<video[^>]+?poster="([^"]+)"', webpage, 'thumbnail', fatal=False)
|
||||
|
||||
real_codes = decode_packed_codes(webpage)
|
||||
video_url = self._search_regex(
|
||||
r'<source[^>]+?src="([^"]+)"', webpage, 'video URL')
|
||||
r'"src"\s*,\s*"([^"]+)"', real_codes, 'video URL')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -1,11 +1,14 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
from .jwplatform import JWPlatformBaseIE
|
||||
from ..utils import (
|
||||
decode_packed_codes,
|
||||
js_to_json,
|
||||
)
|
||||
|
||||
|
||||
class VidziIE(InfoExtractor):
|
||||
class VidziIE(JWPlatformBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?P<id>\w+)'
|
||||
_TEST = {
|
||||
'url': 'http://vidzi.tv/cghql9yq6emu.html',
|
||||
@@ -14,7 +17,6 @@ class VidziIE(InfoExtractor):
|
||||
'id': 'cghql9yq6emu',
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl test video 1\\\\2\'3/4<5\\\\6ä7↭',
|
||||
'uploader': 'vidzi.tv',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
@@ -29,11 +31,12 @@ class VidziIE(InfoExtractor):
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')
|
||||
|
||||
# Vidzi now uses jwplayer, which can be handled by GenericIE
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': smuggle_url(url, {'to_generic': True}),
|
||||
'ie_key': 'Generic',
|
||||
}
|
||||
code = decode_packed_codes(webpage).replace('\\\'', '\'')
|
||||
jwplayer_data = self._parse_json(
|
||||
self._search_regex(r'setup\(([^)]+)\)', code, 'jwplayer data'),
|
||||
video_id, transform_source=js_to_json)
|
||||
|
||||
info_dict = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False)
|
||||
info_dict['title'] = title
|
||||
|
||||
return info_dict
|
||||
|
@@ -176,13 +176,13 @@ class VikiIE(VikiBaseIE):
|
||||
}, {
|
||||
# youtube external
|
||||
'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
|
||||
'md5': '216d1afdc0c64d1febc1e9f2bd4b864b',
|
||||
'md5': '63f8600c1da6f01b7640eee7eca4f1da',
|
||||
'info_dict': {
|
||||
'id': '50562v',
|
||||
'ext': 'mp4',
|
||||
'ext': 'webm',
|
||||
'title': 'Poor Nastya [COMPLETE] - Episode 1',
|
||||
'description': '',
|
||||
'duration': 607,
|
||||
'duration': 606,
|
||||
'timestamp': 1274949505,
|
||||
'upload_date': '20101213',
|
||||
'uploader': 'ad14065n',
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user