Compare commits
176 Commits
2017.08.23
...
2017.10.12
Author | SHA1 | Date | |
---|---|---|---|
![]() |
76581082f6 | ||
![]() |
2f0eb0a68a | ||
![]() |
7fee3377dc | ||
![]() |
ff3f1a62f0 | ||
![]() |
694b61545c | ||
![]() |
af0f74288d | ||
![]() |
9e38dbb19c | ||
![]() |
782195a9d4 | ||
![]() |
26bae2d965 | ||
![]() |
5fe75f976f | ||
![]() |
4fe4bda287 | ||
![]() |
cdab1df912 | ||
![]() |
dfc80bdd2e | ||
![]() |
04af3aca04 | ||
![]() |
d0f2d64114 | ||
![]() |
01c742ecd0 | ||
![]() |
9e71f88105 | ||
![]() |
ae5af89079 | ||
![]() |
197224b7a4 | ||
![]() |
8992331621 | ||
![]() |
b0dde6686c | ||
![]() |
a22ccac1f0 | ||
![]() |
8b561bfc9d | ||
![]() |
8e751a185c | ||
![]() |
3fc8f5b7c2 | ||
![]() |
665f42d8c1 | ||
![]() |
e952847541 | ||
![]() |
b1a7bf44b9 | ||
![]() |
2e2a8e97d5 | ||
![]() |
ac93c09ab2 | ||
![]() |
cd6fc19ed7 | ||
![]() |
86a15ed64b | ||
![]() |
7e85e8729f | ||
![]() |
6be08ce602 | ||
![]() |
cf5f6ed5be | ||
![]() |
6b46285e85 | ||
![]() |
6e736d86e7 | ||
![]() |
c110944fa2 | ||
![]() |
9524dca3ac | ||
![]() |
3e4cedf9e8 | ||
![]() |
bfd484ccff | ||
![]() |
b7e14f06a4 | ||
![]() |
d2ae7e24e5 | ||
![]() |
544ffb7790 | ||
![]() |
117589dfa2 | ||
![]() |
839728f5bf | ||
![]() |
fcdd37d053 | ||
![]() |
1dd126180e | ||
![]() |
4e599194d6 | ||
![]() |
c5b7014a9c | ||
![]() |
c8da40d834 | ||
![]() |
b69ca0ccfc | ||
![]() |
2c53bd51c6 | ||
![]() |
3836b02ce8 | ||
![]() |
fa3fdeb41f | ||
![]() |
eb9a15be60 | ||
![]() |
3600fd591d | ||
![]() |
63d990d285 | ||
![]() |
b14b2283a0 | ||
![]() |
02d01e15f1 | ||
![]() |
db96252831 | ||
![]() |
8b389f7e3c | ||
![]() |
9fc41bcb6b | ||
![]() |
10cab6613f | ||
![]() |
4d182955a2 | ||
![]() |
011da618bd | ||
![]() |
4c54b89e03 | ||
![]() |
a87d7b4953 | ||
![]() |
2f3933aa1e | ||
![]() |
aab20aabfc | ||
![]() |
16f54d0751 | ||
![]() |
07d1344c85 | ||
![]() |
47b5dfb047 | ||
![]() |
e3440d824a | ||
![]() |
136507b39a | ||
![]() |
7f4921b38d | ||
![]() |
f70ddd4aeb | ||
![]() |
1c22d7a7f3 | ||
![]() |
5c1452e8f1 | ||
![]() |
4bb58fa118 | ||
![]() |
13de91c9e9 | ||
![]() |
9ce1ac4046 | ||
![]() |
095774e591 | ||
![]() |
2384f5a64e | ||
![]() |
8c2895305d | ||
![]() |
8c6919e433 | ||
![]() |
f6ff52b473 | ||
![]() |
12ea5c79fb | ||
![]() |
3b65a6fbf3 | ||
![]() |
dc76eef092 | ||
![]() |
8a1a60d173 | ||
![]() |
4d8c4b46d5 | ||
![]() |
9c2a17f2ce | ||
![]() |
4ed2d7b7d1 | ||
![]() |
8251af63a1 | ||
![]() |
790d379e4d | ||
![]() |
3869028ffb | ||
![]() |
68d43a61b5 | ||
![]() |
a88d461dff | ||
![]() |
a4245acef8 | ||
![]() |
6be44a50ed | ||
![]() |
b763e1d68c | ||
![]() |
cbf85239bb | ||
![]() |
159d304a9f | ||
![]() |
86e55e317c | ||
![]() |
c46680fb2a | ||
![]() |
fad9fc537d | ||
![]() |
0732a90579 | ||
![]() |
319fc70676 | ||
![]() |
e7c3e33456 | ||
![]() |
757984af90 | ||
![]() |
2f483758bc | ||
![]() |
018cc61549 | ||
![]() |
2709d9fa28 | ||
![]() |
7dacceae75 | ||
![]() |
43df248f10 | ||
![]() |
f12a6e88b2 | ||
![]() |
806498cf2f | ||
![]() |
b98339b54b | ||
![]() |
bf6ec2fea9 | ||
![]() |
c3dd44e085 | ||
![]() |
c7e327c4d4 | ||
![]() |
48b813748d | ||
![]() |
debed8d759 | ||
![]() |
51aee72d16 | ||
![]() |
931edb2ada | ||
![]() |
5113b69124 | ||
![]() |
66c9fa36c1 | ||
![]() |
c5c9bf0c12 | ||
![]() |
880fa66f4f | ||
![]() |
6348671c4a | ||
![]() |
efc57145c1 | ||
![]() |
e9b865267a | ||
![]() |
bc35f07537 | ||
![]() |
0b4a8eb3ac | ||
![]() |
c1c1585b31 | ||
![]() |
0cbb841ba9 | ||
![]() |
d7c7100e3d | ||
![]() |
73602bcd0c | ||
![]() |
23b2df82c7 | ||
![]() |
503115540d | ||
![]() |
64f0e30b93 | ||
![]() |
a3431e1224 | ||
![]() |
a2022b0c40 | ||
![]() |
8681ed7fc8 | ||
![]() |
8d81f3e36d | ||
![]() |
7998520933 | ||
![]() |
5b4bfbfc3b | ||
![]() |
53647dfd0a | ||
![]() |
22f65a9efc | ||
![]() |
c75c384fb6 | ||
![]() |
1b41da488d | ||
![]() |
fea82c1780 | ||
![]() |
3902cdd0e3 | ||
![]() |
2cfa7cbdd0 | ||
![]() |
cc0412ef91 | ||
![]() |
1c9c8de29e | ||
![]() |
f031b76065 | ||
![]() |
62c06c593d | ||
![]() |
ff17be3ac9 | ||
![]() |
1ed4549942 | ||
![]() |
dd121cc1ca | ||
![]() |
a3c3a1e128 | ||
![]() |
085d9dd9be | ||
![]() |
151978f38a | ||
![]() |
c7121fa7b8 | ||
![]() |
745968bc72 | ||
![]() |
feee8d32e4 | ||
![]() |
c89267d31a | ||
![]() |
5ff1bc0cc1 | ||
![]() |
7552f96352 | ||
![]() |
98f9d87381 | ||
![]() |
fcace2d1ad | ||
![]() |
40e41780f1 | ||
![]() |
da57ebaf84 | ||
![]() |
47e0cef46e |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.23*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.23**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.10.12*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.10.12**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2017.08.23
|
||||
[debug] youtube-dl version 2017.10.12
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
1
.github/PULL_REQUEST_TEMPLATE.md
vendored
1
.github/PULL_REQUEST_TEMPLATE.md
vendored
@@ -9,6 +9,7 @@
|
||||
### Before submitting a *pull request* make sure you have:
|
||||
- [ ] At least skimmed through [adding new extractor tutorial](https://github.com/rg3/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/rg3/youtube-dl#youtube-dl-coding-conventions) sections
|
||||
- [ ] [Searched](https://github.com/rg3/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests
|
||||
- [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8)
|
||||
|
||||
### In order to be accepted and merged into youtube-dl each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check one of the following options:
|
||||
- [ ] I am the original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/)
|
||||
|
1
.gitignore
vendored
1
.gitignore
vendored
@@ -22,6 +22,7 @@ cover/
|
||||
updates_key.pem
|
||||
*.egg-info
|
||||
*.srt
|
||||
*.ttml
|
||||
*.sbv
|
||||
*.vtt
|
||||
*.flv
|
||||
|
7
AUTHORS
7
AUTHORS
@@ -224,3 +224,10 @@ Giuseppe Fabiano
|
||||
Örn Guðjónsson
|
||||
Parmjit Virk
|
||||
Genki Sky
|
||||
Ľuboš Katrinec
|
||||
Corey Nicholson
|
||||
Ashutosh Chaudhary
|
||||
John Dong
|
||||
Tatsuyuki Ishi
|
||||
Daniel Weber
|
||||
Kay Bouché
|
||||
|
@@ -82,6 +82,8 @@ To run the test, simply invoke your favorite test runner, or execute a test file
|
||||
python test/test_download.py
|
||||
nosetests
|
||||
|
||||
See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases.
|
||||
|
||||
If you want to create a build of youtube-dl yourself, you'll need
|
||||
|
||||
* python
|
||||
@@ -149,7 +151,7 @@ After you have ensured this site is distributing its content legally, you can fo
|
||||
}
|
||||
```
|
||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with an `only_matching` key in the test's dict are not counted.
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
|
||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
||||
9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
|
||||
|
189
ChangeLog
189
ChangeLog
@@ -1,3 +1,192 @@
|
||||
version 2017.10.12
|
||||
|
||||
Core
|
||||
* [YoutubeDL] Improve _default_format_spec (#14461)
|
||||
|
||||
Extractors
|
||||
* [steam] Fix extraction (#14067)
|
||||
+ [funk] Add support for funk.net (#14464)
|
||||
+ [nexx] Add support for shortcuts and relax domain id extraction
|
||||
+ [voxmedia] Add support for recode.net (#14173)
|
||||
+ [once] Add support for vmap URLs
|
||||
+ [generic] Add support for channel9 embeds (#14469)
|
||||
* [tva] Fix extraction (#14328)
|
||||
+ [tubitv] Add support for new URL format (#14460)
|
||||
- [afreecatv:global] Remove extractor
|
||||
- [youtube:shared] Remove extractor (#14420)
|
||||
+ [slideslive] Add support for slideslive.com (#2680)
|
||||
+ [facebook] Support thumbnails (#14416)
|
||||
* [vvvvid] Fix episode number extraction (#14456)
|
||||
* [hrti:playlist] Relax URL regular expression
|
||||
* [wdr] Relax media link regular expression (#14447)
|
||||
* [hrti] Relax URL regular expression (#14443)
|
||||
* [fox] Delegate extraction to uplynk:preplay (#14147)
|
||||
+ [youtube] Add support for hooktube.com (#14437)
|
||||
|
||||
|
||||
version 2017.10.07
|
||||
|
||||
Core
|
||||
* [YoutubeDL] Ignore duplicates in --playlist-items
|
||||
* [YoutubeDL] Fix out of range --playlist-items for iterable playlists and
|
||||
reduce code duplication (#14425)
|
||||
+ [utils] Use cache in OnDemandPagedList by default
|
||||
* [postprocessor/ffmpeg] Convert to opus using libopus (#14381)
|
||||
|
||||
Extractors
|
||||
* [reddit] Sort formats (#14430)
|
||||
* [lnkgo] Relax URL regular expression (#14423)
|
||||
* [pornflip] Extend URL regular expression (#14405, #14406)
|
||||
+ [xtube] Add support for embed URLs (#14417)
|
||||
+ [xvideos] Add support for embed URLs and improve extraction (#14409)
|
||||
* [beeg] Fix extraction (#14403)
|
||||
* [tvn24] Relax URL regular expression (#14395)
|
||||
* [nbc] Fix extraction (#13651, #13715, #14137, #14198, #14312, #14314, #14378,
|
||||
#14392, #14414, #14419, #14431)
|
||||
+ [ketnet] Add support for videos without direct sources (#14377)
|
||||
* [canvas] Generalize mediazone.vrt.be extractor and rework canvas and een
|
||||
+ [afreecatv] Add support for adult videos (#14376)
|
||||
|
||||
|
||||
version 2017.10.01
|
||||
|
||||
Core
|
||||
* [YoutubeDL] Document youtube_include_dash_manifest
|
||||
|
||||
Extractors
|
||||
+ [tvp] Add support for new URL schema (#14368)
|
||||
+ [generic] Add support for single format Video.js embeds (#14371)
|
||||
* [yahoo] Bypass geo restriction for brightcove (#14210)
|
||||
* [yahoo] Use extracted brightcove account id (#14210)
|
||||
* [rtve:alacarta] Fix extraction (#14290)
|
||||
+ [yahoo] Add support for custom brightcove embeds (#14210)
|
||||
+ [generic] Add support for Video.js embeds
|
||||
+ [gfycat] Add support for /gifs/detail URLs (#14322)
|
||||
* [generic] Fix infinite recursion for twitter:player URLs (#14339)
|
||||
* [xhamsterembed] Fix extraction (#14308)
|
||||
|
||||
|
||||
version 2017.09.24
|
||||
|
||||
Core
|
||||
+ [options] Accept lrc as a subtitle conversion target format (#14292)
|
||||
* [utils] Fix handling raw TTML subtitles (#14191)
|
||||
|
||||
Extractors
|
||||
* [24video] Fix timestamp extraction and make non fatal (#14295)
|
||||
+ [24video] Add support for 24video.adult (#14295)
|
||||
+ [kakao] Add support for tv.kakao.com (#12298, #14007)
|
||||
+ [twitter] Add support for URLs without user id (#14270)
|
||||
+ [americastestkitchen] Add support for americastestkitchen.com (#10764,
|
||||
#13996)
|
||||
* [generic] Fix support for multiple HTML5 videos on one page (#14080)
|
||||
* [mixcloud] Fix extraction (#14088, #14132)
|
||||
+ [lynda] Add support for educourse.ga (#14286)
|
||||
* [beeg] Fix extraction (#14275)
|
||||
* [nbcsports:vplayer] Correct theplatform URL (#13873)
|
||||
* [twitter] Fix duration extraction (#14141)
|
||||
* [tvplay] Bypass geo restriction
|
||||
+ [heise] Add support for YouTube embeds (#14109)
|
||||
+ [popcorntv] Add support for popcorntv.it (#5914, #14211)
|
||||
* [viki] Update app data (#14181)
|
||||
* [morningstar] Relax URL regular expression (#14222)
|
||||
* [openload] Fix extraction (#14225, #14257)
|
||||
* [noovo] Fix extraction (#14214)
|
||||
* [dailymotion:playlist] Relax URL regular expression (#14219)
|
||||
+ [twitch] Add support for go.twitch.tv URLs (#14215)
|
||||
* [vgtv] Relax URL regular expression (#14223)
|
||||
|
||||
|
||||
version 2017.09.15
|
||||
|
||||
Core
|
||||
* [downloader/fragment] Restart inconsistent incomplete fragment downloads
|
||||
(#13731)
|
||||
* [YoutubeDL] Download raw subtitles files (#12909, #14191)
|
||||
|
||||
Extractors
|
||||
* [condenast] Fix extraction (#14196, #14207)
|
||||
+ [orf] Add support for f4m stories
|
||||
* [tv4] Relax URL regular expression (#14206)
|
||||
* [animeondemand] Bypass geo restriction
|
||||
+ [animeondemand] Add support for flash videos (#9944)
|
||||
|
||||
|
||||
version 2017.09.11
|
||||
|
||||
Extractors
|
||||
* [rutube:playlist] Fix suitable (#14166)
|
||||
|
||||
|
||||
version 2017.09.10
|
||||
|
||||
Core
|
||||
+ [utils] Introduce bool_or_none
|
||||
* [YoutubeDL] Ensure dir existence for each requested format (#14116)
|
||||
|
||||
Extractors
|
||||
* [fox] Fix extraction (#14147)
|
||||
* [rutube] Use bool_or_none
|
||||
* [rutube] Rework and generalize playlist extractors (#13565)
|
||||
+ [rutube:playlist] Add support for playlists (#13534, #13565)
|
||||
+ [radiocanada] Add fallback for title extraction (#14145)
|
||||
* [vk] Use dedicated YouTube embeds extraction routine
|
||||
* [vice] Use dedicated YouTube embeds extraction routine
|
||||
* [cracked] Use dedicated YouTube embeds extraction routine
|
||||
* [chilloutzone] Use dedicated YouTube embeds extraction routine
|
||||
* [abcnews] Use dedicated YouTube embeds extraction routine
|
||||
* [youtube] Separate methods for embeds extraction
|
||||
* [redtube] Fix formats extraction (#14122)
|
||||
* [arte] Relax unavailability check (#14112)
|
||||
+ [manyvids] Add support for preview videos from manyvids.com (#14053, #14059)
|
||||
* [vidme:user] Relax URL regular expression (#14054)
|
||||
* [bpb] Fix extraction (#14043, #14086)
|
||||
* [soundcloud] Fix download URL with private tracks (#14093)
|
||||
* [aliexpress:live] Add support for live.aliexpress.com (#13698, #13707)
|
||||
* [viidea] Capture and output lecture error message (#14099)
|
||||
* [radiocanada] Skip unsupported platforms (#14100)
|
||||
|
||||
|
||||
version 2017.09.02
|
||||
|
||||
Extractors
|
||||
* [youtube] Force old layout for each webpage (#14068, #14072, #14074, #14076,
|
||||
#14077, #14079, #14082, #14083, #14094, #14095, #14096)
|
||||
* [youtube] Fix upload date extraction (#14065)
|
||||
+ [charlierose] Add support for episodes (#14062)
|
||||
+ [bbccouk] Add support for w-prefixed ids (#14056)
|
||||
* [googledrive] Extend URL regular expression (#9785)
|
||||
+ [googledrive] Add support for source format (#14046)
|
||||
* [pornhd] Fix extraction (#14005)
|
||||
|
||||
|
||||
version 2017.08.27.1
|
||||
|
||||
Extractors
|
||||
|
||||
* [youtube] Fix extraction with --youtube-skip-dash-manifest enabled (#14037)
|
||||
|
||||
|
||||
version 2017.08.27
|
||||
|
||||
Core
|
||||
+ [extractor/common] Extract height and format id for HTML5 videos (#14034)
|
||||
* [downloader/http] Rework HTTP downloader (#506, #809, #2849, #4240, #6023,
|
||||
#8625, #9483)
|
||||
* Simplify code and split into separate routines to facilitate maintaining
|
||||
* Make retry mechanism work on errors during actual download not only
|
||||
during connection establishment phase
|
||||
* Retry on ECONNRESET and ETIMEDOUT during reading data from network
|
||||
* Retry on content too short
|
||||
* Show error description on retry
|
||||
|
||||
Extractors
|
||||
* [generic] Lower preference for extraction from LD-JSON
|
||||
* [rai] Fix audio formats extraction (#14024)
|
||||
* [youtube] Fix controversy videos extraction (#14027, #14029)
|
||||
* [mixcloud] Fix extraction (#14015, #14020)
|
||||
|
||||
|
||||
version 2017.08.23
|
||||
|
||||
Core
|
||||
|
4
Makefile
4
Makefile
@@ -49,11 +49,11 @@ youtube-dl: youtube_dl/*.py youtube_dl/*/*.py
|
||||
mkdir -p zip
|
||||
for d in youtube_dl youtube_dl/downloader youtube_dl/extractor youtube_dl/postprocessor ; do \
|
||||
mkdir -p zip/$$d ;\
|
||||
cp -a $$d/*.py zip/$$d/ ;\
|
||||
cp -pPR $$d/*.py zip/$$d/ ;\
|
||||
done
|
||||
touch -t 200001010101 zip/youtube_dl/*.py zip/youtube_dl/*/*.py
|
||||
mv zip/youtube_dl/__main__.py zip/
|
||||
cd zip ; zip --quiet ../youtube-dl youtube_dl/*.py youtube_dl/*/*.py __main__.py
|
||||
cd zip ; zip -q ../youtube-dl youtube_dl/*.py youtube_dl/*/*.py __main__.py
|
||||
rm -rf zip
|
||||
echo '#!$(PYTHON)' > youtube-dl
|
||||
cat youtube-dl.zip >> youtube-dl
|
||||
|
@@ -427,7 +427,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
syntax. Example: --exec 'adb push {}
|
||||
/sdcard/Music/ && rm {}'
|
||||
--convert-subs FORMAT Convert the subtitles to other format
|
||||
(currently supported: srt|ass|vtt)
|
||||
(currently supported: srt|ass|vtt|lrc)
|
||||
|
||||
# CONFIGURATION
|
||||
|
||||
@@ -936,6 +936,8 @@ To run the test, simply invoke your favorite test runner, or execute a test file
|
||||
python test/test_download.py
|
||||
nosetests
|
||||
|
||||
See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases.
|
||||
|
||||
If you want to create a build of youtube-dl yourself, you'll need
|
||||
|
||||
* python
|
||||
@@ -1003,7 +1005,7 @@ After you have ensured this site is distributing its content legally, you can fo
|
||||
}
|
||||
```
|
||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with an `only_matching` key in the test's dict are not counted.
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
|
||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
||||
9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
|
||||
@@ -1165,7 +1167,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
|
||||
ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
|
||||
```
|
||||
|
||||
Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L129-L279). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
|
||||
Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
|
||||
|
||||
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
|
||||
|
||||
|
@@ -14,7 +14,7 @@ import os
|
||||
import sys
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import get_testcases
|
||||
from test.helper import gettestcases
|
||||
from youtube_dl.utils import compat_urllib_parse_urlparse
|
||||
from youtube_dl.utils import compat_urllib_request
|
||||
|
||||
@@ -24,7 +24,7 @@ if len(sys.argv) > 1:
|
||||
else:
|
||||
METHOD = 'EURISTIC'
|
||||
|
||||
for test in get_testcases():
|
||||
for test in gettestcases():
|
||||
if METHOD == 'EURISTIC':
|
||||
try:
|
||||
webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
|
||||
|
@@ -36,12 +36,13 @@
|
||||
- **AdultSwim**
|
||||
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network
|
||||
- **afreecatv**: afreecatv.com
|
||||
- **afreecatv:global**: afreecatv.com
|
||||
- **AirMozilla**
|
||||
- **AliExpressLive**
|
||||
- **AlJazeera**
|
||||
- **Allocine**
|
||||
- **AlphaPorno**
|
||||
- **AMCNetworks**
|
||||
- **AmericasTestKitchen**
|
||||
- **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||
- **AnimeOnDemand**
|
||||
- **anitube.se**
|
||||
@@ -128,7 +129,8 @@
|
||||
- **CamWithHer**
|
||||
- **canalc2.tv**
|
||||
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
|
||||
- **Canvas**: canvas.be and een.be
|
||||
- **Canvas**
|
||||
- **CanvasEen**: canvas.be and een.be
|
||||
- **CarambaTV**
|
||||
- **CarambaTVPage**
|
||||
- **CartoonNetwork**
|
||||
@@ -293,6 +295,7 @@
|
||||
- **freespeech.org**
|
||||
- **FreshLive**
|
||||
- **Funimation**
|
||||
- **Funk**
|
||||
- **FunnyOrDie**
|
||||
- **Fusion**
|
||||
- **Fux**
|
||||
@@ -377,6 +380,7 @@
|
||||
- **Jove**
|
||||
- **jpopsuki.tv**
|
||||
- **JWPlatform**
|
||||
- **Kakao**
|
||||
- **Kaltura**
|
||||
- **Kamcord**
|
||||
- **KanalPlay**: Kanal 5/9/11 Play
|
||||
@@ -437,6 +441,7 @@
|
||||
- **MakerTV**
|
||||
- **mangomolo:live**
|
||||
- **mangomolo:video**
|
||||
- **ManyVids**
|
||||
- **MatchTV**
|
||||
- **MDR**: MDR.DE and KiKA
|
||||
- **media.ccc.de**
|
||||
@@ -591,6 +596,7 @@
|
||||
- **Openload**
|
||||
- **OraTV**
|
||||
- **orf:fm4**: radio FM4
|
||||
- **orf:fm4:story**: fm4.orf.at stories
|
||||
- **orf:iptv**: iptv.ORF.at
|
||||
- **orf:oe1**: Radio Österreich 1
|
||||
- **orf:tvthek**: ORF TVthek
|
||||
@@ -624,6 +630,7 @@
|
||||
- **Pokemon**
|
||||
- **PolskieRadio**
|
||||
- **PolskieRadioCategory**
|
||||
- **PopcornTV**
|
||||
- **PornCom**
|
||||
- **PornerBros**
|
||||
- **PornFlip**
|
||||
@@ -701,6 +708,7 @@
|
||||
- **rutube:embed**: Rutube embedded videos
|
||||
- **rutube:movie**: Rutube movies
|
||||
- **rutube:person**: Rutube person videos
|
||||
- **rutube:playlist**: Rutube playlists
|
||||
- **RUTV**: RUTV.RU
|
||||
- **Ruutu**
|
||||
- **Ruv**
|
||||
@@ -730,6 +738,7 @@
|
||||
- **skynewsarabia:video**
|
||||
- **SkySports**
|
||||
- **Slideshare**
|
||||
- **SlidesLive**
|
||||
- **Slutload**
|
||||
- **smotri**: Smotri.com
|
||||
- **smotri:broadcast**: Smotri.com broadcasts
|
||||
@@ -961,6 +970,7 @@
|
||||
- **VoiceRepublic**
|
||||
- **Voot**
|
||||
- **VoxMedia**
|
||||
- **VoxMediaVolume**
|
||||
- **Vporn**
|
||||
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||
- **Vrak**
|
||||
@@ -1036,7 +1046,6 @@
|
||||
- **youtube:search**: YouTube.com searches
|
||||
- **youtube:search:date**: YouTube.com searches, newest videos first
|
||||
- **youtube:search_url**: YouTube.com search URLs
|
||||
- **youtube:shared**
|
||||
- **youtube:show**: YouTube.com (multi-season) shows
|
||||
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
|
||||
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
|
||||
|
@@ -466,12 +466,18 @@ class TestFormatSelection(unittest.TestCase):
|
||||
ydl = YDL({'simulate': True})
|
||||
self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best')
|
||||
|
||||
ydl = YDL({'is_live': True})
|
||||
self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
|
||||
|
||||
ydl = YDL({'simulate': True, 'is_live': True})
|
||||
self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best')
|
||||
|
||||
ydl = YDL({'outtmpl': '-'})
|
||||
self.assertEqual(ydl._default_format_spec({}), 'best')
|
||||
self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
|
||||
|
||||
ydl = YDL({})
|
||||
self.assertEqual(ydl._default_format_spec({}, download=False), 'bestvideo+bestaudio/best')
|
||||
self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best')
|
||||
self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
|
||||
|
||||
|
||||
class TestYoutubeDL(unittest.TestCase):
|
||||
@@ -770,6 +776,12 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
result = get_ids({'playlist_items': '10'})
|
||||
self.assertEqual(result, [])
|
||||
|
||||
result = get_ids({'playlist_items': '3-10'})
|
||||
self.assertEqual(result, [3, 4])
|
||||
|
||||
result = get_ids({'playlist_items': '2-4,3-4,3'})
|
||||
self.assertEqual(result, [2, 3, 4])
|
||||
|
||||
def test_urlopen_no_file_protocol(self):
|
||||
# see https://github.com/rg3/youtube-dl/issues/8227
|
||||
ydl = YDL()
|
||||
|
@@ -1064,7 +1064,7 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
|
||||
<p begin="3" dur="-1">Ignored, three</p>
|
||||
</div>
|
||||
</body>
|
||||
</tt>'''
|
||||
</tt>'''.encode('utf-8')
|
||||
srt_data = '''1
|
||||
00:00:00,000 --> 00:00:01,000
|
||||
The following line contains Chinese characters and special symbols
|
||||
@@ -1089,7 +1089,7 @@ Line
|
||||
<p begin="0" end="1">The first line</p>
|
||||
</div>
|
||||
</body>
|
||||
</tt>'''
|
||||
</tt>'''.encode('utf-8')
|
||||
srt_data = '''1
|
||||
00:00:00,000 --> 00:00:01,000
|
||||
The first line
|
||||
@@ -1115,7 +1115,7 @@ The first line
|
||||
<p style="s1" tts:textDecoration="underline" begin="00:00:09.56" id="p2" end="00:00:12.36"><span style="s2" tts:color="lime">inner<br /> </span>style</p>
|
||||
</div>
|
||||
</body>
|
||||
</tt>'''
|
||||
</tt>'''.encode('utf-8')
|
||||
srt_data = '''1
|
||||
00:00:02,080 --> 00:00:05,839
|
||||
<font color="white" face="sansSerif" size="16">default style<font color="red">custom style</font></font>
|
||||
@@ -1138,6 +1138,26 @@ part 3</font></u>
|
||||
'''
|
||||
self.assertEqual(dfxp2srt(dfxp_data_with_style), srt_data)
|
||||
|
||||
dfxp_data_non_utf8 = '''<?xml version="1.0" encoding="UTF-16"?>
|
||||
<tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
|
||||
<body>
|
||||
<div xml:lang="en">
|
||||
<p begin="0" end="1">Line 1</p>
|
||||
<p begin="1" end="2">第二行</p>
|
||||
</div>
|
||||
</body>
|
||||
</tt>'''.encode('utf-16')
|
||||
srt_data = '''1
|
||||
00:00:00,000 --> 00:00:01,000
|
||||
Line 1
|
||||
|
||||
2
|
||||
00:00:01,000 --> 00:00:02,000
|
||||
第二行
|
||||
|
||||
'''
|
||||
self.assertEqual(dfxp2srt(dfxp_data_non_utf8), srt_data)
|
||||
|
||||
def test_cli_option(self):
|
||||
self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128'])
|
||||
self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), [])
|
||||
|
@@ -65,6 +65,7 @@ from .utils import (
|
||||
locked_file,
|
||||
make_HTTPS_handler,
|
||||
MaxDownloadsReached,
|
||||
orderedSet,
|
||||
PagedList,
|
||||
parse_filesize,
|
||||
PerRequestProxyHandler,
|
||||
@@ -92,6 +93,7 @@ from .utils import (
|
||||
)
|
||||
from .cache import Cache
|
||||
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
|
||||
from .extractor.openload import PhantomJSwrapper
|
||||
from .downloader import get_suitable_downloader
|
||||
from .downloader.rtmp import rtmpdump_version
|
||||
from .postprocessor import (
|
||||
@@ -303,6 +305,12 @@ class YoutubeDL(object):
|
||||
otherwise prefer avconv.
|
||||
postprocessor_args: A list of additional command-line arguments for the
|
||||
postprocessor.
|
||||
|
||||
The following options are used by the Youtube extractor:
|
||||
youtube_include_dash_manifest: If True (default), DASH manifests and related
|
||||
data will be downloaded and processed by extractor.
|
||||
You can reduce network I/O by disabling it if you don't
|
||||
care about DASH.
|
||||
"""
|
||||
|
||||
_NUMERIC_FIELDS = set((
|
||||
@@ -901,15 +909,25 @@ class YoutubeDL(object):
|
||||
yield int(item)
|
||||
else:
|
||||
yield int(string_segment)
|
||||
playlistitems = iter_playlistitems(playlistitems_str)
|
||||
playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
|
||||
|
||||
ie_entries = ie_result['entries']
|
||||
|
||||
def make_playlistitems_entries(list_ie_entries):
|
||||
num_entries = len(list_ie_entries)
|
||||
return [
|
||||
list_ie_entries[i - 1] for i in playlistitems
|
||||
if -num_entries <= i - 1 < num_entries]
|
||||
|
||||
def report_download(num_entries):
|
||||
self.to_screen(
|
||||
'[%s] playlist %s: Downloading %d videos' %
|
||||
(ie_result['extractor'], playlist, num_entries))
|
||||
|
||||
if isinstance(ie_entries, list):
|
||||
n_all_entries = len(ie_entries)
|
||||
if playlistitems:
|
||||
entries = [
|
||||
ie_entries[i - 1] for i in playlistitems
|
||||
if -n_all_entries <= i - 1 < n_all_entries]
|
||||
entries = make_playlistitems_entries(ie_entries)
|
||||
else:
|
||||
entries = ie_entries[playliststart:playlistend]
|
||||
n_entries = len(entries)
|
||||
@@ -927,20 +945,15 @@ class YoutubeDL(object):
|
||||
entries = ie_entries.getslice(
|
||||
playliststart, playlistend)
|
||||
n_entries = len(entries)
|
||||
self.to_screen(
|
||||
'[%s] playlist %s: Downloading %d videos' %
|
||||
(ie_result['extractor'], playlist, n_entries))
|
||||
report_download(n_entries)
|
||||
else: # iterable
|
||||
if playlistitems:
|
||||
entry_list = list(ie_entries)
|
||||
entries = [entry_list[i - 1] for i in playlistitems]
|
||||
entries = make_playlistitems_entries(list(ie_entries))
|
||||
else:
|
||||
entries = list(itertools.islice(
|
||||
ie_entries, playliststart, playlistend))
|
||||
n_entries = len(entries)
|
||||
self.to_screen(
|
||||
'[%s] playlist %s: Downloading %d videos' %
|
||||
(ie_result['extractor'], playlist, n_entries))
|
||||
report_download(n_entries)
|
||||
|
||||
if self.params.get('playlistreverse', False):
|
||||
entries = entries[::-1]
|
||||
@@ -1065,22 +1078,27 @@ class YoutubeDL(object):
|
||||
return _filter
|
||||
|
||||
def _default_format_spec(self, info_dict, download=True):
|
||||
req_format_list = []
|
||||
|
||||
def can_have_partial_formats():
|
||||
if self.params.get('simulate', False):
|
||||
return True
|
||||
if not download:
|
||||
return True
|
||||
if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
|
||||
return False
|
||||
if info_dict.get('is_live'):
|
||||
return False
|
||||
def can_merge():
|
||||
merger = FFmpegMergerPP(self)
|
||||
return merger.available and merger.can_merge()
|
||||
if can_have_partial_formats():
|
||||
req_format_list.append('bestvideo+bestaudio')
|
||||
req_format_list.append('best')
|
||||
|
||||
def prefer_best():
|
||||
if self.params.get('simulate', False):
|
||||
return False
|
||||
if not download:
|
||||
return False
|
||||
if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
|
||||
return True
|
||||
if info_dict.get('is_live'):
|
||||
return True
|
||||
if not can_merge():
|
||||
return True
|
||||
return False
|
||||
|
||||
req_format_list = ['bestvideo+bestaudio', 'best']
|
||||
if prefer_best():
|
||||
req_format_list.reverse()
|
||||
return '/'.join(req_format_list)
|
||||
|
||||
def build_format_selector(self, format_spec):
|
||||
@@ -1710,12 +1728,17 @@ class YoutubeDL(object):
|
||||
if filename is None:
|
||||
return
|
||||
|
||||
try:
|
||||
dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
|
||||
if dn and not os.path.exists(dn):
|
||||
os.makedirs(dn)
|
||||
except (OSError, IOError) as err:
|
||||
self.report_error('unable to create directory ' + error_to_compat_str(err))
|
||||
def ensure_dir_exists(path):
|
||||
try:
|
||||
dn = os.path.dirname(path)
|
||||
if dn and not os.path.exists(dn):
|
||||
os.makedirs(dn)
|
||||
return True
|
||||
except (OSError, IOError) as err:
|
||||
self.report_error('unable to create directory ' + error_to_compat_str(err))
|
||||
return False
|
||||
|
||||
if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
|
||||
return
|
||||
|
||||
if self.params.get('writedescription', False):
|
||||
@@ -1758,29 +1781,30 @@ class YoutubeDL(object):
|
||||
ie = self.get_info_extractor(info_dict['extractor_key'])
|
||||
for sub_lang, sub_info in subtitles.items():
|
||||
sub_format = sub_info['ext']
|
||||
if sub_info.get('data') is not None:
|
||||
sub_data = sub_info['data']
|
||||
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
|
||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
|
||||
self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
|
||||
else:
|
||||
try:
|
||||
sub_data = ie._download_webpage(
|
||||
sub_info['url'], info_dict['id'], note=False)
|
||||
except ExtractorError as err:
|
||||
self.report_warning('Unable to download subtitle for "%s": %s' %
|
||||
(sub_lang, error_to_compat_str(err.cause)))
|
||||
continue
|
||||
try:
|
||||
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
|
||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
|
||||
self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
|
||||
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
|
||||
if sub_info.get('data') is not None:
|
||||
try:
|
||||
# Use newline='' to prevent conversion of newline characters
|
||||
# See https://github.com/rg3/youtube-dl/issues/10268
|
||||
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
|
||||
subfile.write(sub_info['data'])
|
||||
except (OSError, IOError):
|
||||
self.report_error('Cannot write subtitles file ' + sub_filename)
|
||||
return
|
||||
else:
|
||||
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
|
||||
# Use newline='' to prevent conversion of newline characters
|
||||
# See https://github.com/rg3/youtube-dl/issues/10268
|
||||
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
|
||||
subfile.write(sub_data)
|
||||
except (OSError, IOError):
|
||||
self.report_error('Cannot write subtitles file ' + sub_filename)
|
||||
return
|
||||
try:
|
||||
sub_data = ie._request_webpage(
|
||||
sub_info['url'], info_dict['id'], note=False).read()
|
||||
with io.open(encodeFilename(sub_filename), 'wb') as subfile:
|
||||
subfile.write(sub_data)
|
||||
except (ExtractorError, IOError, OSError, ValueError) as err:
|
||||
self.report_warning('Unable to download subtitle for "%s": %s' %
|
||||
(sub_lang, error_to_compat_str(err)))
|
||||
continue
|
||||
|
||||
if self.params.get('writeinfojson', False):
|
||||
infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
|
||||
@@ -1853,8 +1877,11 @@ class YoutubeDL(object):
|
||||
for f in requested_formats:
|
||||
new_info = dict(info_dict)
|
||||
new_info.update(f)
|
||||
fname = self.prepare_filename(new_info)
|
||||
fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
|
||||
fname = prepend_extension(
|
||||
self.prepare_filename(new_info),
|
||||
'f%s' % f['format_id'], new_info['ext'])
|
||||
if not ensure_dir_exists(fname):
|
||||
return
|
||||
downloaded.append(fname)
|
||||
partial_success = dl(fname, new_info)
|
||||
success = success and partial_success
|
||||
@@ -2208,6 +2235,7 @@ class YoutubeDL(object):
|
||||
|
||||
exe_versions = FFmpegPostProcessor.get_versions(self)
|
||||
exe_versions['rtmpdump'] = rtmpdump_version()
|
||||
exe_versions['phantomjs'] = PhantomJSwrapper._version()
|
||||
exe_str = ', '.join(
|
||||
'%s %s' % (exe, v)
|
||||
for exe, v in sorted(exe_versions.items())
|
||||
|
@@ -206,7 +206,7 @@ def _real_main(argv=None):
|
||||
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv', 'avi']:
|
||||
parser.error('invalid video recode format specified')
|
||||
if opts.convertsubtitles is not None:
|
||||
if opts.convertsubtitles not in ['srt', 'vtt', 'ass']:
|
||||
if opts.convertsubtitles not in ['srt', 'vtt', 'ass', 'lrc']:
|
||||
parser.error('invalid subtitle format specified')
|
||||
|
||||
if opts.date is not None:
|
||||
|
@@ -6,6 +6,7 @@ import collections
|
||||
import email
|
||||
import getpass
|
||||
import io
|
||||
import itertools
|
||||
import optparse
|
||||
import os
|
||||
import re
|
||||
@@ -15,7 +16,6 @@ import socket
|
||||
import struct
|
||||
import subprocess
|
||||
import sys
|
||||
import itertools
|
||||
import xml.etree.ElementTree
|
||||
|
||||
|
||||
@@ -2898,6 +2898,13 @@ else:
|
||||
compat_struct_pack = struct.pack
|
||||
compat_struct_unpack = struct.unpack
|
||||
|
||||
try:
|
||||
from future_builtins import zip as compat_zip
|
||||
except ImportError: # not 2.6+ or is 3.x
|
||||
try:
|
||||
from itertools import izip as compat_zip # < 2.5 or 3.x
|
||||
except ImportError:
|
||||
compat_zip = zip
|
||||
|
||||
__all__ = [
|
||||
'compat_HTMLParseError',
|
||||
@@ -2948,5 +2955,6 @@ __all__ = [
|
||||
'compat_urlretrieve',
|
||||
'compat_xml_parse_error',
|
||||
'compat_xpath',
|
||||
'compat_zip',
|
||||
'workaround_optparse_bug9161',
|
||||
]
|
||||
|
@@ -304,11 +304,11 @@ class FileDownloader(object):
|
||||
"""Report attempt to resume at given byte."""
|
||||
self.to_screen('[download] Resuming download at byte %s' % resume_len)
|
||||
|
||||
def report_retry(self, count, retries):
|
||||
def report_retry(self, err, count, retries):
|
||||
"""Report retry in case of HTTP error 5xx"""
|
||||
self.to_screen(
|
||||
'[download] Got server HTTP error. Retrying (attempt %d of %s)...'
|
||||
% (count, self.format_retries(retries)))
|
||||
'[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...'
|
||||
% (error_to_compat_str(err), count, self.format_retries(retries)))
|
||||
|
||||
def report_file_already_downloaded(self, file_name):
|
||||
"""Report file has already been fully downloaded."""
|
||||
|
@@ -151,10 +151,15 @@ class FragmentFD(FileDownloader):
|
||||
if self.__do_ytdl_file(ctx):
|
||||
if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
|
||||
self._read_ytdl_file(ctx)
|
||||
if ctx['fragment_index'] > 0 and resume_len == 0:
|
||||
self.report_error(
|
||||
'Inconsistent state of incomplete fragment download. '
|
||||
'Restarting from the beginning...')
|
||||
ctx['fragment_index'] = resume_len = 0
|
||||
self._write_ytdl_file(ctx)
|
||||
else:
|
||||
self._write_ytdl_file(ctx)
|
||||
if ctx['fragment_index'] > 0:
|
||||
assert resume_len > 0
|
||||
assert ctx['fragment_index'] == 0
|
||||
|
||||
dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode)
|
||||
|
||||
|
@@ -22,8 +22,16 @@ from ..utils import (
|
||||
class HttpFD(FileDownloader):
|
||||
def real_download(self, filename, info_dict):
|
||||
url = info_dict['url']
|
||||
tmpfilename = self.temp_name(filename)
|
||||
stream = None
|
||||
|
||||
class DownloadContext(dict):
|
||||
__getattr__ = dict.get
|
||||
__setattr__ = dict.__setitem__
|
||||
__delattr__ = dict.__delitem__
|
||||
|
||||
ctx = DownloadContext()
|
||||
ctx.filename = filename
|
||||
ctx.tmpfilename = self.temp_name(filename)
|
||||
ctx.stream = None
|
||||
|
||||
# Do not include the Accept-Encoding header
|
||||
headers = {'Youtubedl-no-compression': 'True'}
|
||||
@@ -38,46 +46,51 @@ class HttpFD(FileDownloader):
|
||||
if is_test:
|
||||
request.add_header('Range', 'bytes=0-%s' % str(self._TEST_FILE_SIZE - 1))
|
||||
|
||||
# Establish possible resume length
|
||||
if os.path.isfile(encodeFilename(tmpfilename)):
|
||||
resume_len = os.path.getsize(encodeFilename(tmpfilename))
|
||||
else:
|
||||
resume_len = 0
|
||||
ctx.open_mode = 'wb'
|
||||
ctx.resume_len = 0
|
||||
|
||||
open_mode = 'wb'
|
||||
if resume_len != 0:
|
||||
if self.params.get('continuedl', True):
|
||||
self.report_resuming_byte(resume_len)
|
||||
request.add_header('Range', 'bytes=%d-' % resume_len)
|
||||
open_mode = 'ab'
|
||||
else:
|
||||
resume_len = 0
|
||||
if self.params.get('continuedl', True):
|
||||
# Establish possible resume length
|
||||
if os.path.isfile(encodeFilename(ctx.tmpfilename)):
|
||||
ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
|
||||
|
||||
count = 0
|
||||
retries = self.params.get('retries', 0)
|
||||
while count <= retries:
|
||||
|
||||
class SucceedDownload(Exception):
|
||||
pass
|
||||
|
||||
class RetryDownload(Exception):
|
||||
def __init__(self, source_error):
|
||||
self.source_error = source_error
|
||||
|
||||
def establish_connection():
|
||||
if ctx.resume_len != 0:
|
||||
self.report_resuming_byte(ctx.resume_len)
|
||||
request.add_header('Range', 'bytes=%d-' % ctx.resume_len)
|
||||
ctx.open_mode = 'ab'
|
||||
# Establish connection
|
||||
try:
|
||||
data = self.ydl.urlopen(request)
|
||||
ctx.data = self.ydl.urlopen(request)
|
||||
# When trying to resume, Content-Range HTTP header of response has to be checked
|
||||
# to match the value of requested Range HTTP header. This is due to a webservers
|
||||
# that don't support resuming and serve a whole file with no Content-Range
|
||||
# set in response despite of requested Range (see
|
||||
# https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799)
|
||||
if resume_len > 0:
|
||||
content_range = data.headers.get('Content-Range')
|
||||
if ctx.resume_len > 0:
|
||||
content_range = ctx.data.headers.get('Content-Range')
|
||||
if content_range:
|
||||
content_range_m = re.search(r'bytes (\d+)-', content_range)
|
||||
# Content-Range is present and matches requested Range, resume is possible
|
||||
if content_range_m and resume_len == int(content_range_m.group(1)):
|
||||
break
|
||||
if content_range_m and ctx.resume_len == int(content_range_m.group(1)):
|
||||
return
|
||||
# Content-Range is either not present or invalid. Assuming remote webserver is
|
||||
# trying to send the whole file, resume is not possible, so wiping the local file
|
||||
# and performing entire redownload
|
||||
self.report_unable_to_resume()
|
||||
resume_len = 0
|
||||
open_mode = 'wb'
|
||||
break
|
||||
ctx.resume_len = 0
|
||||
ctx.open_mode = 'wb'
|
||||
return
|
||||
except (compat_urllib_error.HTTPError, ) as err:
|
||||
if (err.code < 500 or err.code >= 600) and err.code != 416:
|
||||
# Unexpected HTTP error
|
||||
@@ -86,15 +99,15 @@ class HttpFD(FileDownloader):
|
||||
# Unable to resume (requested range not satisfiable)
|
||||
try:
|
||||
# Open the connection again without the range header
|
||||
data = self.ydl.urlopen(basic_request)
|
||||
content_length = data.info()['Content-Length']
|
||||
ctx.data = self.ydl.urlopen(basic_request)
|
||||
content_length = ctx.data.info()['Content-Length']
|
||||
except (compat_urllib_error.HTTPError, ) as err:
|
||||
if err.code < 500 or err.code >= 600:
|
||||
raise
|
||||
else:
|
||||
# Examine the reported length
|
||||
if (content_length is not None and
|
||||
(resume_len - 100 < int(content_length) < resume_len + 100)):
|
||||
(ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)):
|
||||
# The file had already been fully downloaded.
|
||||
# Explanation to the above condition: in issue #175 it was revealed that
|
||||
# YouTube sometimes adds or removes a few bytes from the end of the file,
|
||||
@@ -102,152 +115,184 @@ class HttpFD(FileDownloader):
|
||||
# I decided to implement a suggested change and consider the file
|
||||
# completely downloaded if the file size differs less than 100 bytes from
|
||||
# the one in the hard drive.
|
||||
self.report_file_already_downloaded(filename)
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self.report_file_already_downloaded(ctx.filename)
|
||||
self.try_rename(ctx.tmpfilename, ctx.filename)
|
||||
self._hook_progress({
|
||||
'filename': filename,
|
||||
'filename': ctx.filename,
|
||||
'status': 'finished',
|
||||
'downloaded_bytes': resume_len,
|
||||
'total_bytes': resume_len,
|
||||
'downloaded_bytes': ctx.resume_len,
|
||||
'total_bytes': ctx.resume_len,
|
||||
})
|
||||
return True
|
||||
raise SucceedDownload()
|
||||
else:
|
||||
# The length does not match, we start the download over
|
||||
self.report_unable_to_resume()
|
||||
resume_len = 0
|
||||
open_mode = 'wb'
|
||||
break
|
||||
except socket.error as e:
|
||||
if e.errno != errno.ECONNRESET:
|
||||
ctx.resume_len = 0
|
||||
ctx.open_mode = 'wb'
|
||||
return
|
||||
raise RetryDownload(err)
|
||||
except socket.error as err:
|
||||
if err.errno != errno.ECONNRESET:
|
||||
# Connection reset is no problem, just retry
|
||||
raise
|
||||
raise RetryDownload(err)
|
||||
|
||||
# Retry
|
||||
count += 1
|
||||
if count <= retries:
|
||||
self.report_retry(count, retries)
|
||||
def download():
|
||||
data_len = ctx.data.info().get('Content-length', None)
|
||||
|
||||
if count > retries:
|
||||
self.report_error('giving up after %s retries' % retries)
|
||||
return False
|
||||
# Range HTTP header may be ignored/unsupported by a webserver
|
||||
# (e.g. extractor/scivee.py, extractor/bambuser.py).
|
||||
# However, for a test we still would like to download just a piece of a file.
|
||||
# To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
|
||||
# block size when downloading a file.
|
||||
if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
|
||||
data_len = self._TEST_FILE_SIZE
|
||||
|
||||
data_len = data.info().get('Content-length', None)
|
||||
|
||||
# Range HTTP header may be ignored/unsupported by a webserver
|
||||
# (e.g. extractor/scivee.py, extractor/bambuser.py).
|
||||
# However, for a test we still would like to download just a piece of a file.
|
||||
# To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
|
||||
# block size when downloading a file.
|
||||
if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
|
||||
data_len = self._TEST_FILE_SIZE
|
||||
|
||||
if data_len is not None:
|
||||
data_len = int(data_len) + resume_len
|
||||
min_data_len = self.params.get('min_filesize')
|
||||
max_data_len = self.params.get('max_filesize')
|
||||
if min_data_len is not None and data_len < min_data_len:
|
||||
self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
|
||||
return False
|
||||
if max_data_len is not None and data_len > max_data_len:
|
||||
self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
|
||||
return False
|
||||
|
||||
byte_counter = 0 + resume_len
|
||||
block_size = self.params.get('buffersize', 1024)
|
||||
start = time.time()
|
||||
|
||||
# measure time over whole while-loop, so slow_down() and best_block_size() work together properly
|
||||
now = None # needed for slow_down() in the first loop run
|
||||
before = start # start measuring
|
||||
while True:
|
||||
|
||||
# Download and write
|
||||
data_block = data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
|
||||
byte_counter += len(data_block)
|
||||
|
||||
# exit loop when download is finished
|
||||
if len(data_block) == 0:
|
||||
break
|
||||
|
||||
# Open destination file just in time
|
||||
if stream is None:
|
||||
try:
|
||||
(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
|
||||
assert stream is not None
|
||||
filename = self.undo_temp_name(tmpfilename)
|
||||
self.report_destination(filename)
|
||||
except (OSError, IOError) as err:
|
||||
self.report_error('unable to open for writing: %s' % str(err))
|
||||
if data_len is not None:
|
||||
data_len = int(data_len) + ctx.resume_len
|
||||
min_data_len = self.params.get('min_filesize')
|
||||
max_data_len = self.params.get('max_filesize')
|
||||
if min_data_len is not None and data_len < min_data_len:
|
||||
self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
|
||||
return False
|
||||
if max_data_len is not None and data_len > max_data_len:
|
||||
self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
|
||||
return False
|
||||
|
||||
if self.params.get('xattr_set_filesize', False) and data_len is not None:
|
||||
byte_counter = 0 + ctx.resume_len
|
||||
block_size = self.params.get('buffersize', 1024)
|
||||
start = time.time()
|
||||
|
||||
# measure time over whole while-loop, so slow_down() and best_block_size() work together properly
|
||||
now = None # needed for slow_down() in the first loop run
|
||||
before = start # start measuring
|
||||
|
||||
def retry(e):
|
||||
if ctx.tmpfilename != '-':
|
||||
ctx.stream.close()
|
||||
ctx.stream = None
|
||||
ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
|
||||
raise RetryDownload(e)
|
||||
|
||||
while True:
|
||||
try:
|
||||
# Download and write
|
||||
data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
|
||||
# socket.timeout is a subclass of socket.error but may not have
|
||||
# errno set
|
||||
except socket.timeout as e:
|
||||
retry(e)
|
||||
except socket.error as e:
|
||||
if e.errno not in (errno.ECONNRESET, errno.ETIMEDOUT):
|
||||
raise
|
||||
retry(e)
|
||||
|
||||
byte_counter += len(data_block)
|
||||
|
||||
# exit loop when download is finished
|
||||
if len(data_block) == 0:
|
||||
break
|
||||
|
||||
# Open destination file just in time
|
||||
if ctx.stream is None:
|
||||
try:
|
||||
write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
|
||||
except (XAttrUnavailableError, XAttrMetadataError) as err:
|
||||
self.report_error('unable to set filesize xattr: %s' % str(err))
|
||||
ctx.stream, ctx.tmpfilename = sanitize_open(
|
||||
ctx.tmpfilename, ctx.open_mode)
|
||||
assert ctx.stream is not None
|
||||
ctx.filename = self.undo_temp_name(ctx.tmpfilename)
|
||||
self.report_destination(ctx.filename)
|
||||
except (OSError, IOError) as err:
|
||||
self.report_error('unable to open for writing: %s' % str(err))
|
||||
return False
|
||||
|
||||
try:
|
||||
stream.write(data_block)
|
||||
except (IOError, OSError) as err:
|
||||
if self.params.get('xattr_set_filesize', False) and data_len is not None:
|
||||
try:
|
||||
write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
|
||||
except (XAttrUnavailableError, XAttrMetadataError) as err:
|
||||
self.report_error('unable to set filesize xattr: %s' % str(err))
|
||||
|
||||
try:
|
||||
ctx.stream.write(data_block)
|
||||
except (IOError, OSError) as err:
|
||||
self.to_stderr('\n')
|
||||
self.report_error('unable to write data: %s' % str(err))
|
||||
return False
|
||||
|
||||
# Apply rate limit
|
||||
self.slow_down(start, now, byte_counter - ctx.resume_len)
|
||||
|
||||
# end measuring of one loop run
|
||||
now = time.time()
|
||||
after = now
|
||||
|
||||
# Adjust block size
|
||||
if not self.params.get('noresizebuffer', False):
|
||||
block_size = self.best_block_size(after - before, len(data_block))
|
||||
|
||||
before = after
|
||||
|
||||
# Progress message
|
||||
speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
|
||||
if data_len is None:
|
||||
eta = None
|
||||
else:
|
||||
eta = self.calc_eta(start, time.time(), data_len - ctx.resume_len, byte_counter - ctx.resume_len)
|
||||
|
||||
self._hook_progress({
|
||||
'status': 'downloading',
|
||||
'downloaded_bytes': byte_counter,
|
||||
'total_bytes': data_len,
|
||||
'tmpfilename': ctx.tmpfilename,
|
||||
'filename': ctx.filename,
|
||||
'eta': eta,
|
||||
'speed': speed,
|
||||
'elapsed': now - start,
|
||||
})
|
||||
|
||||
if is_test and byte_counter == data_len:
|
||||
break
|
||||
|
||||
if ctx.stream is None:
|
||||
self.to_stderr('\n')
|
||||
self.report_error('unable to write data: %s' % str(err))
|
||||
self.report_error('Did not get any data blocks')
|
||||
return False
|
||||
if ctx.tmpfilename != '-':
|
||||
ctx.stream.close()
|
||||
|
||||
# Apply rate limit
|
||||
self.slow_down(start, now, byte_counter - resume_len)
|
||||
if data_len is not None and byte_counter != data_len:
|
||||
err = ContentTooShortError(byte_counter, int(data_len))
|
||||
if count <= retries:
|
||||
retry(err)
|
||||
raise err
|
||||
|
||||
# end measuring of one loop run
|
||||
now = time.time()
|
||||
after = now
|
||||
self.try_rename(ctx.tmpfilename, ctx.filename)
|
||||
|
||||
# Adjust block size
|
||||
if not self.params.get('noresizebuffer', False):
|
||||
block_size = self.best_block_size(after - before, len(data_block))
|
||||
|
||||
before = after
|
||||
|
||||
# Progress message
|
||||
speed = self.calc_speed(start, now, byte_counter - resume_len)
|
||||
if data_len is None:
|
||||
eta = None
|
||||
else:
|
||||
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
|
||||
# Update file modification time
|
||||
if self.params.get('updatetime', True):
|
||||
info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None))
|
||||
|
||||
self._hook_progress({
|
||||
'status': 'downloading',
|
||||
'downloaded_bytes': byte_counter,
|
||||
'total_bytes': data_len,
|
||||
'tmpfilename': tmpfilename,
|
||||
'filename': filename,
|
||||
'eta': eta,
|
||||
'speed': speed,
|
||||
'elapsed': now - start,
|
||||
'total_bytes': byte_counter,
|
||||
'filename': ctx.filename,
|
||||
'status': 'finished',
|
||||
'elapsed': time.time() - start,
|
||||
})
|
||||
|
||||
if is_test and byte_counter == data_len:
|
||||
break
|
||||
return True
|
||||
|
||||
if stream is None:
|
||||
self.to_stderr('\n')
|
||||
self.report_error('Did not get any data blocks')
|
||||
return False
|
||||
if tmpfilename != '-':
|
||||
stream.close()
|
||||
while count <= retries:
|
||||
try:
|
||||
establish_connection()
|
||||
download()
|
||||
return True
|
||||
except RetryDownload as e:
|
||||
count += 1
|
||||
if count <= retries:
|
||||
self.report_retry(e.source_error, count, retries)
|
||||
continue
|
||||
except SucceedDownload:
|
||||
return True
|
||||
|
||||
if data_len is not None and byte_counter != data_len:
|
||||
raise ContentTooShortError(byte_counter, int(data_len))
|
||||
self.try_rename(tmpfilename, filename)
|
||||
|
||||
# Update file modification time
|
||||
if self.params.get('updatetime', True):
|
||||
info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
|
||||
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': byte_counter,
|
||||
'total_bytes': byte_counter,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
'elapsed': time.time() - start,
|
||||
})
|
||||
|
||||
return True
|
||||
self.report_error('giving up after %s retries' % retries)
|
||||
return False
|
||||
|
@@ -7,6 +7,7 @@ import time
|
||||
|
||||
from .amp import AMPIE
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..compat import compat_urlparse
|
||||
|
||||
|
||||
@@ -108,9 +109,7 @@ class AbcNewsIE(InfoExtractor):
|
||||
r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL')
|
||||
full_video_url = compat_urlparse.urljoin(url, video_url)
|
||||
|
||||
youtube_url = self._html_search_regex(
|
||||
r'<iframe[^>]+src="(https://www\.youtube\.com/embed/[^"]+)"',
|
||||
webpage, 'YouTube URL', default=None)
|
||||
youtube_url = YoutubeIE._extract_url(webpage)
|
||||
|
||||
timestamp = None
|
||||
date_str = self._html_search_regex(
|
||||
@@ -140,7 +139,7 @@ class AbcNewsIE(InfoExtractor):
|
||||
}
|
||||
|
||||
if youtube_url:
|
||||
entries = [entry, self.url_result(youtube_url, 'Youtube')]
|
||||
entries = [entry, self.url_result(youtube_url, ie=YoutubeIE.ie_key())]
|
||||
return self.playlist_result(entries)
|
||||
|
||||
return entry
|
||||
|
@@ -131,7 +131,7 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
r'data-media-url=(["\'])(?P<url>(?:(?!\1).)+?)\1'],
|
||||
webpage, 'video url', group='url')
|
||||
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
||||
r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
|
||||
r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
|
||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||
if theplatform_metadata.get('AETN$isBehindWall'):
|
||||
requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain]
|
||||
|
@@ -138,6 +138,23 @@ class AfreecaTVIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# adult video
|
||||
'url': 'http://vod.afreecatv.com/PLAYER/STATION/26542731',
|
||||
'info_dict': {
|
||||
'id': '20171001_F1AE1711_196617479_1',
|
||||
'ext': 'mp4',
|
||||
'title': '[생]서아 초심 찾기 방송 (part 1)',
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': 'BJ서아',
|
||||
'uploader_id': 'bjdyrksu',
|
||||
'upload_date': '20171001',
|
||||
'duration': 3600,
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
|
||||
'only_matching': True,
|
||||
@@ -160,7 +177,15 @@ class AfreecaTVIE(InfoExtractor):
|
||||
|
||||
video_xml = self._download_xml(
|
||||
'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
|
||||
video_id, query={'nTitleNo': video_id})
|
||||
video_id, query={
|
||||
'nTitleNo': video_id,
|
||||
'partialView': 'SKIP_ADULT',
|
||||
})
|
||||
|
||||
flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
|
||||
if flag and flag != 'SUCCEED':
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, flag), expected=True)
|
||||
|
||||
video_element = video_xml.findall(compat_xpath('./track/video'))[1]
|
||||
if video_element is None or video_element.text is None:
|
||||
@@ -246,107 +271,3 @@ class AfreecaTVIE(InfoExtractor):
|
||||
})
|
||||
|
||||
return info
|
||||
|
||||
|
||||
class AfreecaTVGlobalIE(AfreecaTVIE):
|
||||
IE_NAME = 'afreecatv:global'
|
||||
_VALID_URL = r'https?://(?:www\.)?afreeca\.tv/(?P<channel_id>\d+)(?:/v/(?P<video_id>\d+))?'
|
||||
_TESTS = [{
|
||||
'url': 'http://afreeca.tv/36853014/v/58301',
|
||||
'info_dict': {
|
||||
'id': '58301',
|
||||
'title': 'tryhard top100',
|
||||
'uploader_id': '36853014',
|
||||
'uploader': 'makgi Hearthstone Live!',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id, video_id = re.match(self._VALID_URL, url).groups()
|
||||
video_type = 'video' if video_id else 'live'
|
||||
query = {
|
||||
'pt': 'view',
|
||||
'bid': channel_id,
|
||||
}
|
||||
if video_id:
|
||||
query['vno'] = video_id
|
||||
video_data = self._download_json(
|
||||
'http://api.afreeca.tv/%s/view_%s.php' % (video_type, video_type),
|
||||
video_id or channel_id, query=query)['channel']
|
||||
|
||||
if video_data.get('result') != 1:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, video_data['remsg']))
|
||||
|
||||
title = video_data['title']
|
||||
|
||||
info = {
|
||||
'thumbnail': video_data.get('thumb'),
|
||||
'view_count': int_or_none(video_data.get('vcnt')),
|
||||
'age_limit': int_or_none(video_data.get('grade')),
|
||||
'uploader_id': channel_id,
|
||||
'uploader': video_data.get('cname'),
|
||||
}
|
||||
|
||||
if video_id:
|
||||
entries = []
|
||||
for i, f in enumerate(video_data.get('flist', [])):
|
||||
video_key = self.parse_video_key(f.get('key', ''))
|
||||
f_url = f.get('file')
|
||||
if not video_key or not f_url:
|
||||
continue
|
||||
entries.append({
|
||||
'id': '%s_%s' % (video_id, video_key.get('part', i + 1)),
|
||||
'title': title,
|
||||
'upload_date': video_key.get('upload_date'),
|
||||
'duration': int_or_none(f.get('length')),
|
||||
'url': f_url,
|
||||
'protocol': 'm3u8_native',
|
||||
'ext': 'mp4',
|
||||
})
|
||||
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'duration': int_or_none(video_data.get('length')),
|
||||
})
|
||||
if len(entries) > 1:
|
||||
info['_type'] = 'multi_video'
|
||||
info['entries'] = entries
|
||||
elif len(entries) == 1:
|
||||
i = entries[0].copy()
|
||||
i.update(info)
|
||||
info = i
|
||||
else:
|
||||
formats = []
|
||||
for s in video_data.get('strm', []):
|
||||
s_url = s.get('purl')
|
||||
if not s_url:
|
||||
continue
|
||||
stype = s.get('stype')
|
||||
if stype == 'HLS':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
s_url, channel_id, 'mp4', m3u8_id=stype, fatal=False))
|
||||
elif stype == 'RTMP':
|
||||
format_id = [stype]
|
||||
label = s.get('label')
|
||||
if label:
|
||||
format_id.append(label)
|
||||
formats.append({
|
||||
'format_id': '-'.join(format_id),
|
||||
'url': s_url,
|
||||
'tbr': int_or_none(s.get('bps')),
|
||||
'height': int_or_none(s.get('brt')),
|
||||
'ext': 'flv',
|
||||
'rtmp_live': True,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
info.update({
|
||||
'id': channel_id,
|
||||
'title': self._live_title(title),
|
||||
'is_live': True,
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
return info
|
||||
|
53
youtube_dl/extractor/aliexpress.py
Normal file
53
youtube_dl/extractor/aliexpress.py
Normal file
@@ -0,0 +1,53 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class AliExpressLiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://live\.aliexpress\.com/live/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://live.aliexpress.com/live/2800002704436634',
|
||||
'md5': 'e729e25d47c5e557f2630eaf99b740a5',
|
||||
'info_dict': {
|
||||
'id': '2800002704436634',
|
||||
'ext': 'mp4',
|
||||
'title': 'CASIMA7.22',
|
||||
'thumbnail': r're:http://.*\.jpg',
|
||||
'uploader': 'CASIMA Official Store',
|
||||
'timestamp': 1500717600,
|
||||
'upload_date': '20170722',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)runParams\s*=\s*({.+?})\s*;?\s*var',
|
||||
webpage, 'runParams'),
|
||||
video_id)
|
||||
|
||||
title = data['title']
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
data['replyStreamUrl'], video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': data.get('coverUrl'),
|
||||
'uploader': try_get(
|
||||
data, lambda x: x['followBar']['name'], compat_str),
|
||||
'timestamp': float_or_none(data.get('startTimeLong'), scale=1000),
|
||||
'formats': formats,
|
||||
}
|
85
youtube_dl/extractor/americastestkitchen.py
Executable file
85
youtube_dl/extractor/americastestkitchen.py
Executable file
@@ -0,0 +1,85 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class AmericasTestKitchenIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:episode|videos)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.americastestkitchen.com/episode/548-summer-dinner-party',
|
||||
'md5': 'b861c3e365ac38ad319cfd509c30577f',
|
||||
'info_dict': {
|
||||
'id': '1_5g5zua6e',
|
||||
'title': 'Summer Dinner Party',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:858d986e73a4826979b6a5d9f8f6a1ec',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'timestamp': 1497285541,
|
||||
'upload_date': '20170612',
|
||||
'uploader_id': 'roger.metcalf@americastestkitchen.com',
|
||||
'release_date': '20170617',
|
||||
'series': "America's Test Kitchen",
|
||||
'season_number': 17,
|
||||
'episode': 'Summer Dinner Party',
|
||||
'episode_number': 24,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
partner_id = self._search_regex(
|
||||
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
|
||||
webpage, 'kaltura partner id')
|
||||
|
||||
video_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
|
||||
webpage, 'initial context'),
|
||||
video_id)
|
||||
|
||||
ep_data = try_get(
|
||||
video_data,
|
||||
(lambda x: x['episodeDetail']['content']['data'],
|
||||
lambda x: x['videoDetail']['content']['data']), dict)
|
||||
ep_meta = ep_data.get('full_video', {})
|
||||
external_id = ep_data.get('external_id') or ep_meta['external_id']
|
||||
|
||||
title = ep_data.get('title') or ep_meta.get('title')
|
||||
description = clean_html(ep_meta.get('episode_description') or ep_data.get(
|
||||
'description') or ep_meta.get('description'))
|
||||
thumbnail = try_get(ep_meta, lambda x: x['photo']['image_url'])
|
||||
release_date = unified_strdate(ep_data.get('aired_at'))
|
||||
|
||||
season_number = int_or_none(ep_meta.get('season_number'))
|
||||
episode = ep_meta.get('title')
|
||||
episode_number = int_or_none(ep_meta.get('episode_number'))
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'kaltura:%s:%s' % (partner_id, external_id),
|
||||
'ie_key': 'Kaltura',
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'release_date': release_date,
|
||||
'series': "America's Test Kitchen",
|
||||
'season_number': season_number,
|
||||
'episode': episode,
|
||||
'episode_number': episode_number,
|
||||
}
|
@@ -3,16 +3,13 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
compat_str,
|
||||
)
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -21,6 +18,8 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
_LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in'
|
||||
_APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
|
||||
_NETRC_MACHINE = 'animeondemand'
|
||||
# German-speaking countries of Europe
|
||||
_GEO_COUNTRIES = ['AT', 'CH', 'DE', 'LI', 'LU']
|
||||
_TESTS = [{
|
||||
# jap, OmU
|
||||
'url': 'https://www.anime-on-demand.de/anime/161',
|
||||
@@ -46,6 +45,10 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
# Full length film, non-series, ger/jap, Dub/OmU, account required
|
||||
'url': 'https://www.anime-on-demand.de/anime/185',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Flash videos
|
||||
'url': 'https://www.anime-on-demand.de/anime/12',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
@@ -72,14 +75,13 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
'post url', default=self._LOGIN_URL, group='url')
|
||||
|
||||
if not post_url.startswith('http'):
|
||||
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
|
||||
|
||||
request = sanitized_Request(
|
||||
post_url, urlencode_postdata(login_form))
|
||||
request.add_header('Referer', self._LOGIN_URL)
|
||||
post_url = urljoin(self._LOGIN_URL, post_url)
|
||||
|
||||
response = self._download_webpage(
|
||||
request, None, 'Logging in as %s' % username)
|
||||
post_url, None, 'Logging in as %s' % username,
|
||||
data=urlencode_postdata(login_form), headers={
|
||||
'Referer': self._LOGIN_URL,
|
||||
})
|
||||
|
||||
if all(p not in response for p in ('>Logout<', 'href="/users/sign_out"')):
|
||||
error = self._search_regex(
|
||||
@@ -120,10 +122,11 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
formats = []
|
||||
|
||||
for input_ in re.findall(
|
||||
r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', html):
|
||||
r'<input[^>]+class=["\'].*?streamstarter[^>]+>', html):
|
||||
attributes = extract_attributes(input_)
|
||||
title = attributes.get('data-dialog-header')
|
||||
playlist_urls = []
|
||||
for playlist_key in ('data-playlist', 'data-otherplaylist'):
|
||||
for playlist_key in ('data-playlist', 'data-otherplaylist', 'data-stream'):
|
||||
playlist_url = attributes.get(playlist_key)
|
||||
if isinstance(playlist_url, compat_str) and re.match(
|
||||
r'/?[\da-zA-Z]+', playlist_url):
|
||||
@@ -147,19 +150,38 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
format_id_list.append(compat_str(num))
|
||||
format_id = '-'.join(format_id_list)
|
||||
format_note = ', '.join(filter(None, (kind, lang_note)))
|
||||
request = sanitized_Request(
|
||||
compat_urlparse.urljoin(url, playlist_url),
|
||||
item_id_list = []
|
||||
if format_id:
|
||||
item_id_list.append(format_id)
|
||||
item_id_list.append('videomaterial')
|
||||
playlist = self._download_json(
|
||||
urljoin(url, playlist_url), video_id,
|
||||
'Downloading %s JSON' % ' '.join(item_id_list),
|
||||
headers={
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
'X-CSRF-Token': csrf_token,
|
||||
'Referer': url,
|
||||
'Accept': 'application/json, text/javascript, */*; q=0.01',
|
||||
})
|
||||
playlist = self._download_json(
|
||||
request, video_id, 'Downloading %s playlist JSON' % format_id,
|
||||
fatal=False)
|
||||
}, fatal=False)
|
||||
if not playlist:
|
||||
continue
|
||||
stream_url = playlist.get('streamurl')
|
||||
if stream_url:
|
||||
rtmp = re.search(
|
||||
r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)',
|
||||
stream_url)
|
||||
if rtmp:
|
||||
formats.append({
|
||||
'url': rtmp.group('url'),
|
||||
'app': rtmp.group('app'),
|
||||
'play_path': rtmp.group('playpath'),
|
||||
'page_url': url,
|
||||
'player_url': 'https://www.anime-on-demand.de/assets/jwplayer.flash-55abfb34080700304d49125ce9ffb4a6.swf',
|
||||
'rtmp_real_time': True,
|
||||
'format_id': 'rtmp',
|
||||
'ext': 'flv',
|
||||
})
|
||||
continue
|
||||
start_video = playlist.get('startvideo', 0)
|
||||
playlist = playlist.get('playlist')
|
||||
if not playlist or not isinstance(playlist, list):
|
||||
@@ -222,7 +244,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
f.update({
|
||||
'id': '%s-%s' % (f['id'], m.group('kind').lower()),
|
||||
'title': m.group('title'),
|
||||
'url': compat_urlparse.urljoin(url, m.group('href')),
|
||||
'url': urljoin(url, m.group('href')),
|
||||
})
|
||||
entries.append(f)
|
||||
|
||||
|
@@ -117,7 +117,7 @@ class AppleTrailersIE(InfoExtractor):
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': '%s-%s' % (version, size),
|
||||
'url': re.sub(r'_(\d+p.mov)', r'_h\1', src),
|
||||
'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src),
|
||||
'width': int_or_none(size_data.get('width')),
|
||||
'height': int_or_none(size_data.get('height')),
|
||||
'language': version[:2],
|
||||
@@ -179,7 +179,7 @@ class AppleTrailersIE(InfoExtractor):
|
||||
formats = []
|
||||
for format in settings['metadata']['sizes']:
|
||||
# The src is a file pointing to the real video file
|
||||
format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src'])
|
||||
format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src'])
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format': format['type'],
|
||||
|
@@ -195,7 +195,7 @@ class ARDMediathekIE(InfoExtractor):
|
||||
|
||||
title = self._html_search_regex(
|
||||
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
|
||||
r'<meta name="dcterms.title" content="(.*?)"/>',
|
||||
r'<meta name="dcterms\.title" content="(.*?)"/>',
|
||||
r'<h4 class="headline">(.*?)</h4>'],
|
||||
webpage, 'title')
|
||||
description = self._html_search_meta(
|
||||
|
@@ -82,7 +82,7 @@ class ArteTVBaseIE(InfoExtractor):
|
||||
|
||||
vsr = player_info['VSR']
|
||||
|
||||
if not vsr and not player_info.get('VRU'):
|
||||
if not vsr:
|
||||
raise ExtractorError(
|
||||
'Video %s is not available' % player_info.get('VID') or video_id,
|
||||
expected=True)
|
||||
|
@@ -29,7 +29,7 @@ from ..compat import (
|
||||
class BBCCoUkIE(InfoExtractor):
|
||||
IE_NAME = 'bbc.co.uk'
|
||||
IE_DESC = 'BBC iPlayer'
|
||||
_ID_REGEX = r'[pb][\da-z]{7}'
|
||||
_ID_REGEX = r'[pbw][\da-z]{7}'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?bbc\.co\.uk/
|
||||
@@ -233,6 +233,9 @@ class BBCCoUkIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
|
||||
@@ -383,7 +386,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
m3u8_id=format_id, fatal=False))
|
||||
if re.search(self._USP_RE, href):
|
||||
usp_formats = self._extract_m3u8_formats(
|
||||
re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href),
|
||||
re.sub(self._USP_RE, r'/\1\.ism/\1\.m3u8', href),
|
||||
programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False)
|
||||
for f in usp_formats:
|
||||
|
@@ -9,6 +9,7 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -36,9 +37,11 @@ class BeegIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
cpl_url = self._search_regex(
|
||||
r'<script[^>]+src=(["\'])(?P<url>(?:https?:)?//static\.beeg\.com/cpl/\d+\.js.*?)\1',
|
||||
r'<script[^>]+src=(["\'])(?P<url>(?:/static|(?:https?:)?//static\.beeg\.com)/cpl/\d+\.js.*?)\1',
|
||||
webpage, 'cpl', default=None, group='url')
|
||||
|
||||
cpl_url = urljoin(url, cpl_url)
|
||||
|
||||
beeg_version, beeg_salt = [None] * 2
|
||||
|
||||
if cpl_url:
|
||||
@@ -54,12 +57,16 @@ class BeegIE(InfoExtractor):
|
||||
r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg salt',
|
||||
default=None, group='beeg_salt')
|
||||
|
||||
beeg_version = beeg_version or '2000'
|
||||
beeg_version = beeg_version or '2185'
|
||||
beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H'
|
||||
|
||||
video = self._download_json(
|
||||
'https://api.beeg.com/api/v6/%s/video/%s' % (beeg_version, video_id),
|
||||
video_id)
|
||||
for api_path in ('', 'api.'):
|
||||
video = self._download_json(
|
||||
'https://%sbeeg.com/api/v6/%s/video/%s'
|
||||
% (api_path, beeg_version, video_id), video_id,
|
||||
fatal=api_path == 'api.')
|
||||
if video:
|
||||
break
|
||||
|
||||
def split(o, e):
|
||||
def cut(s, x):
|
||||
|
@@ -33,13 +33,18 @@ class BpbIE(InfoExtractor):
|
||||
title = self._html_search_regex(
|
||||
r'<h2 class="white">(.*?)</h2>', webpage, 'title')
|
||||
video_info_dicts = re.findall(
|
||||
r"({\s*src:\s*'http://film\.bpb\.de/[^}]+})", webpage)
|
||||
r"({\s*src\s*:\s*'https?://film\.bpb\.de/[^}]+})", webpage)
|
||||
|
||||
formats = []
|
||||
for video_info in video_info_dicts:
|
||||
video_info = self._parse_json(video_info, video_id, transform_source=js_to_json)
|
||||
quality = video_info['quality']
|
||||
video_url = video_info['src']
|
||||
video_info = self._parse_json(
|
||||
video_info, video_id, transform_source=js_to_json, fatal=False)
|
||||
if not video_info:
|
||||
continue
|
||||
video_url = video_info.get('src')
|
||||
if not video_url:
|
||||
continue
|
||||
quality = 'high' if '_high' in video_url else 'low'
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'preference': 10 if quality == 'high' else 0,
|
||||
|
@@ -3,24 +3,104 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import float_or_none
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
strip_or_none,
|
||||
)
|
||||
|
||||
|
||||
class CanvasIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet)/assets/(?P<id>m[dz]-ast-[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||
'md5': '90139b746a0a9bd7bb631283f6e2a64e',
|
||||
'info_dict': {
|
||||
'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||
'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||
'ext': 'flv',
|
||||
'title': 'Nachtwacht: De Greystook',
|
||||
'description': 'md5:1db3f5dc4c7109c821261e7512975be7',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 1468.03,
|
||||
},
|
||||
'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
|
||||
}, {
|
||||
'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
site_id, video_id = mobj.group('site_id'), mobj.group('id')
|
||||
|
||||
data = self._download_json(
|
||||
'https://mediazone.vrt.be/api/v1/%s/assets/%s'
|
||||
% (site_id, video_id), video_id)
|
||||
|
||||
title = data['title']
|
||||
description = data.get('description')
|
||||
|
||||
formats = []
|
||||
for target in data['targetUrls']:
|
||||
format_url, format_type = target.get('url'), target.get('type')
|
||||
if not format_url or not format_type:
|
||||
continue
|
||||
if format_type == 'HLS':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_type, fatal=False))
|
||||
elif format_type == 'HDS':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url, video_id, f4m_id=format_type, fatal=False))
|
||||
elif format_type == 'MPEG_DASH':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, video_id, mpd_id=format_type, fatal=False))
|
||||
elif format_type == 'HSS':
|
||||
formats.extend(self._extract_ism_formats(
|
||||
format_url, video_id, ism_id='mss', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': format_type,
|
||||
'url': format_url,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
subtitle_urls = data.get('subtitleUrls')
|
||||
if isinstance(subtitle_urls, list):
|
||||
for subtitle in subtitle_urls:
|
||||
subtitle_url = subtitle.get('url')
|
||||
if subtitle_url and subtitle.get('type') == 'CLOSED':
|
||||
subtitles.setdefault('nl', []).append({'url': subtitle_url})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
'duration': float_or_none(data.get('duration'), 1000),
|
||||
'thumbnail': data.get('posterImageUrl'),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class CanvasEenIE(InfoExtractor):
|
||||
IE_DESC = 'canvas.be and een.be'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
|
||||
'md5': 'ea838375a547ac787d4064d8c7860a6c',
|
||||
'md5': 'ed66976748d12350b118455979cca293',
|
||||
'info_dict': {
|
||||
'id': 'mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
|
||||
'display_id': 'de-afspraak-veilt-voor-de-warmste-week',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': 'De afspraak veilt voor de Warmste Week',
|
||||
'description': 'md5:24cb860c320dc2be7358e0e5aa317ba6',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 49.02,
|
||||
}
|
||||
},
|
||||
'expected_warnings': ['is not a supported codec'],
|
||||
}, {
|
||||
# with subtitles
|
||||
'url': 'http://www.canvas.be/video/panorama/2016/pieter-0167',
|
||||
@@ -40,7 +120,8 @@ class CanvasIE(InfoExtractor):
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'skip': 'Pagina niet gevonden',
|
||||
}, {
|
||||
'url': 'https://www.een.be/sorry-voor-alles/herbekijk-sorry-voor-alles',
|
||||
'info_dict': {
|
||||
@@ -54,7 +135,8 @@ class CanvasIE(InfoExtractor):
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'skip': 'Episode no longer available',
|
||||
}, {
|
||||
'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend',
|
||||
'only_matching': True,
|
||||
@@ -66,55 +148,21 @@ class CanvasIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
title = (self._search_regex(
|
||||
title = strip_or_none(self._search_regex(
|
||||
r'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>',
|
||||
webpage, 'title', default=None) or self._og_search_title(
|
||||
webpage)).strip()
|
||||
webpage, default=None))
|
||||
|
||||
video_id = self._html_search_regex(
|
||||
r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', group='id')
|
||||
|
||||
data = self._download_json(
|
||||
'https://mediazone.vrt.be/api/v1/%s/assets/%s'
|
||||
% (site_id, video_id), display_id)
|
||||
|
||||
formats = []
|
||||
for target in data['targetUrls']:
|
||||
format_url, format_type = target.get('url'), target.get('type')
|
||||
if not format_url or not format_type:
|
||||
continue
|
||||
if format_type == 'HLS':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, display_id, entry_protocol='m3u8_native',
|
||||
ext='mp4', preference=0, fatal=False, m3u8_id=format_type))
|
||||
elif format_type == 'HDS':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url, display_id, f4m_id=format_type, fatal=False))
|
||||
elif format_type == 'MPEG_DASH':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, display_id, mpd_id=format_type, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': format_type,
|
||||
'url': format_url,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
subtitle_urls = data.get('subtitleUrls')
|
||||
if isinstance(subtitle_urls, list):
|
||||
for subtitle in subtitle_urls:
|
||||
subtitle_url = subtitle.get('url')
|
||||
if subtitle_url and subtitle.get('type') == 'CLOSED':
|
||||
subtitles.setdefault('nl', []).append({'url': subtitle_url})
|
||||
r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id',
|
||||
group='id')
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (site_id, video_id),
|
||||
'ie_key': CanvasIE.ie_key(),
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
'formats': formats,
|
||||
'duration': float_or_none(data.get('duration'), 1000),
|
||||
'thumbnail': data.get('posterImageUrl'),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
@@ -81,6 +81,12 @@ class Channel9IE(InfoExtractor):
|
||||
|
||||
_RSS_URL = 'http://channel9.msdn.com/%s/RSS'
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
r'<iframe[^>]+src=["\'](https?://channel9\.msdn\.com/(?:[^/]+/)+)player\b',
|
||||
webpage)
|
||||
|
||||
def _extract_list(self, video_id, rss_url=None):
|
||||
if not rss_url:
|
||||
rss_url = self._RSS_URL % video_id
|
||||
|
@@ -5,7 +5,7 @@ from ..utils import remove_end
|
||||
|
||||
|
||||
class CharlieRoseIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?charlierose\.com/video(?:s|/player)/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?charlierose\.com/(?:video|episode)(?:s|/player)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://charlierose.com/videos/27996',
|
||||
'md5': 'fda41d49e67d4ce7c2411fd2c4702e09',
|
||||
@@ -24,6 +24,9 @@ class CharlieRoseIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://charlierose.com/videos/27996',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://charlierose.com/episodes/30887?autoplay=true',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_PLAYER_BASE = 'https://charlierose.com/video/player/%s'
|
||||
|
@@ -5,6 +5,7 @@ import base64
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError
|
||||
@@ -70,11 +71,9 @@ class ChilloutzoneIE(InfoExtractor):
|
||||
|
||||
# If nativePlatform is None a fallback mechanism is used (i.e. youtube embed)
|
||||
if native_platform is None:
|
||||
youtube_url = self._html_search_regex(
|
||||
r'<iframe.* src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
|
||||
webpage, 'fallback video URL', default=None)
|
||||
if youtube_url is not None:
|
||||
return self.url_result(youtube_url, ie='Youtube')
|
||||
youtube_url = YoutubeIE._extract_url(webpage)
|
||||
if youtube_url:
|
||||
return self.url_result(youtube_url, ie=YoutubeIE.ie_key())
|
||||
|
||||
# Non Fallback: Decide to use native source (e.g. youtube or vimeo) or
|
||||
# the own CDN
|
||||
|
@@ -120,13 +120,16 @@ class ComedyCentralTVIE(MTVServicesInfoExtractor):
|
||||
|
||||
|
||||
class ComedyCentralShortnameIE(InfoExtractor):
|
||||
_VALID_URL = r'^:(?P<id>tds|thedailyshow)$'
|
||||
_VALID_URL = r'^:(?P<id>tds|thedailyshow|theopposition)$'
|
||||
_TESTS = [{
|
||||
'url': ':tds',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': ':thedailyshow',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': ':theopposition',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -134,5 +137,6 @@ class ComedyCentralShortnameIE(InfoExtractor):
|
||||
shortcut_map = {
|
||||
'tds': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
|
||||
'thedailyshow': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
|
||||
'theopposition': 'http://www.cc.com/shows/the-opposition-with-jordan-klepper/full-episodes',
|
||||
}
|
||||
return self.url_result(shortcut_map[video_id])
|
||||
|
@@ -1920,7 +1920,7 @@ class InfoExtractor(object):
|
||||
# can't be used at the same time
|
||||
if '%(Number' in media_template and 's' not in representation_ms_info:
|
||||
segment_duration = None
|
||||
if 'total_number' not in representation_ms_info and 'segment_duration':
|
||||
if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
|
||||
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
|
||||
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
|
||||
representation_ms_info['fragments'] = [{
|
||||
@@ -2184,6 +2184,12 @@ class InfoExtractor(object):
|
||||
f = parse_content_type(source_attributes.get('type'))
|
||||
is_plain_url, formats = _media_formats(src, media_type, f)
|
||||
if is_plain_url:
|
||||
# res attribute is not standard but seen several times
|
||||
# in the wild
|
||||
f.update({
|
||||
'height': int_or_none(source_attributes.get('res')),
|
||||
'format_id': source_attributes.get('label'),
|
||||
})
|
||||
f.update(formats[0])
|
||||
media_info['formats'].append(f)
|
||||
else:
|
||||
@@ -2443,10 +2449,12 @@ class InfoExtractor(object):
|
||||
self._downloader.report_warning(msg)
|
||||
return res
|
||||
|
||||
def _set_cookie(self, domain, name, value, expire_time=None):
|
||||
def _set_cookie(self, domain, name, value, expire_time=None, port=None,
|
||||
path='/', secure=False, discard=False, rest={}, **kwargs):
|
||||
cookie = compat_cookiejar.Cookie(
|
||||
0, name, value, None, None, domain, None,
|
||||
None, '/', True, False, expire_time, '', None, None, None)
|
||||
0, name, value, port, port is not None, domain, True,
|
||||
domain.startswith('.'), path, True, secure, expire_time,
|
||||
discard, None, None, rest)
|
||||
self._downloader.cookiejar.set_cookie(cookie)
|
||||
|
||||
def _get_cookies(self, url):
|
||||
|
@@ -116,16 +116,16 @@ class CondeNastIE(InfoExtractor):
|
||||
entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
|
||||
return self.playlist_result(entries, playlist_title=title)
|
||||
|
||||
def _extract_video_params(self, webpage):
|
||||
query = {}
|
||||
params = self._search_regex(
|
||||
r'(?s)var params = {(.+?)}[;,]', webpage, 'player params', default=None)
|
||||
if params:
|
||||
query.update({
|
||||
'videoId': self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id'),
|
||||
'playerId': self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id'),
|
||||
'target': self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target'),
|
||||
})
|
||||
def _extract_video_params(self, webpage, display_id):
|
||||
query = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)var\s+params\s*=\s*({.+?})[;,]', webpage, 'player params',
|
||||
default='{}'),
|
||||
display_id, transform_source=js_to_json, fatal=False)
|
||||
if query:
|
||||
query['videoId'] = self._search_regex(
|
||||
r'(?:data-video-id=|currentVideoId\s*=\s*)["\']([\da-f]+)',
|
||||
webpage, 'video id', default=None)
|
||||
else:
|
||||
params = extract_attributes(self._search_regex(
|
||||
r'(<[^>]+data-js="video-player"[^>]+>)',
|
||||
@@ -141,17 +141,27 @@ class CondeNastIE(InfoExtractor):
|
||||
video_id = params['videoId']
|
||||
|
||||
video_info = None
|
||||
if params.get('playerId'):
|
||||
info_page = self._download_json(
|
||||
'http://player.cnevids.com/player/video.js',
|
||||
video_id, 'Downloading video info', fatal=False, query=params)
|
||||
if info_page:
|
||||
video_info = info_page.get('video')
|
||||
if not video_info:
|
||||
info_page = self._download_webpage(
|
||||
'http://player.cnevids.com/player/loader.js',
|
||||
video_id, 'Downloading loader info', query=params)
|
||||
else:
|
||||
|
||||
# New API path
|
||||
query = params.copy()
|
||||
query['embedType'] = 'inline'
|
||||
info_page = self._download_json(
|
||||
'http://player.cnevids.com/embed-api.json', video_id,
|
||||
'Downloading embed info', fatal=False, query=query)
|
||||
|
||||
# Old fallbacks
|
||||
if not info_page:
|
||||
if params.get('playerId'):
|
||||
info_page = self._download_json(
|
||||
'http://player.cnevids.com/player/video.js', video_id,
|
||||
'Downloading video info', fatal=False, query=params)
|
||||
if info_page:
|
||||
video_info = info_page.get('video')
|
||||
if not video_info:
|
||||
info_page = self._download_webpage(
|
||||
'http://player.cnevids.com/player/loader.js',
|
||||
video_id, 'Downloading loader info', query=params)
|
||||
if not video_info:
|
||||
info_page = self._download_webpage(
|
||||
'https://player.cnevids.com/inline/video/%s.js' % video_id,
|
||||
video_id, 'Downloading inline info', query={
|
||||
@@ -215,7 +225,7 @@ class CondeNastIE(InfoExtractor):
|
||||
if url_type == 'series':
|
||||
return self._extract_series(url, webpage)
|
||||
else:
|
||||
params = self._extract_video_params(webpage)
|
||||
params = self._extract_video_params(webpage, display_id)
|
||||
info = self._search_json_ld(
|
||||
webpage, display_id, fatal=False)
|
||||
info.update(self._extract_video(params))
|
||||
|
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
str_to_int,
|
||||
@@ -41,11 +42,9 @@ class CrackedIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
youtube_url = self._search_regex(
|
||||
r'<iframe[^>]+src="((?:https?:)?//www\.youtube\.com/embed/[^"]+)"',
|
||||
webpage, 'youtube url', default=None)
|
||||
youtube_url = YoutubeIE._extract_url(webpage)
|
||||
if youtube_url:
|
||||
return self.url_result(youtube_url, 'Youtube')
|
||||
return self.url_result(youtube_url, ie=YoutubeIE.ie_key())
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
[r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'<video\s+src="([^"]+)"'],
|
||||
|
@@ -235,7 +235,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
|
||||
# vevo embed
|
||||
vevo_id = self._search_regex(
|
||||
r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?video=(?P<id>[\w]*)',
|
||||
r'<link rel="video_src" href="[^"]*?vevo\.com[^"]*?video=(?P<id>[\w]*)',
|
||||
webpage, 'vevo embed', default=None)
|
||||
if vevo_id:
|
||||
return self.url_result('vevo:%s' % vevo_id, 'Vevo')
|
||||
@@ -325,7 +325,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
|
||||
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||
IE_NAME = 'dailymotion:playlist'
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>[^/?#&]+)'
|
||||
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
|
||||
_PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
|
||||
_TESTS = [{
|
||||
|
@@ -19,7 +19,7 @@ class DeezerPlaylistIE(InfoExtractor):
|
||||
'id': '176747451',
|
||||
'title': 'Best!',
|
||||
'uploader': 'Anonymous',
|
||||
'thumbnail': r're:^https?://cdn-images.deezer.com/images/cover/.*\.jpg$',
|
||||
'thumbnail': r're:^https?://cdn-images\.deezer\.com/images/cover/.*\.jpg$',
|
||||
},
|
||||
'playlist_count': 30,
|
||||
'skip': 'Only available in .de',
|
||||
|
@@ -31,20 +31,19 @@ from .aenetworks import (
|
||||
AENetworksIE,
|
||||
HistoryTopicIE,
|
||||
)
|
||||
from .afreecatv import (
|
||||
AfreecaTVIE,
|
||||
AfreecaTVGlobalIE,
|
||||
)
|
||||
from .afreecatv import AfreecaTVIE
|
||||
from .airmozilla import AirMozillaIE
|
||||
from .aljazeera import AlJazeeraIE
|
||||
from .alphaporno import AlphaPornoIE
|
||||
from .amcnetworks import AMCNetworksIE
|
||||
from .americastestkitchen import AmericasTestKitchenIE
|
||||
from .animeondemand import AnimeOnDemandIE
|
||||
from .anitube import AnitubeIE
|
||||
from .anvato import AnvatoIE
|
||||
from .anysex import AnySexIE
|
||||
from .aol import AolIE
|
||||
from .allocine import AllocineIE
|
||||
from .aliexpress import AliExpressLiveIE
|
||||
from .aparat import AparatIE
|
||||
from .appleconnect import AppleConnectIE
|
||||
from .appletrailers import (
|
||||
@@ -148,7 +147,10 @@ from .camdemy import (
|
||||
from .camwithher import CamWithHerIE
|
||||
from .canalplus import CanalplusIE
|
||||
from .canalc2 import Canalc2IE
|
||||
from .canvas import CanvasIE
|
||||
from .canvas import (
|
||||
CanvasIE,
|
||||
CanvasEenIE,
|
||||
)
|
||||
from .carambatv import (
|
||||
CarambaTVIE,
|
||||
CarambaTVPageIE,
|
||||
@@ -379,6 +381,7 @@ from .freesound import FreesoundIE
|
||||
from .freespeech import FreespeechIE
|
||||
from .freshlive import FreshLiveIE
|
||||
from .funimation import FunimationIE
|
||||
from .funk import FunkIE
|
||||
from .funnyordie import FunnyOrDieIE
|
||||
from .fusion import FusionIE
|
||||
from .fxnetworks import FXNetworksIE
|
||||
@@ -481,6 +484,7 @@ from .jove import JoveIE
|
||||
from .joj import JojIE
|
||||
from .jwplatform import JWPlatformIE
|
||||
from .jpopsukitv import JpopsukiIE
|
||||
from .kakao import KakaoIE
|
||||
from .kaltura import KalturaIE
|
||||
from .kamcord import KamcordIE
|
||||
from .kanalplay import KanalPlayIE
|
||||
@@ -563,6 +567,7 @@ from .mangomolo import (
|
||||
MangomoloVideoIE,
|
||||
MangomoloLiveIE,
|
||||
)
|
||||
from .manyvids import ManyVidsIE
|
||||
from .matchtv import MatchTVIE
|
||||
from .mdr import MDRIE
|
||||
from .mediaset import MediasetIE
|
||||
@@ -766,6 +771,7 @@ from .ora import OraTVIE
|
||||
from .orf import (
|
||||
ORFTVthekIE,
|
||||
ORFFM4IE,
|
||||
ORFFM4StoryIE,
|
||||
ORFOE1IE,
|
||||
ORFIPTVIE,
|
||||
)
|
||||
@@ -805,6 +811,7 @@ from .polskieradio import (
|
||||
PolskieRadioIE,
|
||||
PolskieRadioCategoryIE,
|
||||
)
|
||||
from .popcorntv import PopcornTVIE
|
||||
from .porn91 import Porn91IE
|
||||
from .porncom import PornComIE
|
||||
from .pornflip import PornFlipIE
|
||||
@@ -897,6 +904,7 @@ from .rutube import (
|
||||
RutubeEmbedIE,
|
||||
RutubeMovieIE,
|
||||
RutubePersonIE,
|
||||
RutubePlaylistIE,
|
||||
)
|
||||
from .rutv import RUTVIE
|
||||
from .ruutu import RuutuIE
|
||||
@@ -933,6 +941,7 @@ from .skynewsarabia import (
|
||||
)
|
||||
from .skysports import SkySportsIE
|
||||
from .slideshare import SlideshareIE
|
||||
from .slideslive import SlidesLiveIE
|
||||
from .slutload import SlutloadIE
|
||||
from .smotri import (
|
||||
SmotriIE,
|
||||
@@ -1236,7 +1245,10 @@ from .vodpl import VODPlIE
|
||||
from .vodplatform import VODPlatformIE
|
||||
from .voicerepublic import VoiceRepublicIE
|
||||
from .voot import VootIE
|
||||
from .voxmedia import VoxMediaIE
|
||||
from .voxmedia import (
|
||||
VoxMediaVolumeIE,
|
||||
VoxMediaIE,
|
||||
)
|
||||
from .vporn import VpornIE
|
||||
from .vrt import VRTIE
|
||||
from .vrak import VrakIE
|
||||
@@ -1335,7 +1347,6 @@ from .youtube import (
|
||||
YoutubeSearchDateIE,
|
||||
YoutubeSearchIE,
|
||||
YoutubeSearchURLIE,
|
||||
YoutubeSharedVideoIE,
|
||||
YoutubeShowIE,
|
||||
YoutubeSubscriptionsIE,
|
||||
YoutubeTruncatedIDIE,
|
||||
|
@@ -67,9 +67,9 @@ class FacebookIE(InfoExtractor):
|
||||
'uploader': 'Tennis on Facebook',
|
||||
'upload_date': '20140908',
|
||||
'timestamp': 1410199200,
|
||||
}
|
||||
},
|
||||
'skip': 'Requires logging in',
|
||||
}, {
|
||||
'note': 'Video without discernible title',
|
||||
'url': 'https://www.facebook.com/video.php?v=274175099429670',
|
||||
'info_dict': {
|
||||
'id': '274175099429670',
|
||||
@@ -78,6 +78,7 @@ class FacebookIE(InfoExtractor):
|
||||
'uploader': 'Asif Nawab Butt',
|
||||
'upload_date': '20140506',
|
||||
'timestamp': 1399398998,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
},
|
||||
'expected_warnings': [
|
||||
'title'
|
||||
@@ -94,6 +95,7 @@ class FacebookIE(InfoExtractor):
|
||||
'upload_date': '20160110',
|
||||
'timestamp': 1452431627,
|
||||
},
|
||||
'skip': 'Requires logging in',
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
|
||||
'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6',
|
||||
@@ -121,7 +123,11 @@ class FacebookIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '10153664894881749',
|
||||
'ext': 'mp4',
|
||||
'title': 'Facebook video #10153664894881749',
|
||||
'title': 'Average time to confirm recent Supreme Court nominees: 67 days Longest it\'s t...',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'timestamp': 1456259628,
|
||||
'upload_date': '20160223',
|
||||
'uploader': 'Barack Obama',
|
||||
},
|
||||
}, {
|
||||
# have 1080P, but only up to 720p in swf params
|
||||
@@ -130,10 +136,11 @@ class FacebookIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '10155529876156509',
|
||||
'ext': 'mp4',
|
||||
'title': 'Holocaust survivor becomes US citizen',
|
||||
'title': 'She survived the holocaust — and years later, she’s getting her citizenship s...',
|
||||
'timestamp': 1477818095,
|
||||
'upload_date': '20161030',
|
||||
'uploader': 'CNN',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
},
|
||||
}, {
|
||||
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
|
||||
@@ -158,6 +165,7 @@ class FacebookIE(InfoExtractor):
|
||||
'timestamp': 1477305000,
|
||||
'upload_date': '20161024',
|
||||
'uploader': 'La Guía Del Varón',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -376,6 +384,7 @@ class FacebookIE(InfoExtractor):
|
||||
timestamp = int_or_none(self._search_regex(
|
||||
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
|
||||
'timestamp', default=None))
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
@@ -383,6 +392,7 @@ class FacebookIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
'uploader': uploader,
|
||||
'timestamp': timestamp,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
return webpage, info_dict
|
||||
|
@@ -2,57 +2,113 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from .uplynk import UplynkPreplayIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
HEADRequest,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class FOXIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.fox.com/watch/255180355939/7684182528',
|
||||
_VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[\da-fA-F]+)'
|
||||
_TESTS = [{
|
||||
# clip
|
||||
'url': 'https://www.fox.com/watch/4b765a60490325103ea69888fb2bd4e8/',
|
||||
'md5': 'ebd296fcc41dd4b19f8115d8461a3165',
|
||||
'info_dict': {
|
||||
'id': '255180355939',
|
||||
'id': '4b765a60490325103ea69888fb2bd4e8',
|
||||
'ext': 'mp4',
|
||||
'title': 'Official Trailer: Gotham',
|
||||
'description': 'Tracing the rise of the great DC Comics Super-Villains and vigilantes, Gotham reveals an entirely new chapter that has never been told.',
|
||||
'duration': 129,
|
||||
'timestamp': 1400020798,
|
||||
'upload_date': '20140513',
|
||||
'uploader': 'NEWA-FNG-FOXCOM',
|
||||
'title': 'Aftermath: Bruce Wayne Develops Into The Dark Knight',
|
||||
'description': 'md5:549cd9c70d413adb32ce2a779b53b486',
|
||||
'duration': 102,
|
||||
'timestamp': 1504291893,
|
||||
'upload_date': '20170901',
|
||||
'creator': 'FOX',
|
||||
'series': 'Gotham',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# episode, geo-restricted
|
||||
'url': 'https://www.fox.com/watch/087036ca7f33c8eb79b08152b4dd75c1/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# episode, geo-restricted, tv provided required
|
||||
'url': 'https://www.fox.com/watch/30056b295fb57f7452aeeb4920bc3024/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
settings = self._parse_json(self._search_regex(
|
||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
||||
webpage, 'drupal settings'), video_id)
|
||||
fox_pdk_player = settings['fox_pdk_player']
|
||||
release_url = fox_pdk_player['release_url']
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'switch': 'http'
|
||||
}
|
||||
if fox_pdk_player.get('access') == 'locked':
|
||||
ap_p = settings['foxAdobePassProvider']
|
||||
rating = ap_p.get('videoRating')
|
||||
if rating == 'n/a':
|
||||
rating = None
|
||||
resource = self._get_mvpd_resource('fbc-fox', None, ap_p['videoGUID'], rating)
|
||||
query['auth'] = self._extract_mvpd_auth(url, video_id, 'fbc-fox', resource)
|
||||
video = self._download_json(
|
||||
'https://api.fox.com/fbc-content/v1_4/video/%s' % video_id,
|
||||
video_id, headers={
|
||||
'apikey': 'abdcbed02c124d393b39e818a4312055',
|
||||
'Content-Type': 'application/json',
|
||||
'Referer': url,
|
||||
})
|
||||
|
||||
info = self._search_json_ld(webpage, video_id, fatal=False)
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}),
|
||||
title = video['name']
|
||||
release_url = video['videoRelease']['url']
|
||||
|
||||
description = video.get('description')
|
||||
duration = int_or_none(video.get('durationInSeconds')) or int_or_none(
|
||||
video.get('duration')) or parse_duration(video.get('duration'))
|
||||
timestamp = unified_timestamp(video.get('datePublished'))
|
||||
age_limit = parse_age_limit(video.get('contentRating'))
|
||||
|
||||
data = try_get(
|
||||
video, lambda x: x['trackingData']['properties'], dict) or {}
|
||||
|
||||
creator = data.get('brand') or data.get('network') or video.get('network')
|
||||
|
||||
series = video.get('seriesName') or data.get(
|
||||
'seriesName') or data.get('show')
|
||||
season_number = int_or_none(video.get('seasonNumber'))
|
||||
episode = video.get('name')
|
||||
episode_number = int_or_none(video.get('episodeNumber'))
|
||||
release_year = int_or_none(video.get('releaseYear'))
|
||||
|
||||
if data.get('authRequired'):
|
||||
# TODO: AP
|
||||
pass
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
})
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'age_limit': age_limit,
|
||||
'creator': creator,
|
||||
'series': series,
|
||||
'season_number': season_number,
|
||||
'episode': episode,
|
||||
'episode_number': episode_number,
|
||||
'release_year': release_year,
|
||||
}
|
||||
|
||||
urlh = self._request_webpage(HEADRequest(release_url), video_id)
|
||||
video_url = compat_str(urlh.geturl())
|
||||
|
||||
if UplynkPreplayIE.suitable(video_url):
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': video_url,
|
||||
'ie_key': UplynkPreplayIE.ie_key(),
|
||||
})
|
||||
else:
|
||||
m3u8_url = self._download_json(release_url, video_id)['playURL']
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
info['formats'] = formats
|
||||
return info
|
||||
|
@@ -27,7 +27,7 @@ class FreespeechIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, title)
|
||||
info_json = self._search_regex(r'jQuery.extend\(Drupal.settings, ({.*?})\);', webpage, 'info')
|
||||
info_json = self._search_regex(r'jQuery\.extend\(Drupal\.settings, ({.*?})\);', webpage, 'info')
|
||||
info = json.loads(info_json)
|
||||
|
||||
return {
|
||||
|
43
youtube_dl/extractor/funk.py
Normal file
43
youtube_dl/extractor/funk.py
Normal file
@@ -0,0 +1,43 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .nexx import NexxIE
|
||||
from ..utils import extract_attributes
|
||||
|
||||
|
||||
class FunkIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?funk\.net/(?:mix|channel)/(?:[^/]+/)*(?P<id>[^?/#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/0/59d517e741dca10001252574/',
|
||||
'md5': '4d40974481fa3475f8bccfd20c5361f8',
|
||||
'info_dict': {
|
||||
'id': '716599',
|
||||
'ext': 'mp4',
|
||||
'title': 'Neue Rechte Welle',
|
||||
'description': 'md5:a30a53f740ffb6bfd535314c2cc5fb69',
|
||||
'timestamp': 1501337639,
|
||||
'upload_date': '20170729',
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/0/59d52049999264000182e79d/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
domain_id = NexxIE._extract_domain_id(webpage) or '741'
|
||||
nexx_id = extract_attributes(self._search_regex(
|
||||
r'(<div[^>]id=["\']mediaplayer-funk[^>]+>)',
|
||||
webpage, 'media player'))['data-id']
|
||||
|
||||
return self.url_result(
|
||||
'nexx:%s:%s' % (domain_id, nexx_id), ie=NexxIE.ie_key(),
|
||||
video_id=nexx_id)
|
@@ -105,7 +105,7 @@ class GameSpotIE(OnceIE):
|
||||
onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri')
|
||||
if onceux_url:
|
||||
formats.extend(self._extract_once_formats(re.sub(
|
||||
r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url).replace('ads/vmap/', '')))
|
||||
r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url)))
|
||||
|
||||
if not formats:
|
||||
for quality in ['sd', 'hd']:
|
||||
|
@@ -22,6 +22,8 @@ from ..utils import (
|
||||
HEADRequest,
|
||||
is_html,
|
||||
js_to_json,
|
||||
KNOWN_EXTENSIONS,
|
||||
mimetype2ext,
|
||||
orderedSet,
|
||||
sanitized_Request,
|
||||
smuggle_url,
|
||||
@@ -99,6 +101,7 @@ from .mediaset import MediasetIE
|
||||
from .joj import JojIE
|
||||
from .megaphone import MegaphoneIE
|
||||
from .vzaar import VzaarIE
|
||||
from .channel9 import Channel9IE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -1130,6 +1133,35 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
{
|
||||
# Video.js embed, multiple formats
|
||||
'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
|
||||
'info_dict': {
|
||||
'id': 'yygqldloqIk',
|
||||
'ext': 'mp4',
|
||||
'title': 'SolidWorks. Урок 6 Настройка чертежа',
|
||||
'description': 'md5:baf95267792646afdbf030e4d06b2ab3',
|
||||
'upload_date': '20130314',
|
||||
'uploader': 'PROстое3D',
|
||||
'uploader_id': 'PROstoe3D',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
# Video.js embed, single format
|
||||
'url': 'https://www.vooplayer.com/v3/watch/watch.php?v=NzgwNTg=',
|
||||
'info_dict': {
|
||||
'id': 'watch',
|
||||
'ext': 'mp4',
|
||||
'title': 'Step 1 - Good Foundation',
|
||||
'description': 'md5:d1e7ff33a29fc3eb1673d6c270d344f4',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# rtl.nl embed
|
||||
{
|
||||
'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
|
||||
@@ -1581,22 +1613,6 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'add_ie': ['BrightcoveLegacy'],
|
||||
},
|
||||
# Nexx embed
|
||||
{
|
||||
'url': 'https://www.funk.net/serien/5940e15073f6120001657956/items/593efbb173f6120001657503',
|
||||
'info_dict': {
|
||||
'id': '247746',
|
||||
'ext': 'mp4',
|
||||
'title': "Yesterday's Jam (OV)",
|
||||
'description': 'md5:09bc0984723fed34e2581624a84e05f0',
|
||||
'timestamp': 1492594816,
|
||||
'upload_date': '20170419',
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# Facebook <iframe> embed
|
||||
{
|
||||
'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
|
||||
@@ -1879,6 +1895,15 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'Building A Business Online: Principal Chairs Q & A',
|
||||
},
|
||||
},
|
||||
{
|
||||
# multiple HTML5 videos on one page
|
||||
'url': 'https://www.paragon-software.com/home/rk-free/keyscenarios.html',
|
||||
'info_dict': {
|
||||
'id': 'keyscenarios',
|
||||
'title': 'Rescue Kit 14 Free Edition - Getting started',
|
||||
},
|
||||
'playlist_count': 4,
|
||||
}
|
||||
# {
|
||||
# # TODO: find another test
|
||||
# # http://schema.org/VideoObject
|
||||
@@ -2166,7 +2191,7 @@ class GenericIE(InfoExtractor):
|
||||
# And then there are the jokers who advertise that they use RTA,
|
||||
# but actually don't.
|
||||
AGE_LIMIT_MARKERS = [
|
||||
r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
|
||||
r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>',
|
||||
]
|
||||
if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
|
||||
age_limit = 18
|
||||
@@ -2243,36 +2268,11 @@ class GenericIE(InfoExtractor):
|
||||
if vid_me_embed_url is not None:
|
||||
return self.url_result(vid_me_embed_url, 'Vidme')
|
||||
|
||||
# Look for embedded YouTube player
|
||||
matches = re.findall(r'''(?x)
|
||||
(?:
|
||||
<iframe[^>]+?src=|
|
||||
data-video-url=|
|
||||
<embed[^>]+?src=|
|
||||
embedSWF\(?:\s*|
|
||||
<object[^>]+data=|
|
||||
new\s+SWFObject\(
|
||||
)
|
||||
(["\'])
|
||||
(?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
|
||||
(?:embed|v|p)/.+?)
|
||||
\1''', webpage)
|
||||
if matches:
|
||||
# Look for YouTube embeds
|
||||
youtube_urls = YoutubeIE._extract_urls(webpage)
|
||||
if youtube_urls:
|
||||
return self.playlist_from_matches(
|
||||
matches, video_id, video_title, lambda m: unescapeHTML(m[1]))
|
||||
|
||||
# Look for lazyYT YouTube embed
|
||||
matches = re.findall(
|
||||
r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
|
||||
if matches:
|
||||
return self.playlist_from_matches(matches, video_id, video_title, lambda m: unescapeHTML(m))
|
||||
|
||||
# Look for Wordpress "YouTube Video Importer" plugin
|
||||
matches = re.findall(r'''(?x)<div[^>]+
|
||||
class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
|
||||
data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
|
||||
if matches:
|
||||
return self.playlist_from_matches(matches, video_id, video_title, lambda m: m[-1])
|
||||
youtube_urls, video_id, video_title, ie=YoutubeIE.ie_key())
|
||||
|
||||
matches = DailymotionIE._extract_urls(webpage)
|
||||
if matches:
|
||||
@@ -2856,6 +2856,11 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_from_matches(
|
||||
vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key())
|
||||
|
||||
channel9_urls = Channel9IE._extract_urls(webpage)
|
||||
if channel9_urls:
|
||||
return self.playlist_from_matches(
|
||||
channel9_urls, video_id, video_title, ie=Channel9IE.ie_key())
|
||||
|
||||
def merge_dicts(dict1, dict2):
|
||||
merged = {}
|
||||
for k, v in dict1.items():
|
||||
@@ -2871,22 +2876,23 @@ class GenericIE(InfoExtractor):
|
||||
merged[k] = v
|
||||
return merged
|
||||
|
||||
# Looking for http://schema.org/VideoObject
|
||||
json_ld = self._search_json_ld(
|
||||
webpage, video_id, default={}, expected_type='VideoObject')
|
||||
if json_ld.get('url'):
|
||||
return merge_dicts(json_ld, info_dict)
|
||||
|
||||
# Look for HTML5 media
|
||||
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
|
||||
if entries:
|
||||
for entry in entries:
|
||||
entry.update({
|
||||
if len(entries) == 1:
|
||||
entries[0].update({
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
})
|
||||
else:
|
||||
for num, entry in enumerate(entries, start=1):
|
||||
entry.update({
|
||||
'id': '%s-%s' % (video_id, num),
|
||||
'title': '%s (%d)' % (video_title, num),
|
||||
})
|
||||
for entry in entries:
|
||||
self._sort_formats(entry['formats'])
|
||||
return self.playlist_result(entries)
|
||||
return self.playlist_result(entries, video_id, video_title)
|
||||
|
||||
jwplayer_data = self._find_jwplayer_data(
|
||||
webpage, video_id, transform_source=js_to_json)
|
||||
@@ -2895,6 +2901,52 @@ class GenericIE(InfoExtractor):
|
||||
jwplayer_data, video_id, require_title=False, base_url=url)
|
||||
return merge_dicts(info, info_dict)
|
||||
|
||||
# Video.js embed
|
||||
mobj = re.search(
|
||||
r'(?s)\bvideojs\s*\(.+?\.src\s*\(\s*((?:\[.+?\]|{.+?}))\s*\)\s*;',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
sources = self._parse_json(
|
||||
mobj.group(1), video_id, transform_source=js_to_json,
|
||||
fatal=False) or []
|
||||
if not isinstance(sources, list):
|
||||
sources = [sources]
|
||||
formats = []
|
||||
for source in sources:
|
||||
src = source.get('src')
|
||||
if not src or not isinstance(src, compat_str):
|
||||
continue
|
||||
src = compat_urlparse.urljoin(url, src)
|
||||
src_type = source.get('type')
|
||||
if isinstance(src_type, compat_str):
|
||||
src_type = src_type.lower()
|
||||
ext = determine_ext(src).lower()
|
||||
if src_type == 'video/youtube':
|
||||
return self.url_result(src, YoutubeIE.ie_key())
|
||||
if src_type == 'application/dash+xml' or ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
src, video_id, mpd_id='dash', fatal=False))
|
||||
elif src_type == 'application/x-mpegurl' or ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
src, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': src,
|
||||
'ext': (mimetype2ext(src_type) or
|
||||
ext if ext in KNOWN_EXTENSIONS else 'mp4'),
|
||||
})
|
||||
if formats:
|
||||
self._sort_formats(formats)
|
||||
info_dict['formats'] = formats
|
||||
return info_dict
|
||||
|
||||
# Looking for http://schema.org/VideoObject
|
||||
json_ld = self._search_json_ld(
|
||||
webpage, video_id, default={}, expected_type='VideoObject')
|
||||
if json_ld.get('url'):
|
||||
return merge_dicts(json_ld, info_dict)
|
||||
|
||||
def check_video(vurl):
|
||||
if YoutubeIE.suitable(vurl):
|
||||
return True
|
||||
@@ -2982,7 +3034,7 @@ class GenericIE(InfoExtractor):
|
||||
# be supported by youtube-dl thus this is checked the very last (see
|
||||
# https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
|
||||
embed_url = self._html_search_meta('twitter:player', webpage, default=None)
|
||||
if embed_url:
|
||||
if embed_url and embed_url != url:
|
||||
return self.url_result(embed_url)
|
||||
|
||||
if not found:
|
||||
|
@@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class GfycatIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/)?(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/|gifs/detail/)?(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
|
||||
'info_dict': {
|
||||
@@ -44,6 +44,9 @@ class GfycatIE(InfoExtractor):
|
||||
'categories': list,
|
||||
'age_limit': 0,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://gfycat.com/gifs/detail/UnconsciousLankyIvorygull',
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -4,6 +4,7 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
lowercase_escape,
|
||||
@@ -12,27 +13,53 @@ from ..utils import (
|
||||
|
||||
|
||||
class GoogleDriveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:docs|drive)\.google\.com/
|
||||
(?:
|
||||
(?:uc|open)\?.*?id=|
|
||||
file/d/
|
||||
)|
|
||||
video\.google\.com/get_player\?.*?docid=
|
||||
)
|
||||
(?P<id>[a-zA-Z0-9_-]{28,})
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
|
||||
'md5': 'd109872761f7e7ecf353fa108c0dbe1e',
|
||||
'md5': '5c602afbbf2c1db91831f5d82f678554',
|
||||
'info_dict': {
|
||||
'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Big Buck Bunny.mp4',
|
||||
'duration': 45,
|
||||
}
|
||||
}, {
|
||||
# video can't be watched anonymously due to view count limit reached,
|
||||
# but can be downloaded (see https://github.com/rg3/youtube-dl/issues/14046)
|
||||
'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
|
||||
'md5': 'bfbd670d03a470bb1e6d4a257adec12e',
|
||||
'info_dict': {
|
||||
'id': '0B-vUyvmDLdWDcEt4WjBqcmI2XzQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Annabelle Creation (2017)- Z.V1 [TH].MP4',
|
||||
}
|
||||
}, {
|
||||
# video id is longer than 28 characters
|
||||
'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
|
||||
'md5': 'c230c67252874fddd8170e3fd1a45886',
|
||||
'info_dict': {
|
||||
'id': '1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Andreea Banica feat Smiley - Hooky Song (Official Video).mp4',
|
||||
'duration': 189,
|
||||
},
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://drive.google.com/uc?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_FORMATS_EXT = {
|
||||
'5': 'flv',
|
||||
@@ -147,47 +174,84 @@ class GoogleDriveIE(InfoExtractor):
|
||||
webpage = self._download_webpage(
|
||||
'http://docs.google.com/file/d/%s' % video_id, video_id)
|
||||
|
||||
reason = self._search_regex(
|
||||
r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
|
||||
if reason:
|
||||
raise ExtractorError(reason)
|
||||
|
||||
title = self._search_regex(r'"title"\s*,\s*"([^"]+)', webpage, 'title')
|
||||
title = self._search_regex(
|
||||
r'"title"\s*,\s*"([^"]+)', webpage, 'title',
|
||||
default=None) or self._og_search_title(webpage)
|
||||
duration = int_or_none(self._search_regex(
|
||||
r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds',
|
||||
default=None))
|
||||
fmt_stream_map = self._search_regex(
|
||||
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage,
|
||||
'fmt stream map').split(',')
|
||||
fmt_list = self._search_regex(
|
||||
r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',')
|
||||
|
||||
resolutions = {}
|
||||
for fmt in fmt_list:
|
||||
mobj = re.search(
|
||||
r'^(?P<format_id>\d+)/(?P<width>\d+)[xX](?P<height>\d+)', fmt)
|
||||
if mobj:
|
||||
resolutions[mobj.group('format_id')] = (
|
||||
int(mobj.group('width')), int(mobj.group('height')))
|
||||
|
||||
formats = []
|
||||
for fmt_stream in fmt_stream_map:
|
||||
fmt_stream_split = fmt_stream.split('|')
|
||||
if len(fmt_stream_split) < 2:
|
||||
continue
|
||||
format_id, format_url = fmt_stream_split[:2]
|
||||
f = {
|
||||
'url': lowercase_escape(format_url),
|
||||
'format_id': format_id,
|
||||
'ext': self._FORMATS_EXT[format_id],
|
||||
}
|
||||
resolution = resolutions.get(format_id)
|
||||
if resolution:
|
||||
f.update({
|
||||
'width': resolution[0],
|
||||
'height': resolution[1],
|
||||
fmt_stream_map = self._search_regex(
|
||||
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage,
|
||||
'fmt stream map', default='').split(',')
|
||||
fmt_list = self._search_regex(
|
||||
r'"fmt_list"\s*,\s*"([^"]+)', webpage,
|
||||
'fmt_list', default='').split(',')
|
||||
if fmt_stream_map and fmt_list:
|
||||
resolutions = {}
|
||||
for fmt in fmt_list:
|
||||
mobj = re.search(
|
||||
r'^(?P<format_id>\d+)/(?P<width>\d+)[xX](?P<height>\d+)', fmt)
|
||||
if mobj:
|
||||
resolutions[mobj.group('format_id')] = (
|
||||
int(mobj.group('width')), int(mobj.group('height')))
|
||||
|
||||
for fmt_stream in fmt_stream_map:
|
||||
fmt_stream_split = fmt_stream.split('|')
|
||||
if len(fmt_stream_split) < 2:
|
||||
continue
|
||||
format_id, format_url = fmt_stream_split[:2]
|
||||
f = {
|
||||
'url': lowercase_escape(format_url),
|
||||
'format_id': format_id,
|
||||
'ext': self._FORMATS_EXT[format_id],
|
||||
}
|
||||
resolution = resolutions.get(format_id)
|
||||
if resolution:
|
||||
f.update({
|
||||
'width': resolution[0],
|
||||
'height': resolution[1],
|
||||
})
|
||||
formats.append(f)
|
||||
|
||||
source_url = update_url_query(
|
||||
'https://drive.google.com/uc', {
|
||||
'id': video_id,
|
||||
'export': 'download',
|
||||
})
|
||||
urlh = self._request_webpage(
|
||||
source_url, video_id, note='Requesting source file',
|
||||
errnote='Unable to request source file', fatal=False)
|
||||
if urlh:
|
||||
def add_source_format(src_url):
|
||||
formats.append({
|
||||
'url': src_url,
|
||||
'ext': determine_ext(title, 'mp4').lower(),
|
||||
'format_id': 'source',
|
||||
'quality': 1,
|
||||
})
|
||||
formats.append(f)
|
||||
if urlh.headers.get('Content-Disposition'):
|
||||
add_source_format(source_url)
|
||||
else:
|
||||
confirmation_webpage = self._webpage_read_content(
|
||||
urlh, url, video_id, note='Downloading confirmation page',
|
||||
errnote='Unable to confirm download', fatal=False)
|
||||
if confirmation_webpage:
|
||||
confirm = self._search_regex(
|
||||
r'confirm=([^&"\']+)', confirmation_webpage,
|
||||
'confirmation code', fatal=False)
|
||||
if confirm:
|
||||
add_source_format(update_url_query(source_url, {
|
||||
'confirm': confirm,
|
||||
}))
|
||||
|
||||
if not formats:
|
||||
reason = self._search_regex(
|
||||
r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
|
||||
if reason:
|
||||
raise ExtractorError(reason, expected=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
hl = self._search_regex(
|
||||
|
@@ -61,7 +61,7 @@ class GooglePlusIE(InfoExtractor):
|
||||
'width': int(width),
|
||||
'height': int(height),
|
||||
} for width, height, video_url in re.findall(
|
||||
r'\d+,(\d+),(\d+),"(https?://[^.]+\.googleusercontent.com.*?)"', webpage)]
|
||||
r'\d+,(\d+),(\d+),"(https?://[^.]+\.googleusercontent\.com.*?)"', webpage)]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
@@ -2,6 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
@@ -25,6 +26,22 @@ class HeiseIE(InfoExtractor):
|
||||
'description': 'md5:c934cbfb326c669c2bcabcbe3d3fcd20',
|
||||
'thumbnail': r're:^https?://.*/gallery/$',
|
||||
}
|
||||
}, {
|
||||
# YouTube embed
|
||||
'url': 'http://www.heise.de/newsticker/meldung/Netflix-In-20-Jahren-vom-Videoverleih-zum-TV-Revolutionaer-3814130.html',
|
||||
'md5': 'e403d2b43fea8e405e88e3f8623909f1',
|
||||
'info_dict': {
|
||||
'id': '6kmWbXleKW4',
|
||||
'ext': 'mp4',
|
||||
'title': 'NEU IM SEPTEMBER | Netflix',
|
||||
'description': 'md5:2131f3c7525e540d5fd841de938bd452',
|
||||
'upload_date': '20170830',
|
||||
'uploader': 'Netflix Deutschland, Österreich und Schweiz',
|
||||
'uploader_id': 'netflixdach',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html',
|
||||
'only_matching': True,
|
||||
@@ -40,6 +57,16 @@ class HeiseIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_meta('fulltitle', webpage, default=None)
|
||||
if not title or title == "c't":
|
||||
title = self._search_regex(
|
||||
r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"',
|
||||
webpage, 'title')
|
||||
|
||||
yt_urls = YoutubeIE._extract_urls(webpage)
|
||||
if yt_urls:
|
||||
return self.playlist_from_matches(yt_urls, video_id, title, ie=YoutubeIE.ie_key())
|
||||
|
||||
container_id = self._search_regex(
|
||||
r'<div class="videoplayerjw"[^>]+data-container="([0-9]+)"',
|
||||
webpage, 'container ID')
|
||||
@@ -47,12 +74,6 @@ class HeiseIE(InfoExtractor):
|
||||
r'<div class="videoplayerjw"[^>]+data-sequenz="([0-9]+)"',
|
||||
webpage, 'sequenz ID')
|
||||
|
||||
title = self._html_search_meta('fulltitle', webpage, default=None)
|
||||
if not title or title == "c't":
|
||||
title = self._search_regex(
|
||||
r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"',
|
||||
webpage, 'title')
|
||||
|
||||
doc = self._download_xml(
|
||||
'http://www.heise.de/videout/feed', video_id, query={
|
||||
'container': container_id,
|
||||
|
@@ -104,7 +104,7 @@ class HRTiIE(HRTiBaseIE):
|
||||
(?:
|
||||
hrti:(?P<short_id>[0-9]+)|
|
||||
https?://
|
||||
hrti\.hrt\.hr/\#/video/show/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?
|
||||
hrti\.hrt\.hr/(?:\#/)?video/show/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
@@ -129,6 +129,9 @@ class HRTiIE(HRTiBaseIE):
|
||||
}, {
|
||||
'url': 'hrti:2181385',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://hrti.hrt.hr/video/show/3873068/cuvar-dvorca-dramska-serija-14',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -170,7 +173,7 @@ class HRTiIE(HRTiBaseIE):
|
||||
|
||||
|
||||
class HRTiPlaylistIE(HRTiBaseIE):
|
||||
_VALID_URL = r'https?://hrti.hrt.hr/#/video/list/category/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?'
|
||||
_VALID_URL = r'https?://hrti\.hrt\.hr/(?:#/)?video/list/category/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?'
|
||||
_TESTS = [{
|
||||
'url': 'https://hrti.hrt.hr/#/video/list/category/212/ekumena',
|
||||
'info_dict': {
|
||||
@@ -182,6 +185,9 @@ class HRTiPlaylistIE(HRTiBaseIE):
|
||||
}, {
|
||||
'url': 'https://hrti.hrt.hr/#/video/list/category/212/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://hrti.hrt.hr/video/list/category/212/ekumena',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -203,7 +203,7 @@ class PCMagIE(IGNIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?pcmag\.com/(?P<type>videos|article2)(/.+)?/(?P<name_or_id>.+)'
|
||||
IE_NAME = 'pcmag'
|
||||
|
||||
_EMBED_RE = r'iframe.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content.html?[^"]*url=([^"]+)["&]'
|
||||
_EMBED_RE = r'iframe\.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content\.html?[^"]*url=([^"]+)["&]'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
|
||||
|
@@ -69,9 +69,9 @@ class InfoQIE(BokeCCBaseIE):
|
||||
}]
|
||||
|
||||
def _extract_cookies(self, webpage):
|
||||
policy = self._search_regex(r'InfoQConstants.scp\s*=\s*\'([^\']+)\'', webpage, 'policy')
|
||||
signature = self._search_regex(r'InfoQConstants.scs\s*=\s*\'([^\']+)\'', webpage, 'signature')
|
||||
key_pair_id = self._search_regex(r'InfoQConstants.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id')
|
||||
policy = self._search_regex(r'InfoQConstants\.scp\s*=\s*\'([^\']+)\'', webpage, 'policy')
|
||||
signature = self._search_regex(r'InfoQConstants\.scs\s*=\s*\'([^\']+)\'', webpage, 'signature')
|
||||
key_pair_id = self._search_regex(r'InfoQConstants\.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id')
|
||||
return 'CloudFront-Policy=%s; CloudFront-Signature=%s; CloudFront-Key-Pair-Id=%s' % (
|
||||
policy, signature, key_pair_id)
|
||||
|
||||
|
@@ -30,7 +30,7 @@ class JeuxVideoIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, title)
|
||||
title = self._html_search_meta('name', webpage) or self._og_search_title(webpage)
|
||||
config_url = self._html_search_regex(
|
||||
r'data-src(?:set-video)?="(/contenu/medias/video.php.*?)"',
|
||||
r'data-src(?:set-video)?="(/contenu/medias/video\.php.*?)"',
|
||||
webpage, 'config URL')
|
||||
config_url = 'http://www.jeuxvideo.com' + config_url
|
||||
|
||||
|
149
youtube_dl/extractor/kakao.py
Normal file
149
youtube_dl/extractor/kakao.py
Normal file
@@ -0,0 +1,149 @@
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class KakaoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tv\.kakao\.com/channel/(?P<channel>\d+)/cliplink/(?P<id>\d+)'
|
||||
_API_BASE = 'http://tv.kakao.com/api/v1/ft/cliplinks'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://tv.kakao.com/channel/2671005/cliplink/301965083',
|
||||
'md5': '702b2fbdeb51ad82f5c904e8c0766340',
|
||||
'info_dict': {
|
||||
'id': '301965083',
|
||||
'ext': 'mp4',
|
||||
'title': '乃木坂46 バナナマン 「3期生紹介コーナーが始動!顔高低差GPも!」 『乃木坂工事中』',
|
||||
'uploader_id': 2671005,
|
||||
'uploader': '그랑그랑이',
|
||||
'timestamp': 1488160199,
|
||||
'upload_date': '20170227',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://tv.kakao.com/channel/2653210/cliplink/300103180',
|
||||
'md5': 'a8917742069a4dd442516b86e7d66529',
|
||||
'info_dict': {
|
||||
'id': '300103180',
|
||||
'ext': 'mp4',
|
||||
'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
|
||||
'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
|
||||
'uploader_id': 2653210,
|
||||
'uploader': '쇼 음악중심',
|
||||
'timestamp': 1485684628,
|
||||
'upload_date': '20170129',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
player_header = {
|
||||
'Referer': update_url_query(
|
||||
'http://tv.kakao.com/embed/player/cliplink/%s' % video_id, {
|
||||
'service': 'kakao_tv',
|
||||
'autoplay': '1',
|
||||
'profile': 'HIGH',
|
||||
'wmode': 'transparent',
|
||||
})
|
||||
}
|
||||
|
||||
QUERY_COMMON = {
|
||||
'player': 'monet_html5',
|
||||
'referer': url,
|
||||
'uuid': '',
|
||||
'service': 'kakao_tv',
|
||||
'section': '',
|
||||
'dteType': 'PC',
|
||||
}
|
||||
|
||||
query = QUERY_COMMON.copy()
|
||||
query['fields'] = 'clipLink,clip,channel,hasPlusFriend,-service,-tagList'
|
||||
impress = self._download_json(
|
||||
'%s/%s/impress' % (self._API_BASE, video_id),
|
||||
video_id, 'Downloading video info',
|
||||
query=query, headers=player_header)
|
||||
|
||||
clip_link = impress['clipLink']
|
||||
clip = clip_link['clip']
|
||||
|
||||
title = clip.get('title') or clip_link.get('displayTitle')
|
||||
|
||||
tid = impress.get('tid', '')
|
||||
|
||||
query = QUERY_COMMON.copy()
|
||||
query.update({
|
||||
'tid': tid,
|
||||
'profile': 'HIGH',
|
||||
})
|
||||
raw = self._download_json(
|
||||
'%s/%s/raw' % (self._API_BASE, video_id),
|
||||
video_id, 'Downloading video formats info',
|
||||
query=query, headers=player_header)
|
||||
|
||||
formats = []
|
||||
for fmt in raw.get('outputList', []):
|
||||
try:
|
||||
profile_name = fmt['profile']
|
||||
fmt_url_json = self._download_json(
|
||||
'%s/%s/raw/videolocation' % (self._API_BASE, video_id),
|
||||
video_id,
|
||||
'Downloading video URL for profile %s' % profile_name,
|
||||
query={
|
||||
'service': 'kakao_tv',
|
||||
'section': '',
|
||||
'tid': tid,
|
||||
'profile': profile_name
|
||||
}, headers=player_header, fatal=False)
|
||||
|
||||
if fmt_url_json is None:
|
||||
continue
|
||||
|
||||
fmt_url = fmt_url_json['url']
|
||||
formats.append({
|
||||
'url': fmt_url,
|
||||
'format_id': profile_name,
|
||||
'width': int_or_none(fmt.get('width')),
|
||||
'height': int_or_none(fmt.get('height')),
|
||||
'format_note': fmt.get('label'),
|
||||
'filesize': int_or_none(fmt.get('filesize'))
|
||||
})
|
||||
except KeyError:
|
||||
pass
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbs = []
|
||||
for thumb in clip.get('clipChapterThumbnailList', []):
|
||||
thumbs.append({
|
||||
'url': thumb.get('thumbnailUrl'),
|
||||
'id': compat_str(thumb.get('timeInSec')),
|
||||
'preference': -1 if thumb.get('isDefault') else 0
|
||||
})
|
||||
top_thumbnail = clip.get('thumbnailUrl')
|
||||
if top_thumbnail:
|
||||
thumbs.append({
|
||||
'url': top_thumbnail,
|
||||
'preference': 10,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': clip.get('description'),
|
||||
'uploader': clip_link.get('channel', {}).get('name'),
|
||||
'uploader_id': clip_link.get('channelId'),
|
||||
'thumbnails': thumbs,
|
||||
'timestamp': unified_timestamp(clip_link.get('createTime')),
|
||||
'duration': int_or_none(clip.get('duration')),
|
||||
'view_count': int_or_none(clip.get('playCount')),
|
||||
'like_count': int_or_none(clip.get('likeCount')),
|
||||
'comment_count': int_or_none(clip.get('commentCount')),
|
||||
'formats': formats,
|
||||
}
|
@@ -1,5 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .canvas import CanvasIE
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
@@ -7,7 +8,7 @@ class KetnetIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ketnet.be/kijken/zomerse-filmpjes',
|
||||
'md5': 'd907f7b1814ef0fa285c0475d9994ed7',
|
||||
'md5': '6bdeb65998930251bbd1c510750edba9',
|
||||
'info_dict': {
|
||||
'id': 'zomerse-filmpjes',
|
||||
'ext': 'mp4',
|
||||
@@ -15,6 +16,20 @@ class KetnetIE(InfoExtractor):
|
||||
'description': 'Gluur mee met Ghost Rockers op de filmset',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
}
|
||||
}, {
|
||||
# mzid in playerConfig instead of sources
|
||||
'url': 'https://www.ketnet.be/kijken/nachtwacht/de-greystook',
|
||||
'md5': '90139b746a0a9bd7bb631283f6e2a64e',
|
||||
'info_dict': {
|
||||
'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||
'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||
'ext': 'flv',
|
||||
'title': 'Nachtwacht: De Greystook',
|
||||
'description': 'md5:1db3f5dc4c7109c821261e7512975be7',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 1468.03,
|
||||
},
|
||||
'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
|
||||
}, {
|
||||
'url': 'https://www.ketnet.be/kijken/karrewiet/uitzending-8-september-2016',
|
||||
'only_matching': True,
|
||||
@@ -38,6 +53,12 @@ class KetnetIE(InfoExtractor):
|
||||
'player config'),
|
||||
video_id)
|
||||
|
||||
mzid = config.get('mzid')
|
||||
if mzid:
|
||||
return self.url_result(
|
||||
'https://mediazone.vrt.be/api/v1/ketnet/assets/%s' % mzid,
|
||||
CanvasIE.ie_key(), video_id=mzid)
|
||||
|
||||
title = config['title']
|
||||
|
||||
formats = []
|
||||
|
@@ -338,7 +338,7 @@ class LivestreamOriginalIE(InfoExtractor):
|
||||
info = {
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._search_regex(r'channelLogo.src\s*=\s*"([^"]+)"', webpage, 'thumbnail', None),
|
||||
'thumbnail': self._search_regex(r'channelLogo\.src\s*=\s*"([^"]+)"', webpage, 'thumbnail', None),
|
||||
}
|
||||
video_data = self._download_json(stream_url, content_id)
|
||||
is_live = video_data.get('isLive')
|
||||
|
@@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class LnkGoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?lnkgo\.alfa\.lt/visi-video/(?P<show>[^/]+)/ziurek-(?P<id>[A-Za-z0-9-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?lnkgo\.(?:alfa\.)?lt/visi-video/(?P<show>[^/]+)/ziurek-(?P<id>[A-Za-z0-9-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://lnkgo.alfa.lt/visi-video/yra-kaip-yra/ziurek-yra-kaip-yra-162',
|
||||
'info_dict': {
|
||||
@@ -42,6 +42,9 @@ class LnkGoIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True, # HLS download
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_AGE_LIMITS = {
|
||||
'N-7': 7,
|
||||
|
@@ -94,7 +94,7 @@ class LyndaBaseIE(InfoExtractor):
|
||||
class LyndaIE(LyndaBaseIE):
|
||||
IE_NAME = 'lynda'
|
||||
IE_DESC = 'lynda.com videos'
|
||||
_VALID_URL = r'https?://(?:www\.)?lynda\.com/(?:[^/]+/[^/]+/(?P<course_id>\d+)|player/embed)/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:lynda\.com|educourse\.ga)/(?:[^/]+/[^/]+/(?P<course_id>\d+)|player/embed)/(?P<id>\d+)'
|
||||
|
||||
_TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'
|
||||
|
||||
@@ -110,6 +110,9 @@ class LyndaIE(LyndaBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.lynda.com/player/embed/133770?tr=foo=1;bar=g;fizz=rt&fs=0',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://educourse.ga/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _raise_unavailable(self, video_id):
|
||||
@@ -253,7 +256,7 @@ class LyndaCourseIE(LyndaBaseIE):
|
||||
|
||||
# Course link equals to welcome/introduction video link of same course
|
||||
# We will recognize it as course link
|
||||
_VALID_URL = r'https?://(?:www|m)\.lynda\.com/(?P<coursepath>[^/]+/[^/]+/(?P<courseid>\d+))-\d\.html'
|
||||
_VALID_URL = r'https?://(?:www|m)\.(?:lynda\.com|educourse\.ga)/(?P<coursepath>[^/]+/[^/]+/(?P<courseid>\d+))-\d\.html'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
@@ -5,7 +5,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class MakerTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})'
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer\.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})'
|
||||
_TEST = {
|
||||
'url': 'http://www.maker.tv/video/Fh3QgymL9gsc',
|
||||
'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e',
|
||||
|
@@ -22,7 +22,7 @@ class MangomoloBaseIE(InfoExtractor):
|
||||
|
||||
format_url = self._html_search_regex(
|
||||
[
|
||||
r'file\s*:\s*"(https?://[^"]+?/playlist.m3u8)',
|
||||
r'file\s*:\s*"(https?://[^"]+?/playlist\.m3u8)',
|
||||
r'<a[^>]+href="(rtsp://[^"]+)"'
|
||||
], webpage, 'format url')
|
||||
formats = self._extract_wowza_formats(
|
||||
|
48
youtube_dl/extractor/manyvids.py
Normal file
48
youtube_dl/extractor/manyvids.py
Normal file
@@ -0,0 +1,48 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class ManyVidsIE(InfoExtractor):
|
||||
_VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/',
|
||||
'md5': '03f11bb21c52dd12a05be21a5c7dcc97',
|
||||
'info_dict': {
|
||||
'id': '133957',
|
||||
'ext': 'mp4',
|
||||
'title': 'everthing about me (Preview)',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'data-(?:video-filepath|meta-video)\s*=s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage, 'video URL', group='url')
|
||||
|
||||
title = '%s (Preview)' % self._html_search_regex(
|
||||
r'<h2[^>]+class="m-a-0"[^>]*>([^<]+)', webpage, 'title')
|
||||
|
||||
like_count = int_or_none(self._search_regex(
|
||||
r'data-likes=["\'](\d+)', webpage, 'like count', default=None))
|
||||
view_count = int_or_none(self._html_search_regex(
|
||||
r'(?s)<span[^>]+class="views-wrapper"[^>]*>(.+?)</span', webpage,
|
||||
'view count', default=None))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'formats': [{
|
||||
'url': video_url,
|
||||
}],
|
||||
}
|
@@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
class MeipaiIE(InfoExtractor):
|
||||
IE_DESC = '美拍'
|
||||
_VALID_URL = r'https?://(?:www\.)?meipai.com/media/(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?meipai\.com/media/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
# regular uploaded video
|
||||
'url': 'http://www.meipai.com/media/531697625',
|
||||
|
@@ -12,12 +12,16 @@ from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse,
|
||||
compat_zip
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
OnDemandPagedList,
|
||||
str_to_int,
|
||||
try_get,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -54,27 +58,12 @@ class MixcloudIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_keys = [
|
||||
'return { requestAnimationFrame: function(callback) { callback(); }, innerHeight: 500 };',
|
||||
'pleasedontdownloadourmusictheartistswontgetpaid',
|
||||
'window.addEventListener = window.addEventListener || function() {};',
|
||||
'(function() { return new Date().toLocaleDateString(); })()'
|
||||
]
|
||||
_current_key = None
|
||||
|
||||
# See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js
|
||||
def _decrypt_play_info(self, play_info, video_id):
|
||||
play_info = base64.b64decode(play_info.encode('ascii'))
|
||||
for num, key in enumerate(self._keys, start=1):
|
||||
try:
|
||||
return self._parse_json(
|
||||
''.join([
|
||||
compat_chr(compat_ord(ch) ^ compat_ord(key[idx % len(key)]))
|
||||
for idx, ch in enumerate(play_info)]),
|
||||
video_id)
|
||||
except ExtractorError:
|
||||
if num == len(self._keys):
|
||||
raise
|
||||
@staticmethod
|
||||
def _decrypt_xor_cipher(key, ciphertext):
|
||||
"""Encrypt/Decrypt XOR cipher. Both ways are possible because it's XOR."""
|
||||
return ''.join([
|
||||
compat_chr(compat_ord(ch) ^ compat_ord(k))
|
||||
for ch, k in compat_zip(ciphertext, itertools.cycle(key))])
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -84,54 +73,119 @@ class MixcloudIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, track_id)
|
||||
|
||||
if not self._current_key:
|
||||
js_url = self._search_regex(
|
||||
r'<script[^>]+\bsrc=["\"](https://(?:www\.)?mixcloud\.com/media/js2/www_js_4\.[^>]+\.js)',
|
||||
webpage, 'js url', default=None)
|
||||
if js_url:
|
||||
js = self._download_webpage(js_url, track_id, fatal=False)
|
||||
if js:
|
||||
KEY_RE_TEMPLATE = r'player\s*:\s*{.*?\b%s\s*:\s*(["\'])(?P<key>(?:(?!\1).)+)\1'
|
||||
for key_name in ('value', 'key_value'):
|
||||
key = self._search_regex(
|
||||
KEY_RE_TEMPLATE % key_name, js, 'key',
|
||||
default=None, group='key')
|
||||
if key and isinstance(key, compat_str):
|
||||
self._keys.insert(0, key)
|
||||
self._current_key = key
|
||||
# Legacy path
|
||||
encrypted_play_info = self._search_regex(
|
||||
r'm-play-info="([^"]+)"', webpage, 'play info', default=None)
|
||||
|
||||
if encrypted_play_info is not None:
|
||||
# Decode
|
||||
encrypted_play_info = base64.b64decode(encrypted_play_info)
|
||||
else:
|
||||
# New path
|
||||
full_info_json = self._parse_json(self._html_search_regex(
|
||||
r'<script id="relay-data" type="text/x-mixcloud">([^<]+)</script>',
|
||||
webpage, 'play info'), 'play info')
|
||||
for item in full_info_json:
|
||||
item_data = try_get(
|
||||
item, lambda x: x['cloudcast']['data']['cloudcastLookup'],
|
||||
dict)
|
||||
if try_get(item_data, lambda x: x['streamInfo']['url']):
|
||||
info_json = item_data
|
||||
break
|
||||
else:
|
||||
raise ExtractorError('Failed to extract matching stream info')
|
||||
|
||||
message = self._html_search_regex(
|
||||
r'(?s)<div[^>]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)',
|
||||
webpage, 'error message', default=None)
|
||||
|
||||
encrypted_play_info = self._search_regex(
|
||||
r'm-play-info="([^"]+)"', webpage, 'play info')
|
||||
js_url = self._search_regex(
|
||||
r'<script[^>]+\bsrc=["\"](https://(?:www\.)?mixcloud\.com/media/(?:js2/www_js_4|js/www)\.[^>]+\.js)',
|
||||
webpage, 'js url')
|
||||
js = self._download_webpage(js_url, track_id, 'Downloading JS')
|
||||
# Known plaintext attack
|
||||
if encrypted_play_info:
|
||||
kps = ['{"stream_url":']
|
||||
kpa_target = encrypted_play_info
|
||||
else:
|
||||
kps = ['https://', 'http://']
|
||||
kpa_target = base64.b64decode(info_json['streamInfo']['url'])
|
||||
for kp in kps:
|
||||
partial_key = self._decrypt_xor_cipher(kpa_target, kp)
|
||||
for quote in ["'", '"']:
|
||||
key = self._search_regex(
|
||||
r'{0}({1}[^{0}]*){0}'.format(quote, re.escape(partial_key)),
|
||||
js, 'encryption key', default=None)
|
||||
if key is not None:
|
||||
break
|
||||
else:
|
||||
continue
|
||||
break
|
||||
else:
|
||||
raise ExtractorError('Failed to extract encryption key')
|
||||
|
||||
play_info = self._decrypt_play_info(encrypted_play_info, track_id)
|
||||
if encrypted_play_info is not None:
|
||||
play_info = self._parse_json(self._decrypt_xor_cipher(key, encrypted_play_info), 'play info')
|
||||
if message and 'stream_url' not in play_info:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
|
||||
song_url = play_info['stream_url']
|
||||
formats = [{
|
||||
'format_id': 'normal',
|
||||
'url': song_url
|
||||
}]
|
||||
|
||||
if message and 'stream_url' not in play_info:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
|
||||
title = self._html_search_regex(r'm-title="([^"]+)"', webpage, 'title')
|
||||
thumbnail = self._proto_relative_url(self._html_search_regex(
|
||||
r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', fatal=False))
|
||||
uploader = self._html_search_regex(
|
||||
r'm-owner-name="([^"]+)"', webpage, 'uploader', fatal=False)
|
||||
uploader_id = self._search_regex(
|
||||
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
|
||||
description = self._og_search_description(webpage)
|
||||
view_count = str_to_int(self._search_regex(
|
||||
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
|
||||
r'/listeners/?">([0-9,.]+)</a>',
|
||||
r'(?:m|data)-tooltip=["\']([\d,.]+) plays'],
|
||||
webpage, 'play count', default=None))
|
||||
|
||||
song_url = play_info['stream_url']
|
||||
else:
|
||||
title = info_json['name']
|
||||
thumbnail = urljoin(
|
||||
'https://thumbnailer.mixcloud.com/unsafe/600x600/',
|
||||
try_get(info_json, lambda x: x['picture']['urlRoot'], compat_str))
|
||||
uploader = try_get(info_json, lambda x: x['owner']['displayName'])
|
||||
uploader_id = try_get(info_json, lambda x: x['owner']['username'])
|
||||
description = try_get(info_json, lambda x: x['description'])
|
||||
view_count = int_or_none(try_get(info_json, lambda x: x['plays']))
|
||||
|
||||
title = self._html_search_regex(r'm-title="([^"]+)"', webpage, 'title')
|
||||
thumbnail = self._proto_relative_url(self._html_search_regex(
|
||||
r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', fatal=False))
|
||||
uploader = self._html_search_regex(
|
||||
r'm-owner-name="([^"]+)"', webpage, 'uploader', fatal=False)
|
||||
uploader_id = self._search_regex(
|
||||
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
|
||||
description = self._og_search_description(webpage)
|
||||
view_count = str_to_int(self._search_regex(
|
||||
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
|
||||
r'/listeners/?">([0-9,.]+)</a>',
|
||||
r'(?:m|data)-tooltip=["\']([\d,.]+) plays'],
|
||||
webpage, 'play count', default=None))
|
||||
stream_info = info_json['streamInfo']
|
||||
formats = []
|
||||
|
||||
for url_key in ('url', 'hlsUrl', 'dashUrl'):
|
||||
format_url = stream_info.get(url_key)
|
||||
if not format_url:
|
||||
continue
|
||||
decrypted = self._decrypt_xor_cipher(key, base64.b64decode(format_url))
|
||||
if not decrypted:
|
||||
continue
|
||||
if url_key == 'hlsUrl':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
decrypted, track_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif url_key == 'dashUrl':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
decrypted, track_id, mpd_id='dash', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': 'http',
|
||||
'url': decrypted,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': track_id,
|
||||
'title': title,
|
||||
'url': song_url,
|
||||
'formats': formats,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
@@ -237,7 +291,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
|
||||
functools.partial(
|
||||
self._tracks_page_func,
|
||||
'%s/%s' % (user_id, list_type), video_id, 'list of %s' % list_type),
|
||||
self._PAGE_SIZE, use_cache=True)
|
||||
self._PAGE_SIZE)
|
||||
|
||||
return self.playlist_result(
|
||||
entries, video_id, '%s (%s)' % (username, list_type), description)
|
||||
|
@@ -8,8 +8,8 @@ from .common import InfoExtractor
|
||||
|
||||
class MorningstarIE(InfoExtractor):
|
||||
IE_DESC = 'morningstar.com'
|
||||
_VALID_URL = r'https?://(?:www\.)?morningstar\.com/[cC]over/video[cC]enter\.aspx\?id=(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:(?:www|news)\.)morningstar\.com/[cC]over/video[cC]enter\.aspx\?id=(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.morningstar.com/cover/videocenter.aspx?id=615869',
|
||||
'md5': '6c0acface7a787aadc8391e4bbf7b0f5',
|
||||
'info_dict': {
|
||||
@@ -19,7 +19,10 @@ class MorningstarIE(InfoExtractor):
|
||||
'description': "Vanguard's Joel Dickson on managing higher tax rates for high-income earners and fund capital-gain distributions in 2013.",
|
||||
'thumbnail': r're:^https?://.*m(?:orning)?star\.com/.+thumb\.jpg$'
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://news.morningstar.com/cover/videocenter.aspx?id=825556',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
@@ -258,7 +258,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
|
||||
if mgid is None or ':' not in mgid:
|
||||
mgid = self._search_regex(
|
||||
[r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
|
||||
[r'data-mgid="(.*?)"', r'swfobject\.embedSWF\(".*?(mgid:.*?)"'],
|
||||
webpage, 'mgid', default=None)
|
||||
|
||||
if not mgid:
|
||||
|
@@ -160,7 +160,7 @@ class MyVideoIE(InfoExtractor):
|
||||
else:
|
||||
video_playpath = ''
|
||||
|
||||
video_swfobj = self._search_regex(r'swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
|
||||
video_swfobj = self._search_regex(r'swfobject\.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
|
||||
video_swfobj = compat_urllib_parse_unquote(video_swfobj)
|
||||
|
||||
video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
|
||||
|
@@ -111,7 +111,7 @@ class NationalGeographicIE(ThePlatformIE, AdobePassIE):
|
||||
release_url = self._search_regex(
|
||||
r'video_auth_playlist_url\s*=\s*"([^"]+)"',
|
||||
webpage, 'release url')
|
||||
theplatform_path = self._search_regex(r'https?://link.theplatform.com/s/([^?]+)', release_url, 'theplatform path')
|
||||
theplatform_path = self._search_regex(r'https?://link\.theplatform\.com/s/([^?]+)', release_url, 'theplatform path')
|
||||
video_id = theplatform_path.split('/')[-1]
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
|
@@ -43,7 +43,7 @@ class NaverIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
|
||||
m_id = re.search(r'var rmcPlayer = new nhn\.rmcnmv\.RMCVideoPlayer\("(.+?)", "(.+?)"',
|
||||
webpage)
|
||||
if m_id is None:
|
||||
error = self._html_search_regex(
|
||||
|
@@ -122,7 +122,7 @@ class NBAIE(TurnerBaseIE):
|
||||
playlist_title = self._og_search_title(webpage, fatal=False)
|
||||
entries = OnDemandPagedList(
|
||||
functools.partial(self._fetch_page, team, video_id),
|
||||
self._PAGE_SIZE, use_cache=True)
|
||||
self._PAGE_SIZE)
|
||||
|
||||
return self.playlist_result(entries, team, playlist_title)
|
||||
|
||||
|
@@ -15,7 +15,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class NBCIE(AdobePassIE):
|
||||
_VALID_URL = r'(?P<permalink>https?://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+))'
|
||||
_VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+))'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -72,6 +72,7 @@ class NBCIE(AdobePassIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
permalink, video_id = re.match(self._VALID_URL, url).groups()
|
||||
permalink = 'http' + permalink
|
||||
video_data = self._download_json(
|
||||
'https://api.nbc.com/v3/videos', video_id, query={
|
||||
'filter[permalink]': permalink,
|
||||
@@ -109,10 +110,10 @@ class NBCSportsVPlayerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI',
|
||||
'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI',
|
||||
'info_dict': {
|
||||
'id': '9CsDKds0kvHI',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
|
||||
'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
|
||||
'timestamp': 1426270238,
|
||||
@@ -120,7 +121,7 @@ class NBCSportsVPlayerIE(InfoExtractor):
|
||||
'uploader': 'NBCU-SPORTS',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://vplayer.nbcsports.com/p/BxmELC/nbc_embedshare/select/_hqLjQ95yx8Z',
|
||||
'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/_hqLjQ95yx8Z',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@@ -134,7 +135,8 @@ class NBCSportsVPlayerIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
theplatform_url = self._og_search_video_url(webpage)
|
||||
theplatform_url = self._og_search_video_url(webpage).replace(
|
||||
'vplayer.nbcsports.com', 'player.theplatform.com')
|
||||
return self.url_result(theplatform_url, 'ThePlatform')
|
||||
|
||||
|
||||
|
@@ -18,7 +18,13 @@ from ..utils import (
|
||||
|
||||
|
||||
class NexxIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P<domain_id>\d+)/videos/byid/(?P<id>\d+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P<domain_id>\d+)/videos/byid/|
|
||||
nexx:(?P<domain_id_s>\d+):
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
# movie
|
||||
'url': 'https://api.nexx.cloud/v3/748/videos/byid/128907',
|
||||
@@ -62,8 +68,18 @@ class NexxIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'nexx:748:128907',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_domain_id(webpage):
|
||||
mobj = re.search(
|
||||
r'<script\b[^>]+\bsrc=["\'](?:https?:)?//require\.nexx(?:\.cloud|cdn\.com)/(?P<id>\d+)',
|
||||
webpage)
|
||||
return mobj.group('id') if mobj else None
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
# Reference:
|
||||
@@ -72,11 +88,8 @@ class NexxIE(InfoExtractor):
|
||||
entries = []
|
||||
|
||||
# JavaScript Integration
|
||||
mobj = re.search(
|
||||
r'<script\b[^>]+\bsrc=["\']https?://require\.nexx(?:\.cloud|cdn\.com)/(?P<id>\d+)',
|
||||
webpage)
|
||||
if mobj:
|
||||
domain_id = mobj.group('id')
|
||||
domain_id = NexxIE._extract_domain_id(webpage)
|
||||
if domain_id:
|
||||
for video_id in re.findall(
|
||||
r'(?is)onPLAYReady.+?_play\.init\s*\(.+?\s*,\s*["\']?(\d+)',
|
||||
webpage):
|
||||
@@ -112,7 +125,8 @@ class NexxIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
domain_id, video_id = mobj.group('domain_id', 'id')
|
||||
domain_id = mobj.group('domain_id') or mobj.group('domain_id_s')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
# Reverse engineered from JS code (see getDeviceID function)
|
||||
device_id = '%d:%d:%d%d' % (
|
||||
|
@@ -6,6 +6,7 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
smuggle_url,
|
||||
try_get,
|
||||
)
|
||||
@@ -24,8 +25,6 @@ class NoovoIE(InfoExtractor):
|
||||
'timestamp': 1491399228,
|
||||
'upload_date': '20170405',
|
||||
'uploader_id': '618566855001',
|
||||
'creator': 'vtele',
|
||||
'view_count': int,
|
||||
'series': 'RPM+',
|
||||
},
|
||||
'params': {
|
||||
@@ -37,13 +36,11 @@ class NoovoIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '5395865725001',
|
||||
'title': 'Épisode 13 : Les retrouvailles',
|
||||
'description': 'md5:336d5ebc5436534e61d16e63ddfca327',
|
||||
'description': 'md5:888c3330f0c1b4476c5bc99a1c040473',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1492019320,
|
||||
'upload_date': '20170412',
|
||||
'uploader_id': '618566855001',
|
||||
'creator': 'vtele',
|
||||
'view_count': int,
|
||||
'series': "L'amour est dans le pré",
|
||||
'season_number': 5,
|
||||
'episode': 'Épisode 13',
|
||||
@@ -58,40 +55,46 @@ class NoovoIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
data = self._download_json(
|
||||
'http://api.noovo.ca/api/v1/pages/single-episode/%s' % video_id,
|
||||
video_id)['data']
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
content = try_get(data, lambda x: x['contents'][0])
|
||||
bc_url = BrightcoveNewIE._extract_url(self, webpage)
|
||||
|
||||
brightcove_id = data.get('brightcoveId') or content['brightcoveId']
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)dataLayer\.push\(\s*({.+?})\s*\);', webpage, 'data',
|
||||
default='{}'),
|
||||
video_id, transform_source=js_to_json, fatal=False)
|
||||
|
||||
title = try_get(
|
||||
data, lambda x: x['video']['nom'],
|
||||
compat_str) or self._html_search_meta(
|
||||
'dcterms.Title', webpage, 'title', fatal=True)
|
||||
|
||||
description = self._html_search_meta(
|
||||
('dcterms.Description', 'description'), webpage, 'description')
|
||||
|
||||
series = try_get(
|
||||
data, (
|
||||
lambda x: x['show']['title'],
|
||||
lambda x: x['season']['show']['title']),
|
||||
compat_str)
|
||||
data, lambda x: x['emission']['nom']) or self._search_regex(
|
||||
r'<div[^>]+class="banner-card__subtitle h4"[^>]*>([^<]+)',
|
||||
webpage, 'series', default=None)
|
||||
|
||||
episode = None
|
||||
og = data.get('og')
|
||||
if isinstance(og, dict) and og.get('type') == 'video.episode':
|
||||
episode = og.get('title')
|
||||
season_el = try_get(data, lambda x: x['emission']['saison'], dict) or {}
|
||||
season = try_get(season_el, lambda x: x['nom'], compat_str)
|
||||
season_number = int_or_none(try_get(season_el, lambda x: x['numero']))
|
||||
|
||||
video = content or data
|
||||
episode_el = try_get(season_el, lambda x: x['episode'], dict) or {}
|
||||
episode = try_get(episode_el, lambda x: x['nom'], compat_str)
|
||||
episode_number = int_or_none(try_get(episode_el, lambda x: x['numero']))
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': BrightcoveNewIE.ie_key(),
|
||||
'url': smuggle_url(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
||||
{'geo_countries': ['CA']}),
|
||||
'id': brightcove_id,
|
||||
'title': video.get('title'),
|
||||
'creator': video.get('source'),
|
||||
'view_count': int_or_none(video.get('viewsCount')),
|
||||
'url': smuggle_url(bc_url, {'geo_countries': ['CA']}),
|
||||
'title': title,
|
||||
'description': description,
|
||||
'series': series,
|
||||
'season_number': int_or_none(try_get(
|
||||
data, lambda x: x['season']['seasonNumber'])),
|
||||
'season': season,
|
||||
'season_number': season_number,
|
||||
'episode': episode,
|
||||
'episode_number': int_or_none(data.get('episodeNumber')),
|
||||
'episode_number': episode_number,
|
||||
}
|
||||
|
@@ -469,7 +469,7 @@ class SchoolTVIE(NPODataMidEmbedIE):
|
||||
|
||||
class HetKlokhuisIE(NPODataMidEmbedIE):
|
||||
IE_NAME = 'hetklokhuis'
|
||||
_VALID_URL = r'https?://(?:www\.)?hetklokhuis.nl/[^/]+/\d+/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?hetklokhuis\.nl/[^/]+/\d+/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://hetklokhuis.nl/tv-uitzending/3471/Zwaartekrachtsgolven',
|
||||
|
@@ -7,7 +7,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class OnceIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://.+?\.unicornmedia\.com/now/[^/]+/[^/]+/(?P<domain_id>[^/]+)/(?P<application_id>[^/]+)/(?:[^/]+/)?(?P<media_item_id>[^/]+)/content\.(?:once|m3u8|mp4)'
|
||||
_VALID_URL = r'https?://.+?\.unicornmedia\.com/now/(?:ads/vmap/)?[^/]+/[^/]+/(?P<domain_id>[^/]+)/(?P<application_id>[^/]+)/(?:[^/]+/)?(?P<media_item_id>[^/]+)/content\.(?:once|m3u8|mp4)'
|
||||
ADAPTIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/master/playlist/%s/%s/%s/content.m3u8'
|
||||
PROGRESSIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/media/progressive/%s/%s/%s/%s/content.mp4'
|
||||
|
||||
|
@@ -1,14 +1,244 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import tempfile
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_chr
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
compat_kwargs,
|
||||
)
|
||||
from ..utils import (
|
||||
check_executable,
|
||||
determine_ext,
|
||||
encodeArgument,
|
||||
ExtractorError,
|
||||
get_element_by_id,
|
||||
get_exe_version,
|
||||
is_outdated_version,
|
||||
std_headers,
|
||||
)
|
||||
|
||||
|
||||
def cookie_to_dict(cookie):
|
||||
cookie_dict = {
|
||||
'name': cookie.name,
|
||||
'value': cookie.value,
|
||||
}
|
||||
if cookie.port_specified:
|
||||
cookie_dict['port'] = cookie.port
|
||||
if cookie.domain_specified:
|
||||
cookie_dict['domain'] = cookie.domain
|
||||
if cookie.path_specified:
|
||||
cookie_dict['path'] = cookie.path
|
||||
if cookie.expires is not None:
|
||||
cookie_dict['expires'] = cookie.expires
|
||||
if cookie.secure is not None:
|
||||
cookie_dict['secure'] = cookie.secure
|
||||
if cookie.discard is not None:
|
||||
cookie_dict['discard'] = cookie.discard
|
||||
try:
|
||||
if (cookie.has_nonstandard_attr('httpOnly') or
|
||||
cookie.has_nonstandard_attr('httponly') or
|
||||
cookie.has_nonstandard_attr('HttpOnly')):
|
||||
cookie_dict['httponly'] = True
|
||||
except TypeError:
|
||||
pass
|
||||
return cookie_dict
|
||||
|
||||
|
||||
def cookie_jar_to_list(cookie_jar):
|
||||
return [cookie_to_dict(cookie) for cookie in cookie_jar]
|
||||
|
||||
|
||||
class PhantomJSwrapper(object):
|
||||
"""PhantomJS wrapper class
|
||||
|
||||
This class is experimental.
|
||||
"""
|
||||
|
||||
_TEMPLATE = r'''
|
||||
phantom.onError = function(msg, trace) {{
|
||||
var msgStack = ['PHANTOM ERROR: ' + msg];
|
||||
if(trace && trace.length) {{
|
||||
msgStack.push('TRACE:');
|
||||
trace.forEach(function(t) {{
|
||||
msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line
|
||||
+ (t.function ? ' (in function ' + t.function +')' : ''));
|
||||
}});
|
||||
}}
|
||||
console.error(msgStack.join('\n'));
|
||||
phantom.exit(1);
|
||||
}};
|
||||
var page = require('webpage').create();
|
||||
var fs = require('fs');
|
||||
var read = {{ mode: 'r', charset: 'utf-8' }};
|
||||
var write = {{ mode: 'w', charset: 'utf-8' }};
|
||||
JSON.parse(fs.read("{cookies}", read)).forEach(function(x) {{
|
||||
phantom.addCookie(x);
|
||||
}});
|
||||
page.settings.resourceTimeout = {timeout};
|
||||
page.settings.userAgent = "{ua}";
|
||||
page.onLoadStarted = function() {{
|
||||
page.evaluate(function() {{
|
||||
delete window._phantom;
|
||||
delete window.callPhantom;
|
||||
}});
|
||||
}};
|
||||
var saveAndExit = function() {{
|
||||
fs.write("{html}", page.content, write);
|
||||
fs.write("{cookies}", JSON.stringify(phantom.cookies), write);
|
||||
phantom.exit();
|
||||
}};
|
||||
page.onLoadFinished = function(status) {{
|
||||
if(page.url === "") {{
|
||||
page.setContent(fs.read("{html}", read), "{url}");
|
||||
}}
|
||||
else {{
|
||||
{jscode}
|
||||
}}
|
||||
}};
|
||||
page.open("");
|
||||
'''
|
||||
|
||||
_TMP_FILE_NAMES = ['script', 'html', 'cookies']
|
||||
|
||||
@staticmethod
|
||||
def _version():
|
||||
return get_exe_version('phantomjs', version_re=r'([0-9.]+)')
|
||||
|
||||
def __init__(self, extractor, required_version=None, timeout=10000):
|
||||
self.exe = check_executable('phantomjs', ['-v'])
|
||||
if not self.exe:
|
||||
raise ExtractorError('PhantomJS executable not found in PATH, '
|
||||
'download it from http://phantomjs.org',
|
||||
expected=True)
|
||||
|
||||
self.extractor = extractor
|
||||
|
||||
if required_version:
|
||||
version = self._version()
|
||||
if is_outdated_version(version, required_version):
|
||||
self.extractor._downloader.report_warning(
|
||||
'Your copy of PhantomJS is outdated, update it to version '
|
||||
'%s or newer if you encounter any errors.' % required_version)
|
||||
|
||||
self.options = {
|
||||
'timeout': timeout,
|
||||
}
|
||||
self._TMP_FILES = {}
|
||||
for name in self._TMP_FILE_NAMES:
|
||||
tmp = tempfile.NamedTemporaryFile(delete=False)
|
||||
tmp.close()
|
||||
self._TMP_FILES[name] = tmp
|
||||
|
||||
def __del__(self):
|
||||
for name in self._TMP_FILE_NAMES:
|
||||
try:
|
||||
os.remove(self._TMP_FILES[name].name)
|
||||
except:
|
||||
pass
|
||||
|
||||
def _save_cookies(self, url):
|
||||
cookies = cookie_jar_to_list(self.extractor._downloader.cookiejar)
|
||||
for cookie in cookies:
|
||||
if 'path' not in cookie:
|
||||
cookie['path'] = '/'
|
||||
if 'domain' not in cookie:
|
||||
cookie['domain'] = compat_urlparse.urlparse(url).netloc
|
||||
with open(self._TMP_FILES['cookies'].name, 'wb') as f:
|
||||
f.write(json.dumps(cookies).encode('utf-8'))
|
||||
|
||||
def _load_cookies(self):
|
||||
with open(self._TMP_FILES['cookies'].name, 'rb') as f:
|
||||
cookies = json.loads(f.read().decode('utf-8'))
|
||||
for cookie in cookies:
|
||||
if cookie['httponly'] is True:
|
||||
cookie['rest'] = {'httpOnly': None}
|
||||
if 'expiry' in cookie:
|
||||
cookie['expire_time'] = cookie['expiry']
|
||||
self.extractor._set_cookie(**compat_kwargs(cookie))
|
||||
|
||||
def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'):
|
||||
"""
|
||||
Downloads webpage (if needed) and executes JS
|
||||
|
||||
Params:
|
||||
url: website url
|
||||
html: optional, html code of website
|
||||
video_id: video id
|
||||
note: optional, displayed when downloading webpage
|
||||
note2: optional, displayed when executing JS
|
||||
headers: custom http headers
|
||||
jscode: code to be executed when page is loaded
|
||||
|
||||
Returns tuple with:
|
||||
* downloaded website (after JS execution)
|
||||
* anything you print with `console.log` (but not inside `page.execute`!)
|
||||
|
||||
In most cases you don't need to add any `jscode`.
|
||||
It is executed in `page.onLoadFinished`.
|
||||
`saveAndExit();` is mandatory, use it instead of `phantom.exit()`
|
||||
It is possible to wait for some element on the webpage, for example:
|
||||
var check = function() {
|
||||
var elementFound = page.evaluate(function() {
|
||||
return document.querySelector('#b.done') !== null;
|
||||
});
|
||||
if(elementFound)
|
||||
saveAndExit();
|
||||
else
|
||||
window.setTimeout(check, 500);
|
||||
}
|
||||
|
||||
page.evaluate(function(){
|
||||
document.querySelector('#a').click();
|
||||
});
|
||||
check();
|
||||
"""
|
||||
if 'saveAndExit();' not in jscode:
|
||||
raise ExtractorError('`saveAndExit();` not found in `jscode`')
|
||||
if not html:
|
||||
html = self.extractor._download_webpage(url, video_id, note=note, headers=headers)
|
||||
with open(self._TMP_FILES['html'].name, 'wb') as f:
|
||||
f.write(html.encode('utf-8'))
|
||||
|
||||
self._save_cookies(url)
|
||||
|
||||
replaces = self.options
|
||||
replaces['url'] = url
|
||||
user_agent = headers.get('User-Agent') or std_headers['User-Agent']
|
||||
replaces['ua'] = user_agent.replace('"', '\\"')
|
||||
replaces['jscode'] = jscode
|
||||
|
||||
for x in self._TMP_FILE_NAMES:
|
||||
replaces[x] = self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"')
|
||||
|
||||
with open(self._TMP_FILES['script'].name, 'wb') as f:
|
||||
f.write(self._TEMPLATE.format(**replaces).encode('utf-8'))
|
||||
|
||||
if video_id is None:
|
||||
self.extractor.to_screen('%s' % (note2,))
|
||||
else:
|
||||
self.extractor.to_screen('%s: %s' % (video_id, note2))
|
||||
|
||||
p = subprocess.Popen([
|
||||
self.exe, '--ssl-protocol=any',
|
||||
self._TMP_FILES['script'].name
|
||||
], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
out, err = p.communicate()
|
||||
if p.returncode != 0:
|
||||
raise ExtractorError(
|
||||
'Executing JS failed\n:' + encodeArgument(err))
|
||||
with open(self._TMP_FILES['html'].name, 'rb') as f:
|
||||
html = f.read().decode('utf-8')
|
||||
|
||||
self._load_cookies()
|
||||
|
||||
return (html, encodeArgument(out))
|
||||
|
||||
|
||||
class OpenloadIE(InfoExtractor):
|
||||
@@ -58,6 +288,8 @@ class OpenloadIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
@@ -66,47 +298,22 @@ class OpenloadIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id)
|
||||
url = 'https://openload.co/embed/%s/' % video_id
|
||||
headers = {
|
||||
'User-Agent': self._USER_AGENT,
|
||||
}
|
||||
|
||||
webpage = self._download_webpage(url, video_id, headers=headers)
|
||||
|
||||
if 'File not found' in webpage or 'deleted by the owner' in webpage:
|
||||
raise ExtractorError('File not found', expected=True)
|
||||
raise ExtractorError('File not found', expected=True, video_id=video_id)
|
||||
|
||||
ol_id = self._search_regex(
|
||||
'<span[^>]+id="[^"]+"[^>]*>([0-9A-Za-z]+)</span>',
|
||||
webpage, 'openload ID')
|
||||
phantom = PhantomJSwrapper(self, required_version='2.0')
|
||||
webpage, _ = phantom.get(url, html=webpage, video_id=video_id, headers=headers)
|
||||
|
||||
decoded = ''
|
||||
a = ol_id[0:24]
|
||||
b = []
|
||||
for i in range(0, len(a), 8):
|
||||
b.append(int(a[i:i + 8] or '0', 16))
|
||||
ol_id = ol_id[24:]
|
||||
j = 0
|
||||
k = 0
|
||||
while j < len(ol_id):
|
||||
c = 128
|
||||
d = 0
|
||||
e = 0
|
||||
f = 0
|
||||
_more = True
|
||||
while _more:
|
||||
if j + 1 >= len(ol_id):
|
||||
c = 143
|
||||
f = int(ol_id[j:j + 2] or '0', 16)
|
||||
j += 2
|
||||
d += (f & 127) << e
|
||||
e += 7
|
||||
_more = f >= c
|
||||
g = d ^ b[k % 3]
|
||||
for i in range(4):
|
||||
char_dec = (g >> 8 * i) & (c + 127)
|
||||
char = compat_chr(char_dec)
|
||||
if char != '#':
|
||||
decoded += char
|
||||
k += 1
|
||||
decoded_id = get_element_by_id('streamurl', webpage)
|
||||
|
||||
video_url = 'https://openload.co/stream/%s?mime=true'
|
||||
video_url = video_url % decoded
|
||||
video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id
|
||||
|
||||
title = self._og_search_title(webpage, default=None) or self._search_regex(
|
||||
r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
|
||||
@@ -114,15 +321,17 @@ class OpenloadIE(InfoExtractor):
|
||||
'description', webpage, 'title', fatal=True)
|
||||
|
||||
entries = self._parse_html5_media_entries(url, webpage, video_id)
|
||||
subtitles = entries[0]['subtitles'] if entries else None
|
||||
entry = entries[0] if entries else {}
|
||||
subtitles = entry.get('subtitles')
|
||||
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||
'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
|
||||
'url': video_url,
|
||||
# Seems all videos have extensions in their titles
|
||||
'ext': determine_ext(title, 'mp4'),
|
||||
'subtitles': subtitles,
|
||||
'http_headers': headers,
|
||||
}
|
||||
return info_dict
|
||||
|
@@ -6,14 +6,15 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
unified_strdate,
|
||||
strip_jsonp,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
HEADRequest,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
remove_end,
|
||||
strip_jsonp,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
@@ -307,3 +308,108 @@ class ORFIPTVIE(InfoExtractor):
|
||||
'upload_date': upload_date,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class ORFFM4StoryIE(InfoExtractor):
|
||||
IE_NAME = 'orf:fm4:story'
|
||||
IE_DESC = 'fm4.orf.at stories'
|
||||
_VALID_URL = r'https?://fm4\.orf\.at/stories/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://fm4.orf.at/stories/2865738/',
|
||||
'playlist': [{
|
||||
'md5': 'e1c2c706c45c7b34cf478bbf409907ca',
|
||||
'info_dict': {
|
||||
'id': '547792',
|
||||
'ext': 'flv',
|
||||
'title': 'Manu Delago und Inner Tongue live',
|
||||
'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.',
|
||||
'duration': 1748.52,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20170913',
|
||||
},
|
||||
}, {
|
||||
'md5': 'c6dd2179731f86f4f55a7b49899d515f',
|
||||
'info_dict': {
|
||||
'id': '547798',
|
||||
'ext': 'flv',
|
||||
'title': 'Manu Delago und Inner Tongue live (2)',
|
||||
'duration': 1504.08,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20170913',
|
||||
'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.',
|
||||
},
|
||||
}],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
story_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, story_id)
|
||||
|
||||
entries = []
|
||||
all_ids = orderedSet(re.findall(r'data-video(?:id)?="(\d+)"', webpage))
|
||||
for idx, video_id in enumerate(all_ids):
|
||||
data = self._download_json(
|
||||
'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id,
|
||||
video_id)[0]
|
||||
|
||||
duration = float_or_none(data['duration'], 1000)
|
||||
|
||||
video = data['sources']['q8c']
|
||||
load_balancer_url = video['loadBalancerUrl']
|
||||
abr = int_or_none(video.get('audioBitrate'))
|
||||
vbr = int_or_none(video.get('bitrate'))
|
||||
fps = int_or_none(video.get('videoFps'))
|
||||
width = int_or_none(video.get('videoWidth'))
|
||||
height = int_or_none(video.get('videoHeight'))
|
||||
thumbnail = video.get('preview')
|
||||
|
||||
rendition = self._download_json(
|
||||
load_balancer_url, video_id, transform_source=strip_jsonp)
|
||||
|
||||
f = {
|
||||
'abr': abr,
|
||||
'vbr': vbr,
|
||||
'fps': fps,
|
||||
'width': width,
|
||||
'height': height,
|
||||
}
|
||||
|
||||
formats = []
|
||||
for format_id, format_url in rendition['redirect'].items():
|
||||
if format_id == 'rtmp':
|
||||
ff = f.copy()
|
||||
ff.update({
|
||||
'url': format_url,
|
||||
'format_id': format_id,
|
||||
})
|
||||
formats.append(ff)
|
||||
elif determine_ext(format_url) == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url, video_id, f4m_id=format_id))
|
||||
elif determine_ext(format_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', m3u8_id=format_id))
|
||||
else:
|
||||
continue
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = remove_end(self._og_search_title(webpage), ' - fm4.ORF.at')
|
||||
if idx >= 1:
|
||||
# Titles are duplicates, make them unique
|
||||
title += ' (' + str(idx + 1) + ')'
|
||||
description = self._og_search_description(webpage)
|
||||
upload_date = unified_strdate(self._html_search_meta(
|
||||
'dc.date', webpage, 'upload date'))
|
||||
|
||||
entries.append({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
return self.playlist_result(entries)
|
||||
|
78
youtube_dl/extractor/popcorntv.py
Normal file
78
youtube_dl/extractor/popcorntv.py
Normal file
@@ -0,0 +1,78 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class PopcornTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://[^/]+\.popcorntv\.it/guarda/(?P<display_id>[^/]+)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://animemanga.popcorntv.it/guarda/food-wars-battaglie-culinarie-episodio-01/9183',
|
||||
'md5': '47d65a48d147caf692ab8562fe630b45',
|
||||
'info_dict': {
|
||||
'id': '9183',
|
||||
'display_id': 'food-wars-battaglie-culinarie-episodio-01',
|
||||
'ext': 'mp4',
|
||||
'title': 'Food Wars, Battaglie Culinarie | Episodio 01',
|
||||
'description': 'md5:b8bea378faae4651d3b34c6e112463d0',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'timestamp': 1497610857,
|
||||
'upload_date': '20170616',
|
||||
'duration': 1440,
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://cinema.popcorntv.it/guarda/smash-cut/10433',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id, video_id = mobj.group('display_id', 'id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
m3u8_url = extract_attributes(
|
||||
self._search_regex(
|
||||
r'(<link[^>]+itemprop=["\'](?:content|embed)Url[^>]*>)',
|
||||
webpage, 'content'
|
||||
))['href']
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
|
||||
title = self._search_regex(
|
||||
r'<h1[^>]+itemprop=["\']name[^>]*>([^<]+)', webpage,
|
||||
'title', default=None) or self._og_search_title(webpage)
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<article[^>]+itemprop=["\']description[^>]*>(.+?)</article>',
|
||||
webpage, 'description', fatal=False)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
timestamp = unified_timestamp(self._html_search_meta(
|
||||
'uploadDate', webpage, 'timestamp'))
|
||||
print(self._html_search_meta(
|
||||
'duration', webpage))
|
||||
duration = int_or_none(self._html_search_meta(
|
||||
'duration', webpage), invscale=60)
|
||||
view_count = int_or_none(self._html_search_meta(
|
||||
'interactionCount', webpage, 'view count'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
}
|
@@ -14,7 +14,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class PornFlipIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[0-9A-Za-z]{11})'
|
||||
_VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[0-9A-Za-z-]{11})'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.pornflip.com/v/wz7DfNhMmep',
|
||||
'md5': '98c46639849145ae1fd77af532a9278c',
|
||||
@@ -34,6 +34,12 @@ class PornFlipIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.pornflip.com/embed/wz7DfNhMmep',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornflip.com/v/EkRD6-vS2-s',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornflip.com/embed/EkRD6-vS2-s',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -54,7 +54,7 @@ class PornHdIE(InfoExtractor):
|
||||
r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title')
|
||||
|
||||
sources = self._parse_json(js_to_json(self._search_regex(
|
||||
r"(?s)sources'?\s*:\s*(\{.+?\})\s*\}[;,)]",
|
||||
r"(?s)sources'?\s*[:=]\s*(\{.+?\})",
|
||||
webpage, 'sources', default='{}')), video_id)
|
||||
|
||||
if not sources:
|
||||
@@ -82,7 +82,8 @@ class PornHdIE(InfoExtractor):
|
||||
view_count = int_or_none(self._html_search_regex(
|
||||
r'(\d+) views\s*<', webpage, 'view count', fatal=False))
|
||||
thumbnail = self._search_regex(
|
||||
r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
|
||||
r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage,
|
||||
'thumbnail', fatal=False, group='url')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -20,20 +20,37 @@ from ..utils import (
|
||||
class RadioCanadaIE(InfoExtractor):
|
||||
IE_NAME = 'radiocanada'
|
||||
_VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272',
|
||||
'info_dict': {
|
||||
'id': '7184272',
|
||||
'ext': 'mp4',
|
||||
'title': 'Le parcours du tireur capté sur vidéo',
|
||||
'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa',
|
||||
'upload_date': '20141023',
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272',
|
||||
'info_dict': {
|
||||
'id': '7184272',
|
||||
'ext': 'mp4',
|
||||
'title': 'Le parcours du tireur capté sur vidéo',
|
||||
'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa',
|
||||
'upload_date': '20141023',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
{
|
||||
# empty Title
|
||||
'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7754998/',
|
||||
'info_dict': {
|
||||
'id': '7754998',
|
||||
'ext': 'mp4',
|
||||
'title': 'letelejournal22h',
|
||||
'description': 'INTEGRALE WEB 22H-TJ',
|
||||
'upload_date': '20170720',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
@@ -59,6 +76,7 @@ class RadioCanadaIE(InfoExtractor):
|
||||
device_types.append('android')
|
||||
|
||||
formats = []
|
||||
error = None
|
||||
# TODO: extract f4m formats
|
||||
# f4m formats can be extracted using flashhd device_type but they produce unplayable file
|
||||
for device_type in device_types:
|
||||
@@ -84,8 +102,8 @@ class RadioCanadaIE(InfoExtractor):
|
||||
if not v_url:
|
||||
continue
|
||||
if v_url == 'null':
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, xpath_text(v_data, 'message')), expected=True)
|
||||
error = xpath_text(v_data, 'message')
|
||||
continue
|
||||
ext = determine_ext(v_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
@@ -129,6 +147,9 @@ class RadioCanadaIE(InfoExtractor):
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
base_url + '/manifest.f4m', video_id,
|
||||
f4m_id='hds', fatal=False))
|
||||
if not formats and error:
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
@@ -141,7 +162,7 @@ class RadioCanadaIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': get_meta('Title'),
|
||||
'title': get_meta('Title') or get_meta('AV-nomEmission'),
|
||||
'description': get_meta('Description') or get_meta('ShortDescription'),
|
||||
'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'),
|
||||
'duration': int_or_none(get_meta('length')),
|
||||
|
@@ -345,11 +345,11 @@ class RaiIE(RaiBaseIE):
|
||||
media_type = media['type']
|
||||
if 'Audio' in media_type:
|
||||
relinker_info = {
|
||||
'formats': {
|
||||
'formats': [{
|
||||
'format_id': media.get('formatoAudio'),
|
||||
'url': media['audioUrl'],
|
||||
'ext': media.get('formatoAudio'),
|
||||
}
|
||||
}]
|
||||
}
|
||||
elif 'Video' in media_type:
|
||||
relinker_info = self._extract_relinker_info(media['mediaUri'], content_id)
|
||||
|
@@ -35,6 +35,8 @@ class RedditIE(InfoExtractor):
|
||||
'https://v.redd.it/%s/DASHPlaylist.mpd' % video_id, video_id,
|
||||
mpd_id='dash', fatal=False))
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_id,
|
||||
|
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
@@ -62,7 +63,23 @@ class RedTubeIE(InfoExtractor):
|
||||
'format_id': format_id,
|
||||
'height': int_or_none(format_id),
|
||||
})
|
||||
else:
|
||||
medias = self._parse_json(
|
||||
self._search_regex(
|
||||
r'mediaDefinition\s*:\s*(\[.+?\])', webpage,
|
||||
'media definitions', default='{}'),
|
||||
video_id, fatal=False)
|
||||
if medias and isinstance(medias, list):
|
||||
for media in medias:
|
||||
format_url = media.get('videoUrl')
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
continue
|
||||
format_id = media.get('quality')
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': format_id,
|
||||
'height': int_or_none(format_id),
|
||||
})
|
||||
if not formats:
|
||||
video_url = self._html_search_regex(
|
||||
r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
|
||||
formats.append({'url': video_url})
|
||||
@@ -73,7 +90,7 @@ class RedTubeIE(InfoExtractor):
|
||||
r'<span[^>]+class="added-time"[^>]*>ADDED ([^<]+)<',
|
||||
webpage, 'upload date', fatal=False))
|
||||
duration = int_or_none(self._search_regex(
|
||||
r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
|
||||
r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None))
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'<span[^>]*>VIEWS</span></td>\s*<td>([\d,.]+)',
|
||||
webpage, 'view count', fatal=False))
|
||||
|
@@ -10,6 +10,7 @@ from ..compat import (
|
||||
compat_struct_unpack,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
remove_end,
|
||||
@@ -84,6 +85,18 @@ class RTVEALaCartaIE(InfoExtractor):
|
||||
'title': 'TODO',
|
||||
},
|
||||
'skip': 'The f4m manifest can\'t be used yet',
|
||||
}, {
|
||||
'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/',
|
||||
'md5': 'e55e162379ad587e9640eda4f7353c0f',
|
||||
'info_dict': {
|
||||
'id': '4236788',
|
||||
'ext': 'mp4',
|
||||
'title': 'Servir y proteger - Capítulo 104 ',
|
||||
'duration': 3222.0,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
|
||||
'only_matching': True,
|
||||
@@ -107,24 +120,41 @@ class RTVEALaCartaIE(InfoExtractor):
|
||||
video_id)['page']['items'][0]
|
||||
if info['state'] == 'DESPU':
|
||||
raise ExtractorError('The video is no longer available', expected=True)
|
||||
title = info['title']
|
||||
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id)
|
||||
png_request = sanitized_Request(png_url)
|
||||
png_request.add_header('Referer', url)
|
||||
png = self._download_webpage(png_request, video_id, 'Downloading url information')
|
||||
video_url = _decrypt_url(png)
|
||||
if not video_url.endswith('.f4m'):
|
||||
ext = determine_ext(video_url)
|
||||
|
||||
formats = []
|
||||
if not video_url.endswith('.f4m') and ext != 'm3u8':
|
||||
if '?' not in video_url:
|
||||
video_url = video_url.replace('resources/', 'auth/resources/')
|
||||
video_url = video_url.replace('.net.rtve', '.multimedia.cdn.rtve')
|
||||
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url, video_id, f4m_id='hds', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = None
|
||||
if info.get('sbtFile') is not None:
|
||||
subtitles = self.extract_subtitles(video_id, info['sbtFile'])
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info['title'],
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': info.get('image'),
|
||||
'page_url': url,
|
||||
'subtitles': subtitles,
|
||||
|
@@ -25,7 +25,7 @@ class RUHDIE(InfoExtractor):
|
||||
video_url = self._html_search_regex(
|
||||
r'<param name="src" value="([^"]+)"', webpage, 'video url')
|
||||
title = self._html_search_regex(
|
||||
r'<title>([^<]+) RUHD.ru - Видео Высокого качества №1 в России!</title>',
|
||||
r'<title>([^<]+) RUHD\.ru - Видео Высокого качества №1 в России!</title>',
|
||||
webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<div id="longdesc">(.+?)<span id="showlink">',
|
||||
|
@@ -7,43 +7,84 @@ import itertools
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
unified_strdate,
|
||||
bool_or_none,
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class RutubeIE(InfoExtractor):
|
||||
class RutubeBaseIE(InfoExtractor):
|
||||
def _extract_video(self, video, video_id=None, require_title=True):
|
||||
title = video['title'] if require_title else video.get('title')
|
||||
|
||||
age_limit = video.get('is_adult')
|
||||
if age_limit is not None:
|
||||
age_limit = 18 if age_limit is True else 0
|
||||
|
||||
uploader_id = try_get(video, lambda x: x['author']['id'])
|
||||
category = try_get(video, lambda x: x['category']['name'])
|
||||
|
||||
return {
|
||||
'id': video.get('id') or video_id,
|
||||
'title': title,
|
||||
'description': video.get('description'),
|
||||
'thumbnail': video.get('thumbnail_url'),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'uploader': try_get(video, lambda x: x['author']['name']),
|
||||
'uploader_id': compat_str(uploader_id) if uploader_id else None,
|
||||
'timestamp': unified_timestamp(video.get('created_ts')),
|
||||
'category': [category] if category else None,
|
||||
'age_limit': age_limit,
|
||||
'view_count': int_or_none(video.get('hits')),
|
||||
'comment_count': int_or_none(video.get('comments_count')),
|
||||
'is_live': bool_or_none(video.get('is_livestream')),
|
||||
}
|
||||
|
||||
|
||||
class RutubeIE(RutubeBaseIE):
|
||||
IE_NAME = 'rutube'
|
||||
IE_DESC = 'Rutube videos'
|
||||
_VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/(?P<id>[\da-z]{32})'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
|
||||
'md5': '79938ade01294ef7e27574890d0d3769',
|
||||
'info_dict': {
|
||||
'id': '3eac3b4561676c17df9132a9a1e62e3e',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': 'Раненный кенгуру забежал в аптеку',
|
||||
'description': 'http://www.ntdtv.ru ',
|
||||
'duration': 80,
|
||||
'uploader': 'NTDRussian',
|
||||
'uploader_id': '29790',
|
||||
'timestamp': 1381943602,
|
||||
'upload_date': '20131016',
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {
|
||||
# It requires ffmpeg (m3u8 download)
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_type=source',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if RutubePlaylistIE.suitable(url) else super(RutubeIE, cls).suitable(url)
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [mobj.group('url') for mobj in re.finditer(
|
||||
@@ -52,12 +93,12 @@ class RutubeIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video = self._download_json(
|
||||
'http://rutube.ru/api/video/%s/?format=json' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
|
||||
# Some videos don't have the author field
|
||||
author = video.get('author') or {}
|
||||
info = self._extract_video(video, video_id)
|
||||
|
||||
options = self._download_json(
|
||||
'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
|
||||
@@ -79,19 +120,8 @@ class RutubeIE(InfoExtractor):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video['id'],
|
||||
'title': video['title'],
|
||||
'description': video['description'],
|
||||
'duration': video['duration'],
|
||||
'view_count': video['hits'],
|
||||
'formats': formats,
|
||||
'thumbnail': video['thumbnail_url'],
|
||||
'uploader': author.get('name'),
|
||||
'uploader_id': compat_str(author['id']) if author else None,
|
||||
'upload_date': unified_strdate(video['created_ts']),
|
||||
'age_limit': 18 if video['is_adult'] else 0,
|
||||
}
|
||||
info['formats'] = formats
|
||||
return info
|
||||
|
||||
|
||||
class RutubeEmbedIE(InfoExtractor):
|
||||
@@ -103,7 +133,8 @@ class RutubeEmbedIE(InfoExtractor):
|
||||
'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
|
||||
'info_dict': {
|
||||
'id': 'a10e53b86e8f349080f718582ce4c661',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'timestamp': 1387830582,
|
||||
'upload_date': '20131223',
|
||||
'uploader_id': '297833',
|
||||
'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89',
|
||||
@@ -111,7 +142,7 @@ class RutubeEmbedIE(InfoExtractor):
|
||||
'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'Requires ffmpeg',
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://rutube.ru/play/embed/8083783',
|
||||
@@ -125,10 +156,51 @@ class RutubeEmbedIE(InfoExtractor):
|
||||
canonical_url = self._html_search_regex(
|
||||
r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage,
|
||||
'Canonical URL')
|
||||
return self.url_result(canonical_url, 'Rutube')
|
||||
return self.url_result(canonical_url, RutubeIE.ie_key())
|
||||
|
||||
|
||||
class RutubeChannelIE(InfoExtractor):
|
||||
class RutubePlaylistBaseIE(RutubeBaseIE):
|
||||
def _next_page_url(self, page_num, playlist_id, *args, **kwargs):
|
||||
return self._PAGE_TEMPLATE % (playlist_id, page_num)
|
||||
|
||||
def _entries(self, playlist_id, *args, **kwargs):
|
||||
next_page_url = None
|
||||
for pagenum in itertools.count(1):
|
||||
page = self._download_json(
|
||||
next_page_url or self._next_page_url(
|
||||
pagenum, playlist_id, *args, **kwargs),
|
||||
playlist_id, 'Downloading page %s' % pagenum)
|
||||
|
||||
results = page.get('results')
|
||||
if not results or not isinstance(results, list):
|
||||
break
|
||||
|
||||
for result in results:
|
||||
video_url = result.get('video_url')
|
||||
if not video_url or not isinstance(video_url, compat_str):
|
||||
continue
|
||||
entry = self._extract_video(result, require_title=False)
|
||||
entry.update({
|
||||
'_type': 'url',
|
||||
'url': video_url,
|
||||
'ie_key': RutubeIE.ie_key(),
|
||||
})
|
||||
yield entry
|
||||
|
||||
next_page_url = page.get('next')
|
||||
if not next_page_url or not page.get('has_next'):
|
||||
break
|
||||
|
||||
def _extract_playlist(self, playlist_id, *args, **kwargs):
|
||||
return self.playlist_result(
|
||||
self._entries(playlist_id, *args, **kwargs),
|
||||
playlist_id, kwargs.get('playlist_name'))
|
||||
|
||||
def _real_extract(self, url):
|
||||
return self._extract_playlist(self._match_id(url))
|
||||
|
||||
|
||||
class RutubeChannelIE(RutubePlaylistBaseIE):
|
||||
IE_NAME = 'rutube:channel'
|
||||
IE_DESC = 'Rutube channels'
|
||||
_VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)'
|
||||
@@ -142,27 +214,8 @@ class RutubeChannelIE(InfoExtractor):
|
||||
|
||||
_PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json'
|
||||
|
||||
def _extract_videos(self, channel_id, channel_title=None):
|
||||
entries = []
|
||||
for pagenum in itertools.count(1):
|
||||
page = self._download_json(
|
||||
self._PAGE_TEMPLATE % (channel_id, pagenum),
|
||||
channel_id, 'Downloading page %s' % pagenum)
|
||||
results = page['results']
|
||||
if not results:
|
||||
break
|
||||
entries.extend(self.url_result(result['video_url'], 'Rutube') for result in results)
|
||||
if not page['has_next']:
|
||||
break
|
||||
return self.playlist_result(entries, channel_id, channel_title)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
channel_id = mobj.group('id')
|
||||
return self._extract_videos(channel_id)
|
||||
|
||||
|
||||
class RutubeMovieIE(RutubeChannelIE):
|
||||
class RutubeMovieIE(RutubePlaylistBaseIE):
|
||||
IE_NAME = 'rutube:movie'
|
||||
IE_DESC = 'Rutube movies'
|
||||
_VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)'
|
||||
@@ -176,11 +229,11 @@ class RutubeMovieIE(RutubeChannelIE):
|
||||
movie = self._download_json(
|
||||
self._MOVIE_TEMPLATE % movie_id, movie_id,
|
||||
'Downloading movie JSON')
|
||||
movie_name = movie['name']
|
||||
return self._extract_videos(movie_id, movie_name)
|
||||
return self._extract_playlist(
|
||||
movie_id, playlist_name=movie.get('name'))
|
||||
|
||||
|
||||
class RutubePersonIE(RutubeChannelIE):
|
||||
class RutubePersonIE(RutubePlaylistBaseIE):
|
||||
IE_NAME = 'rutube:person'
|
||||
IE_DESC = 'Rutube person videos'
|
||||
_VALID_URL = r'https?://rutube\.ru/video/person/(?P<id>\d+)'
|
||||
@@ -193,3 +246,37 @@ class RutubePersonIE(RutubeChannelIE):
|
||||
}]
|
||||
|
||||
_PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
|
||||
|
||||
|
||||
class RutubePlaylistIE(RutubePlaylistBaseIE):
|
||||
IE_NAME = 'rutube:playlist'
|
||||
IE_DESC = 'Rutube playlists'
|
||||
_VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/[\da-z]{32}/\?.*?\bpl_id=(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://rutube.ru/video/cecd58ed7d531fc0f3d795d51cee9026/?pl_id=3097&pl_type=tag',
|
||||
'info_dict': {
|
||||
'id': '3097',
|
||||
},
|
||||
'playlist_count': 27,
|
||||
}, {
|
||||
'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/%s/%s/?page=%s&format=json'
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
if not super(RutubePlaylistIE, cls).suitable(url):
|
||||
return False
|
||||
params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||
return params.get('pl_type', [None])[0] and int_or_none(params.get('pl_id', [None])[0])
|
||||
|
||||
def _next_page_url(self, page_num, playlist_id, item_kind):
|
||||
return self._PAGE_TEMPLATE % (item_kind, playlist_id, page_num)
|
||||
|
||||
def _real_extract(self, url):
|
||||
qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||
playlist_kind = qs['pl_type'][0]
|
||||
playlist_id = qs['pl_id'][0]
|
||||
return self._extract_playlist(playlist_id, item_kind=playlist_kind)
|
||||
|
34
youtube_dl/extractor/slideslive.py
Normal file
34
youtube_dl/extractor/slideslive.py
Normal file
@@ -0,0 +1,34 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class SlidesLiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://slideslive\.com/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://slideslive.com/38902413/gcc-ia16-backend',
|
||||
'md5': 'b29fcd6c6952d0c79c5079b0e7a07e6f',
|
||||
'info_dict': {
|
||||
'id': 'LMtgR8ba0b0',
|
||||
'ext': 'mp4',
|
||||
'title': '38902413: external video',
|
||||
'description': '3890241320170925-9-1yd6ech.mp4',
|
||||
'uploader': 'SlidesLive Administrator',
|
||||
'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
|
||||
'upload_date': '20170925',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
url, video_id, headers={'Accept': 'application/json'})
|
||||
service_name = video_data['video_service_name']
|
||||
if service_name == 'YOUTUBE':
|
||||
yt_video_id = video_data['video_service_id']
|
||||
return self.url_result(yt_video_id, 'Youtube', video_id=yt_video_id)
|
||||
else:
|
||||
raise ExtractorError(
|
||||
'Unsupported service name: {0}'.format(service_name), expected=True)
|
@@ -1,8 +1,8 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import itertools
|
||||
import re
|
||||
|
||||
from .common import (
|
||||
InfoExtractor,
|
||||
@@ -17,6 +17,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
@@ -120,6 +121,21 @@ class SoundcloudIE(InfoExtractor):
|
||||
'license': 'cc-by-sa',
|
||||
},
|
||||
},
|
||||
# private link, downloadable format
|
||||
{
|
||||
'url': 'https://soundcloud.com/oriuplift/uponly-238-no-talking-wav/s-AyZUd',
|
||||
'md5': '64a60b16e617d41d0bef032b7f55441e',
|
||||
'info_dict': {
|
||||
'id': '340344461',
|
||||
'ext': 'wav',
|
||||
'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
|
||||
'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
|
||||
'uploader': 'Ori Uplift Music',
|
||||
'upload_date': '20170831',
|
||||
'duration': 7449,
|
||||
'license': 'all-rights-reserved',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
_CLIENT_ID = 'JlZIsxg2hY5WnBgtn3jfS0UYCl0K8DOg'
|
||||
@@ -160,11 +176,13 @@ class SoundcloudIE(InfoExtractor):
|
||||
'license': info.get('license'),
|
||||
}
|
||||
formats = []
|
||||
query = {'client_id': self._CLIENT_ID}
|
||||
if secret_token is not None:
|
||||
query['secret_token'] = secret_token
|
||||
if info.get('downloadable', False):
|
||||
# We can build a direct link to the song
|
||||
format_url = (
|
||||
'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(
|
||||
track_id, self._CLIENT_ID))
|
||||
format_url = update_url_query(
|
||||
'https://api.soundcloud.com/tracks/%s/download' % track_id, query)
|
||||
formats.append({
|
||||
'format_id': 'download',
|
||||
'ext': info.get('original_format', 'mp3'),
|
||||
@@ -176,10 +194,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
# We have to retrieve the url
|
||||
format_dict = self._download_json(
|
||||
'https://api.soundcloud.com/i1/tracks/%s/streams' % track_id,
|
||||
track_id, 'Downloading track url', query={
|
||||
'client_id': self._CLIENT_ID,
|
||||
'secret_token': secret_token,
|
||||
})
|
||||
track_id, 'Downloading track url', query=query)
|
||||
|
||||
for key, stream_url in format_dict.items():
|
||||
abr = int_or_none(self._search_regex(
|
||||
@@ -216,7 +231,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
# cannot be always used, sometimes it can give an HTTP 404 error
|
||||
formats.append({
|
||||
'format_id': 'fallback',
|
||||
'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
|
||||
'url': update_url_query(info['stream_url'], query),
|
||||
'ext': ext,
|
||||
})
|
||||
|
||||
|
@@ -66,7 +66,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
|
||||
r'(?s)<description>([^<]+)</description>',
|
||||
coursepage, 'description', fatal=False)
|
||||
|
||||
links = orderedSet(re.findall(r'<a href="(VideoPage.php\?[^"]+)">', coursepage))
|
||||
links = orderedSet(re.findall(r'<a href="(VideoPage\.php\?[^"]+)">', coursepage))
|
||||
info['entries'] = [self.url_result(
|
||||
'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l)
|
||||
) for l in links]
|
||||
@@ -84,7 +84,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
|
||||
rootpage = self._download_webpage(rootURL, info['id'],
|
||||
errnote='Unable to download course info page')
|
||||
|
||||
links = orderedSet(re.findall(r'<a href="(CoursePage.php\?[^"]+)">', rootpage))
|
||||
links = orderedSet(re.findall(r'<a href="(CoursePage\.php\?[^"]+)">', rootpage))
|
||||
info['entries'] = [self.url_result(
|
||||
'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l)
|
||||
) for l in links]
|
||||
|
@@ -4,8 +4,10 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
unescapeHTML,
|
||||
get_element_by_class,
|
||||
js_to_json,
|
||||
)
|
||||
|
||||
|
||||
@@ -25,35 +27,39 @@ class SteamIE(InfoExtractor):
|
||||
'url': 'http://store.steampowered.com/video/105600/',
|
||||
'playlist': [
|
||||
{
|
||||
'md5': 'f870007cee7065d7c76b88f0a45ecc07',
|
||||
'md5': '6a294ee0c4b1f47f5bb76a65e31e3592',
|
||||
'info_dict': {
|
||||
'id': '81300',
|
||||
'ext': 'flv',
|
||||
'title': 'Terraria 1.1 Trailer',
|
||||
'id': '2040428',
|
||||
'ext': 'mp4',
|
||||
'title': 'Terraria 1.3 Trailer',
|
||||
'playlist_index': 1,
|
||||
}
|
||||
},
|
||||
{
|
||||
'md5': '61aaf31a5c5c3041afb58fb83cbb5751',
|
||||
'md5': '911672b20064ca3263fa89650ba5a7aa',
|
||||
'info_dict': {
|
||||
'id': '80859',
|
||||
'ext': 'flv',
|
||||
'title': 'Terraria Trailer',
|
||||
'id': '2029566',
|
||||
'ext': 'mp4',
|
||||
'title': 'Terraria 1.2 Trailer',
|
||||
'playlist_index': 2,
|
||||
}
|
||||
}
|
||||
],
|
||||
'info_dict': {
|
||||
'id': '105600',
|
||||
'title': 'Terraria',
|
||||
},
|
||||
'params': {
|
||||
'playlistend': 2,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://steamcommunity.com/sharedfiles/filedetails/?id=242472205',
|
||||
'info_dict': {
|
||||
'id': 'WB5DvDOOvAY',
|
||||
'id': 'X8kpJBlzD2E',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20140329',
|
||||
'title': 'FRONTIERS - Final Greenlight Trailer',
|
||||
'description': 'md5:dc96a773669d0ca1b36c13c1f30250d9',
|
||||
'upload_date': '20140617',
|
||||
'title': 'FRONTIERS - Trapping',
|
||||
'description': 'md5:bf6f7f773def614054089e5769c12a6e',
|
||||
'uploader': 'AAD Productions',
|
||||
'uploader_id': 'AtomicAgeDogGames',
|
||||
}
|
||||
@@ -76,48 +82,65 @@ class SteamIE(InfoExtractor):
|
||||
self.report_age_confirmation()
|
||||
webpage = self._download_webpage(videourl, playlist_id)
|
||||
|
||||
flash_vars = self._parse_json(self._search_regex(
|
||||
r'(?s)rgMovieFlashvars\s*=\s*({.+?});', webpage,
|
||||
'flash vars'), playlist_id, js_to_json)
|
||||
|
||||
playlist_title = None
|
||||
entries = []
|
||||
if fileID:
|
||||
playlist_title = self._html_search_regex(
|
||||
r'<div class="workshopItemTitle">(.+)</div>', webpage, 'title')
|
||||
mweb = re.finditer(r'''(?x)
|
||||
'movie_(?P<videoID>[0-9]+)':\s*\{\s*
|
||||
YOUTUBE_VIDEO_ID:\s*"(?P<youtube_id>[^"]+)",
|
||||
''', webpage)
|
||||
videos = [{
|
||||
'_type': 'url',
|
||||
'url': vid.group('youtube_id'),
|
||||
'ie_key': 'Youtube',
|
||||
} for vid in mweb]
|
||||
else:
|
||||
playlist_title = self._html_search_regex(
|
||||
r'<h2 class="pageheader">(.*?)</h2>', webpage, 'game title')
|
||||
|
||||
mweb = re.finditer(r'''(?x)
|
||||
'movie_(?P<videoID>[0-9]+)':\s*\{\s*
|
||||
FILENAME:\s*"(?P<videoURL>[\w:/\.\?=]+)"
|
||||
(,\s*MOVIE_NAME:\s*\"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\},
|
||||
''', webpage)
|
||||
titles = re.finditer(
|
||||
r'<span class="title">(?P<videoName>.+?)</span>', webpage)
|
||||
thumbs = re.finditer(
|
||||
r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">', webpage)
|
||||
videos = []
|
||||
|
||||
for vid, vtitle, thumb in zip(mweb, titles, thumbs):
|
||||
video_id = vid.group('videoID')
|
||||
title = vtitle.group('videoName')
|
||||
video_url = vid.group('videoURL')
|
||||
video_thumb = thumb.group('thumbnail')
|
||||
if not video_url:
|
||||
raise ExtractorError('Cannot find video url for %s' % video_id)
|
||||
videos.append({
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'flv',
|
||||
'title': unescapeHTML(title),
|
||||
'thumbnail': video_thumb
|
||||
playlist_title = get_element_by_class('workshopItemTitle', webpage)
|
||||
for movie in flash_vars.values():
|
||||
if not movie:
|
||||
continue
|
||||
youtube_id = movie.get('YOUTUBE_VIDEO_ID')
|
||||
if not youtube_id:
|
||||
continue
|
||||
entries.append({
|
||||
'_type': 'url',
|
||||
'url': youtube_id,
|
||||
'ie_key': 'Youtube',
|
||||
})
|
||||
if not videos:
|
||||
else:
|
||||
playlist_title = get_element_by_class('apphub_AppName', webpage)
|
||||
for movie_id, movie in flash_vars.items():
|
||||
if not movie:
|
||||
continue
|
||||
video_id = self._search_regex(r'movie_(\d+)', movie_id, 'video id', fatal=False)
|
||||
title = movie.get('MOVIE_NAME')
|
||||
if not title or not video_id:
|
||||
continue
|
||||
entry = {
|
||||
'id': video_id,
|
||||
'title': title.replace('+', ' '),
|
||||
}
|
||||
formats = []
|
||||
flv_url = movie.get('FILENAME')
|
||||
if flv_url:
|
||||
formats.append({
|
||||
'format_id': 'flv',
|
||||
'url': flv_url,
|
||||
})
|
||||
highlight_element = self._search_regex(
|
||||
r'(<div[^>]+id="highlight_movie_%s"[^>]+>)' % video_id,
|
||||
webpage, 'highlight element', fatal=False)
|
||||
if highlight_element:
|
||||
highlight_attribs = extract_attributes(highlight_element)
|
||||
if highlight_attribs:
|
||||
entry['thumbnail'] = highlight_attribs.get('data-poster')
|
||||
for quality in ('', '-hd'):
|
||||
for ext in ('webm', 'mp4'):
|
||||
video_url = highlight_attribs.get('data-%s%s-source' % (ext, quality))
|
||||
if video_url:
|
||||
formats.append({
|
||||
'format_id': ext + quality,
|
||||
'url': video_url,
|
||||
})
|
||||
if not formats:
|
||||
continue
|
||||
entry['formats'] = formats
|
||||
entries.append(entry)
|
||||
if not entries:
|
||||
raise ExtractorError('Could not find any videos')
|
||||
|
||||
return self.playlist_result(videos, playlist_id, playlist_title)
|
||||
return self.playlist_result(entries, playlist_id, playlist_title)
|
||||
|
@@ -216,7 +216,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
|
||||
def hex_to_bytes(hex):
|
||||
return binascii.a2b_hex(hex.encode('ascii'))
|
||||
|
||||
relative_path = re.match(r'https?://link.theplatform.com/s/([^?]+)', url).group(1)
|
||||
relative_path = re.match(r'https?://link\.theplatform\.com/s/([^?]+)', url).group(1)
|
||||
clear_text = hex_to_bytes(flags + expiration_date + str_to_hex(relative_path))
|
||||
checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest()
|
||||
sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
|
||||
|
@@ -57,10 +57,10 @@ class ThisAVIE(InfoExtractor):
|
||||
info_dict = self._extract_jwplayer_data(
|
||||
webpage, video_id, require_title=False)
|
||||
uploader = self._html_search_regex(
|
||||
r': <a href="http://www.thisav.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>',
|
||||
r': <a href="http://www\.thisav\.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>',
|
||||
webpage, 'uploader name', fatal=False)
|
||||
uploader_id = self._html_search_regex(
|
||||
r': <a href="http://www.thisav.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>',
|
||||
r': <a href="http://www\.thisav\.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>',
|
||||
webpage, 'uploader id', fatal=False)
|
||||
|
||||
info_dict.update({
|
||||
|
@@ -13,11 +13,11 @@ from ..utils import (
|
||||
|
||||
|
||||
class TubiTvIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tubitv\.com/video/(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?tubitv\.com/(?:video|movies|tv-shows)/(?P<id>[0-9]+)'
|
||||
_LOGIN_URL = 'http://tubitv.com/login'
|
||||
_NETRC_MACHINE = 'tubitv'
|
||||
_GEO_COUNTRIES = ['US']
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday',
|
||||
'md5': '43ac06be9326f41912dc64ccf7a80320',
|
||||
'info_dict': {
|
||||
@@ -27,7 +27,13 @@ class TubiTvIE(InfoExtractor):
|
||||
'description': 'A stand up comedian is forced to look at the decisions in his life while on a one week trip to the west coast.',
|
||||
'uploader_id': 'bc168bee0d18dd1cb3b86c68706ab434',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://tubitv.com/tv-shows/321886/s01_e01_on_nom_stories',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://tubitv.com/movies/383676/tracker',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
|
@@ -18,7 +18,7 @@ class TV4IE(InfoExtractor):
|
||||
tv4\.se/(?:[^/]+)/klipp/(?:.*)-|
|
||||
tv4play\.se/
|
||||
(?:
|
||||
(?:program|barn)/(?:[^\?]+)\?video_id=|
|
||||
(?:program|barn)/(?:[^/]+/|(?:[^\?]+)\?video_id=)|
|
||||
iframe/video/|
|
||||
film/|
|
||||
sport/|
|
||||
@@ -63,6 +63,10 @@ class TV4IE(InfoExtractor):
|
||||
'url': 'http://www.tv4play.se/barn/looney-tunes?video_id=3062412',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://www.tv4play.se/program/farang/3922081',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -3,52 +3,50 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
float_or_none,
|
||||
smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class TVAIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://videos\.tva\.ca/episode/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://videos\.tva\.ca/details/_(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://videos.tva.ca/episode/85538',
|
||||
'url': 'https://videos.tva.ca/details/_5596811470001',
|
||||
'info_dict': {
|
||||
'id': '85538',
|
||||
'id': '5596811470001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Épisode du 25 janvier 2017',
|
||||
'description': 'md5:e9e7fb5532ab37984d2dc87229cadf98',
|
||||
'upload_date': '20170126',
|
||||
'timestamp': 1485442329,
|
||||
'title': 'Un extrait de l\'épisode du dimanche 8 octobre 2017 !',
|
||||
'uploader_id': '5481942443001',
|
||||
'upload_date': '20171003',
|
||||
'timestamp': 1507064617,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5481942443001/default_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
"https://d18jmrhziuoi7p.cloudfront.net/isl/api/v1/dataservice/Items('%s')" % video_id,
|
||||
video_id, query={
|
||||
'$expand': 'Metadata,CustomId',
|
||||
'$select': 'Metadata,Id,Title,ShortDescription,LongDescription,CreatedDate,CustomId,AverageUserRating,Categories,ShowName',
|
||||
'$format': 'json',
|
||||
'https://videos.tva.ca/proxy/item/_' + video_id, video_id, headers={
|
||||
'Accept': 'application/json',
|
||||
})
|
||||
metadata = video_data.get('Metadata', {})
|
||||
|
||||
def get_attribute(key):
|
||||
for attribute in video_data.get('attributes', []):
|
||||
if attribute.get('key') == key:
|
||||
return attribute.get('value')
|
||||
return None
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'title': video_data['Title'],
|
||||
'url': smuggle_url('ooyala:' + video_data['CustomId'], {'supportedformats': 'm3u8,hds'}),
|
||||
'description': video_data.get('LongDescription') or video_data.get('ShortDescription'),
|
||||
'series': video_data.get('ShowName'),
|
||||
'episode': metadata.get('EpisodeTitle'),
|
||||
'episode_number': int_or_none(metadata.get('EpisodeNumber')),
|
||||
'categories': video_data.get('Categories'),
|
||||
'average_rating': video_data.get('AverageUserRating'),
|
||||
'timestamp': parse_iso8601(video_data.get('CreatedDate')),
|
||||
'ie_key': 'Ooyala',
|
||||
'title': get_attribute('title'),
|
||||
'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {'geo_countries': ['CA']}),
|
||||
'description': get_attribute('description'),
|
||||
'thumbnail': get_attribute('image-background') or get_attribute('image-landscape'),
|
||||
'duration': float_or_none(get_attribute('video-duration'), 1000),
|
||||
'ie_key': 'BrightcoveNew',
|
||||
}
|
||||
|
@@ -9,7 +9,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class TVN24IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)\.html'
|
||||
_VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.tvn24.pl/wiadomosci-z-kraju,3/oredzie-artura-andrusa,702428.html',
|
||||
'md5': 'fbdec753d7bc29d96036808275f2130c',
|
||||
@@ -18,7 +18,7 @@ class TVN24IE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': '"Święta mają być wesołe, dlatego, ludziska, wszyscy pod jemiołę"',
|
||||
'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości "Szkła kontaktowego".',
|
||||
'thumbnail': 're:http://.*[.]jpeg',
|
||||
'thumbnail': 're:https?://.*[.]jpeg',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://fakty.tvn24.pl/ogladaj-online,60/53-konferencja-bezpieczenstwa-w-monachium,716431.html',
|
||||
@@ -29,6 +29,9 @@ class TVN24IE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://tvn24bis.pl/poranek,146,m/gen-koziej-w-tvn24-bis-wracamy-do-czasow-zimnej-wojny,715660.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.tvn24.pl/magazyn-tvn24/angie-w-jednej-czwartej-polka-od-szarej-myszki-do-cesarzowej-europy,119,2158',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -15,16 +15,16 @@ from ..utils import (
|
||||
class TVPIE(InfoExtractor):
|
||||
IE_NAME = 'tvp'
|
||||
IE_DESC = 'Telewizja Polska'
|
||||
_VALID_URL = r'https?://[^/]+\.tvp\.(?:pl|info)/(?:(?!\d+/)[^/]+/)*(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://[^/]+\.tvp\.(?:pl|info)/(?:video/(?:[^,\s]*,)*|(?:(?!\d+/)[^/]+/)*)(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://vod.tvp.pl/194536/i-seria-odc-13',
|
||||
'url': 'https://vod.tvp.pl/video/czas-honoru,i-seria-odc-13,194536',
|
||||
'md5': '8aa518c15e5cc32dfe8db400dc921fbb',
|
||||
'info_dict': {
|
||||
'id': '194536',
|
||||
'ext': 'mp4',
|
||||
'title': 'Czas honoru, I seria – odc. 13',
|
||||
'description': 'md5:76649d2014f65c99477be17f23a4dead',
|
||||
'description': 'md5:381afa5bca72655fe94b05cfe82bf53d',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176',
|
||||
@@ -37,12 +37,13 @@ class TVPIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
# page id is not the same as video id(#7799)
|
||||
'url': 'http://vod.tvp.pl/22704887/08122015-1500',
|
||||
'md5': 'cf6a4705dfd1489aef8deb168d6ba742',
|
||||
'url': 'https://wiadomosci.tvp.pl/33908820/28092017-1930',
|
||||
'md5': '84cd3c8aec4840046e5ab712416b73d0',
|
||||
'info_dict': {
|
||||
'id': '22680786',
|
||||
'id': '33908820',
|
||||
'ext': 'mp4',
|
||||
'title': 'Wiadomości, 08.12.2015, 15:00',
|
||||
'title': 'Wiadomości, 28.09.2017, 19:30',
|
||||
'description': 'Wydanie główne codziennego serwisu informacyjnego.'
|
||||
},
|
||||
}, {
|
||||
'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272',
|
||||
|
@@ -15,7 +15,9 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
smuggle_url,
|
||||
try_get,
|
||||
unsmuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
@@ -224,6 +226,9 @@ class TVPlayIE(InfoExtractor):
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
|
||||
|
||||
video_id = self._match_id(url)
|
||||
geo_country = self._search_regex(
|
||||
r'https?://[^/]+\.([a-z]{2})', url,
|
||||
@@ -426,4 +431,9 @@ class ViafreeIE(InfoExtractor):
|
||||
r'currentVideo["\']\s*:\s*.+?["\']id["\']\s*:\s*["\'](\d{6,})',
|
||||
webpage, 'video id')
|
||||
|
||||
return self.url_result('mtg:%s' % video_id, TVPlayIE.ie_key())
|
||||
return self.url_result(
|
||||
smuggle_url(
|
||||
'mtg:%s' % video_id,
|
||||
{'geo_countries': [
|
||||
compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1]]}),
|
||||
ie=TVPlayIE.ie_key(), video_id=video_id)
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user