Compare commits
280 Commits
2017.12.14
...
2018.02.26
Author | SHA1 | Date | |
---|---|---|---|
![]() |
6ab35f5e16 | ||
![]() |
32ae31847f | ||
![]() |
abe8766c35 | ||
![]() |
eaa3172672 | ||
![]() |
797c9284d6 | ||
![]() |
8c73ef37b6 | ||
![]() |
b5cbe3d652 | ||
![]() |
ece12e6348 | ||
![]() |
ff274e3c16 | ||
![]() |
c106237d56 | ||
![]() |
6e72ea4775 | ||
![]() |
d6a0350253 | ||
![]() |
ad29ef043e | ||
![]() |
f01df14c4f | ||
![]() |
9306b0c8d9 | ||
![]() |
f4b7427279 | ||
![]() |
300148b48a | ||
![]() |
2d17c63140 | ||
![]() |
f2908d072e | ||
![]() |
5e7841932c | ||
![]() |
870f3bfc63 | ||
![]() |
3d977fe4d2 | ||
![]() |
f075838728 | ||
![]() |
2acc11d771 | ||
![]() |
0704306e1d | ||
![]() |
9dc7ea320d | ||
![]() |
e231afb14f | ||
![]() |
12acb9a6fb | ||
![]() |
18ebd1a843 | ||
![]() |
8315ee6c4c | ||
![]() |
b9d1a79426 | ||
![]() |
09f934b009 | ||
![]() |
73af6e22fd | ||
![]() |
77e499f95e | ||
![]() |
befa4708fd | ||
![]() |
90830004c8 | ||
![]() |
18d7aa6efa | ||
![]() |
b12cf31bb1 | ||
![]() |
7d2b4aa047 | ||
![]() |
38662dfec7 | ||
![]() |
ee706f1009 | ||
![]() |
c4e7496421 | ||
![]() |
b8adcec4ea | ||
![]() |
073cca3df8 | ||
![]() |
f66df20ccd | ||
![]() |
ea69624992 | ||
![]() |
49702e3669 | ||
![]() |
59b5e7b280 | ||
![]() |
b9683400cf | ||
![]() |
760f81212f | ||
![]() |
79080573b5 | ||
![]() |
99892e9908 | ||
![]() |
8faa338ff3 | ||
![]() |
818df33fda | ||
![]() |
a072a12e24 | ||
![]() |
e67734dda9 | ||
![]() |
9e36fedd00 | ||
![]() |
d2b200eef9 | ||
![]() |
a03a3c80fe | ||
![]() |
9d5871fdff | ||
![]() |
ff873b5777 | ||
![]() |
042968ff31 | ||
![]() |
430f2ca544 | ||
![]() |
cbfbf07cdc | ||
![]() |
9e167e1ee3 | ||
![]() |
5828489072 | ||
![]() |
8c5fafe29f | ||
![]() |
6f4ad0db34 | ||
![]() |
c2b3bd0451 | ||
![]() |
728cee5385 | ||
![]() |
246a75b4ff | ||
![]() |
4fac463d70 | ||
![]() |
382b8182ce | ||
![]() |
ce53320b11 | ||
![]() |
51b0557d1e | ||
![]() |
5a5860825d | ||
![]() |
237d07f114 | ||
![]() |
9f4ec3de25 | ||
![]() |
96a0bbdd0d | ||
![]() |
c8064d4fab | ||
![]() |
fde677fed4 | ||
![]() |
0e0508c8a2 | ||
![]() |
bcf150e435 | ||
![]() |
240f26229d | ||
![]() |
b9b150def7 | ||
![]() |
d20225f33b | ||
![]() |
5399ab3f0c | ||
![]() |
b91a7a4e5e | ||
![]() |
e4a60912b8 | ||
![]() |
00c97e3e7a | ||
![]() |
cf7259bc93 | ||
![]() |
b54d4a5ce8 | ||
![]() |
db157d2a2a | ||
![]() |
6fcc053947 | ||
![]() |
a3e8146ea8 | ||
![]() |
f19eae429a | ||
![]() |
ba515388b8 | ||
![]() |
e2e18694db | ||
![]() |
4989d351b4 | ||
![]() |
1367c798e3 | ||
![]() |
9a340af37e | ||
![]() |
3c3bceb41d | ||
![]() |
64a12edb48 | ||
![]() |
4bf18702e6 | ||
![]() |
ecc218ab14 | ||
![]() |
d6b152915c | ||
![]() |
69a934e9ad | ||
![]() |
5fa2a6a561 | ||
![]() |
d2a422f548 | ||
![]() |
b9d52fb2ca | ||
![]() |
466000fc6b | ||
![]() |
65220c3bd6 | ||
![]() |
c989bdbef8 | ||
![]() |
eee1692ff3 | ||
![]() |
07e56e6df7 | ||
![]() |
3c3a07ee0b | ||
![]() |
27940ca09c | ||
![]() |
3931b84597 | ||
![]() |
a0ee342b50 | ||
![]() |
864a4576b7 | ||
![]() |
bbb7c3f7e9 | ||
![]() |
9d6458a206 | ||
![]() |
837b061710 | ||
![]() |
967ebbdb6c | ||
![]() |
dc400ed6a2 | ||
![]() |
cf2820710d | ||
![]() |
5d7d805ca9 | ||
![]() |
f206126df0 | ||
![]() |
021bd012bb | ||
![]() |
6e5eacb770 | ||
![]() |
d7da6db4e1 | ||
![]() |
721a0c3c7b | ||
![]() |
e0ab56571e | ||
![]() |
99d6e696fc | ||
![]() |
6289e07883 | ||
![]() |
655c410063 | ||
![]() |
b2a027fc6f | ||
![]() |
0d9c48de4f | ||
![]() |
df58ecbeba | ||
![]() |
ac458e90a3 | ||
![]() |
7df18fcc65 | ||
![]() |
c707b1d828 | ||
![]() |
c384d537f8 | ||
![]() |
e7f3529f68 | ||
![]() |
7d5406216a | ||
![]() |
2a3683c378 | ||
![]() |
154e4fdace | ||
![]() |
e2fc6df169 | ||
![]() |
68da3d033c | ||
![]() |
67408fe0e9 | ||
![]() |
cad9caf76b | ||
![]() |
4471affc34 | ||
![]() |
1370dba59f | ||
![]() |
1d1d60f6dd | ||
![]() |
a86922c470 | ||
![]() |
e11ccd76c6 | ||
![]() |
dd896a6a07 | ||
![]() |
391dd6f094 | ||
![]() |
0ce39bc542 | ||
![]() |
1915662d4f | ||
![]() |
54e8f62e01 | ||
![]() |
2d8bb80c60 | ||
![]() |
df16e645f6 | ||
![]() |
d4aedca3bd | ||
![]() |
47e2a9bc53 | ||
![]() |
e565a6386e | ||
![]() |
609850acfb | ||
![]() |
64287560e4 | ||
![]() |
37941fe204 | ||
![]() |
a90641fe87 | ||
![]() |
1b79daffd9 | ||
![]() |
e654829b4c | ||
![]() |
2b4e1ace4a | ||
![]() |
310ea4661d | ||
![]() |
5b23845125 | ||
![]() |
0f71de0761 | ||
![]() |
4df1098c3f | ||
![]() |
5eca00a2e3 | ||
![]() |
1dd38dc0f4 | ||
![]() |
8005dc68cb | ||
![]() |
a39e15c516 | ||
![]() |
7643916a37 | ||
![]() |
3a513f29ad | ||
![]() |
950b5f2969 | ||
![]() |
8faa9576bb | ||
![]() |
b0ead0e09a | ||
![]() |
0a5b1295b7 | ||
![]() |
a133eb7764 | ||
![]() |
f12628f934 | ||
![]() |
45283afdec | ||
![]() |
b7c74c0403 | ||
![]() |
0b0870f9d0 | ||
![]() |
c2f18e1c49 | ||
![]() |
da35331c6c | ||
![]() |
de329f64ab | ||
![]() |
75ba0efb52 | ||
![]() |
f0c6c2bce2 | ||
![]() |
9650c3e91d | ||
![]() |
b5e531f31a | ||
![]() |
7a6c204fcb | ||
![]() |
d7cd9a9e84 | ||
![]() |
54009c246e | ||
![]() |
b300cda476 | ||
![]() |
6648fd8ad6 | ||
![]() |
04cf1a191a | ||
![]() |
c95c08a856 | ||
![]() |
126f225bcf | ||
![]() |
4f5cf31977 | ||
![]() |
77341dae14 | ||
![]() |
2e65e7db9e | ||
![]() |
538d4f8681 | ||
![]() |
620ee8712e | ||
![]() |
2ca7ed41fe | ||
![]() |
8056c8542d | ||
![]() |
2501d41ef4 | ||
![]() |
d97cb84b31 | ||
![]() |
2c8e11b4af | ||
![]() |
d2c5b5a951 | ||
![]() |
580f3c79d5 | ||
![]() |
48058d82dc | ||
![]() |
6a41a12d29 | ||
![]() |
5c97ec5ff5 | ||
![]() |
9d6ac71c27 | ||
![]() |
84f085d4bd | ||
![]() |
a491fd0c6f | ||
![]() |
99277daaac | ||
![]() |
640788f6f4 | ||
![]() |
1ae0f0a21d | ||
![]() |
616bb95b28 | ||
![]() |
be069839b4 | ||
![]() |
c33de004e1 | ||
![]() |
42a1012c77 | ||
![]() |
a14001a5a1 | ||
![]() |
db145ee54a | ||
![]() |
45d20488f1 | ||
![]() |
2593651224 | ||
![]() |
951043724f | ||
![]() |
d2be5bb5af | ||
![]() |
447a5a710d | ||
![]() |
0f897e0929 | ||
![]() |
173558ce96 | ||
![]() |
d3ca283235 | ||
![]() |
d99a1000c7 | ||
![]() |
a75419586b | ||
![]() |
273c23d960 | ||
![]() |
b954e72c87 | ||
![]() |
116561697d | ||
![]() |
0e25a1a278 | ||
![]() |
307a7588b0 | ||
![]() |
c2f2f8b120 | ||
![]() |
f5a6321107 | ||
![]() |
69d69da98a | ||
![]() |
5c5e60cff8 | ||
![]() |
2132edaa03 | ||
![]() |
4b7dd1705a | ||
![]() |
9e3682d555 | ||
![]() |
3e191da6d9 | ||
![]() |
963d237d26 | ||
![]() |
d2d766bc6d | ||
![]() |
17c3aced5d | ||
![]() |
78466fcab5 | ||
![]() |
3961c6cb9d | ||
![]() |
07aeced68e | ||
![]() |
c10c93238e | ||
![]() |
4a109f81bc | ||
![]() |
99081da90c | ||
![]() |
7e81010987 | ||
![]() |
549bb416f5 | ||
![]() |
25475dfab3 | ||
![]() |
3dfa9ec213 | ||
![]() |
06dbcd7be4 | ||
![]() |
b555ae9bf1 | ||
![]() |
c402e7f3a0 | ||
![]() |
498a8a4ca5 | ||
![]() |
d05ba4b89e | ||
![]() |
23f511f5c7 | ||
![]() |
1c4804ef9b | ||
![]() |
0c69958844 | ||
![]() |
3281af3464 | ||
![]() |
29ac31afaf |
7
.github/ISSUE_TEMPLATE.md
vendored
7
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,12 +6,13 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.12.14*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.12.14**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.02.26*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.02.26**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
- [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones
|
||||
- [ ] Checked that provided video/audio/playlist URLs (if any) are alive and playable in a browser
|
||||
|
||||
### What is the purpose of your *issue*?
|
||||
- [ ] Bug report (encountered problems with youtube-dl)
|
||||
@@ -35,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2017.12.14
|
||||
[debug] youtube-dl version 2018.02.26
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
1
.github/ISSUE_TEMPLATE_tmpl.md
vendored
1
.github/ISSUE_TEMPLATE_tmpl.md
vendored
@@ -12,6 +12,7 @@
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
- [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones
|
||||
- [ ] Checked that provided video/audio/playlist URLs (if any) are alive and playable in a browser
|
||||
|
||||
### What is the purpose of your *issue*?
|
||||
- [ ] Bug report (encountered problems with youtube-dl)
|
||||
|
13
.travis.yml
13
.travis.yml
@@ -7,16 +7,21 @@ python:
|
||||
- "3.4"
|
||||
- "3.5"
|
||||
- "3.6"
|
||||
- "pypy"
|
||||
- "pypy3"
|
||||
sudo: false
|
||||
env:
|
||||
- YTDL_TEST_SET=core
|
||||
- YTDL_TEST_SET=download
|
||||
matrix:
|
||||
include:
|
||||
- env: JYTHON=true; YTDL_TEST_SET=core
|
||||
- env: JYTHON=true; YTDL_TEST_SET=download
|
||||
fast_finish: true
|
||||
allow_failures:
|
||||
- env: YTDL_TEST_SET=download
|
||||
- env: JYTHON=true; YTDL_TEST_SET=core
|
||||
- env: JYTHON=true; YTDL_TEST_SET=download
|
||||
before_install:
|
||||
- if [ "$JYTHON" == "true" ]; then ./devscripts/install_jython.sh; export PATH="$HOME/jython/bin:$PATH"; fi
|
||||
script: ./devscripts/run_tests.sh
|
||||
notifications:
|
||||
email:
|
||||
- filippo.valsorda@gmail.com
|
||||
- yasoob.khld@gmail.com
|
||||
|
5
AUTHORS
5
AUTHORS
@@ -231,3 +231,8 @@ John Dong
|
||||
Tatsuyuki Ishi
|
||||
Daniel Weber
|
||||
Kay Bouché
|
||||
Yang Hongbo
|
||||
Lei Wang
|
||||
Petr Novák
|
||||
Leonardo Taccari
|
||||
Martin Weinelt
|
||||
|
282
ChangeLog
282
ChangeLog
@@ -1,3 +1,281 @@
|
||||
version 2018.02.26
|
||||
|
||||
Extractors
|
||||
* [udemy] Use custom User-Agent (#15571)
|
||||
|
||||
|
||||
version 2018.02.25
|
||||
|
||||
Core
|
||||
* [postprocessor/embedthumbnail] Skip embedding when there aren't any
|
||||
thumbnails (#12573)
|
||||
* [extractor/common] Improve jwplayer subtitles extraction (#15695)
|
||||
|
||||
Extractors
|
||||
+ [vidlii] Add support for vidlii.com (#14472, #14512, #14779)
|
||||
+ [streamango] Capture and output error messages
|
||||
* [streamango] Fix extraction (#14160, #14256)
|
||||
+ [telequebec] Add support for emissions (#14649, #14655)
|
||||
+ [telequebec:live] Add support for live streams (#15688)
|
||||
+ [mailru:music] Add support for mail.ru/music (#15618)
|
||||
* [aenetworks] Switch to akamai HLS formats (#15612)
|
||||
* [ytsearch] Fix flat title extraction (#11260, #15681)
|
||||
|
||||
|
||||
version 2018.02.22
|
||||
|
||||
Core
|
||||
+ [utils] Fixup some common URL typos in sanitize_url (#15649)
|
||||
* Respect --prefer-insecure while updating (#15497)
|
||||
|
||||
Extractors
|
||||
* [vidio] Fix HLS URL extraction (#15675)
|
||||
+ [nexx] Add support for arc.nexx.cloud URLs
|
||||
* [nexx] Switch to arc API (#15652)
|
||||
* [redtube] Fix duration extraction (#15659)
|
||||
+ [sonyliv] Respect referrer (#15648)
|
||||
+ [brightcove:new] Use referrer for formats' HTTP headers
|
||||
+ [cbc] Add support for olympics.cbc.ca (#15535)
|
||||
+ [fusion] Add support for fusion.tv (#15628)
|
||||
* [npo] Improve quality metadata extraction
|
||||
* [npo] Relax URL regular expression (#14987, #14994)
|
||||
+ [npo] Capture and output error message
|
||||
+ [pornhub] Add support for channels (#15613)
|
||||
* [youtube] Handle shared URLs with generic extractor (#14303)
|
||||
|
||||
|
||||
version 2018.02.11
|
||||
|
||||
Core
|
||||
+ [YoutubeDL] Add support for filesize_approx in format selector (#15550)
|
||||
|
||||
Extractors
|
||||
+ [francetv] Add support for live streams (#13689)
|
||||
+ [francetv] Add support for zouzous.fr and ludo.fr (#10454, #13087, #13103,
|
||||
#15012)
|
||||
* [francetv] Separate main extractor and rework others to delegate to it
|
||||
* [francetv] Improve manifest URL signing (#15536)
|
||||
+ [francetv] Sign m3u8 manifest URLs (#15565)
|
||||
+ [veoh] Add support for embed URLs (#15561)
|
||||
* [afreecatv] Fix extraction (#15556)
|
||||
* [periscope] Use accessVideoPublic endpoint (#15554)
|
||||
* [discovery] Fix auth request (#15542)
|
||||
+ [6play] Extract subtitles (#15541)
|
||||
* [newgrounds] Fix metadata extraction (#15531)
|
||||
+ [nbc] Add support for stream.nbcolympics.com (#10295)
|
||||
* [dvtv] Fix live streams extraction (#15442)
|
||||
|
||||
|
||||
version 2018.02.08
|
||||
|
||||
Extractors
|
||||
+ [myvi] Extend URL regular expression
|
||||
+ [myvi:embed] Add support for myvi.tv embeds (#15521)
|
||||
+ [prosiebensat1] Extend URL regular expression (#15520)
|
||||
* [pokemon] Relax URL regular expression and extend title extraction (#15518)
|
||||
+ [gameinformer] Use geo verification headers
|
||||
* [la7] Fix extraction (#15501, #15502)
|
||||
* [gameinformer] Fix brightcove id extraction (#15416)
|
||||
+ [afreecatv] Pass referrer to video info request (#15507)
|
||||
+ [telebruxelles] Add support for live streams
|
||||
* [telebruxelles] Relax URL regular expression
|
||||
* [telebruxelles] Fix extraction (#15504)
|
||||
* [extractor/common] Respect secure schemes in _extract_wowza_formats
|
||||
|
||||
|
||||
version 2018.02.04
|
||||
|
||||
Core
|
||||
* [downloader/http] Randomize HTTP chunk size
|
||||
+ [downloader/http] Add ability to pass downloader options via info dict
|
||||
* [downloader/http] Fix 302 infinite loops by not reusing requests
|
||||
+ Document http_chunk_size
|
||||
|
||||
Extractors
|
||||
+ [brightcove] Pass embed page URL as referrer (#15486)
|
||||
+ [youtube] Enforce using chunked HTTP downloading for DASH formats
|
||||
|
||||
|
||||
version 2018.02.03
|
||||
|
||||
Core
|
||||
+ Introduce --http-chunk-size for chunk-based HTTP downloading
|
||||
+ Add support for IronPython
|
||||
* [downloader/ism] Fix Python 3.2 support
|
||||
|
||||
Extractors
|
||||
* [redbulltv] Fix extraction (#15481)
|
||||
* [redtube] Fix metadata extraction (#15472)
|
||||
* [pladform] Respect platform id and extract HLS formats (#15468)
|
||||
- [rtlnl] Remove progressive formats (#15459)
|
||||
* [6play] Do not modify asset URLs with a token (#15248)
|
||||
* [nationalgeographic] Relax URL regular expression
|
||||
* [dplay] Relax URL regular expression (#15458)
|
||||
* [cbsinteractive] Fix data extraction (#15451)
|
||||
+ [amcnetworks] Add support for sundancetv.com (#9260)
|
||||
|
||||
|
||||
version 2018.01.27
|
||||
|
||||
Core
|
||||
* [extractor/common] Improve _json_ld for articles
|
||||
* Switch codebase to use compat_b64decode
|
||||
+ [compat] Add compat_b64decode
|
||||
|
||||
Extractors
|
||||
+ [seznamzpravy] Add support for seznam.cz and seznamzpravy.cz (#14102, #14616)
|
||||
* [dplay] Bypass geo restriction
|
||||
+ [dplay] Add support for disco-api videos (#15396)
|
||||
* [youtube] Extract precise error messages (#15284)
|
||||
* [teachertube] Capture and output error message
|
||||
* [teachertube] Fix and relax thumbnail extraction (#15403)
|
||||
+ [prosiebensat1] Add another clip id regular expression (#15378)
|
||||
* [tbs] Update tokenizer url (#15395)
|
||||
* [mixcloud] Use compat_b64decode (#15394)
|
||||
- [thesixtyone] Remove extractor (#15341)
|
||||
|
||||
|
||||
version 2018.01.21
|
||||
|
||||
Core
|
||||
* [extractor/common] Improve jwplayer DASH formats extraction (#9242, #15187)
|
||||
* [utils] Improve scientific notation handling in js_to_json (#14789)
|
||||
|
||||
Extractors
|
||||
+ [southparkdk] Add support for southparkstudios.nu
|
||||
+ [southpark] Add support for collections (#14803)
|
||||
* [franceinter] Fix upload date extraction (#14996)
|
||||
+ [rtvs] Add support for rtvs.sk (#9242, #15187)
|
||||
* [restudy] Fix extraction and extend URL regular expression (#15347)
|
||||
* [youtube:live] Improve live detection (#15365)
|
||||
+ [springboardplatform] Add support for springboardplatform.com
|
||||
* [prosiebensat1] Add another clip id regular expression (#15290)
|
||||
- [ringtv] Remove extractor (#15345)
|
||||
|
||||
|
||||
version 2018.01.18
|
||||
|
||||
Extractors
|
||||
* [soundcloud] Update client id (#15306)
|
||||
- [kamcord] Remove extractor (#15322)
|
||||
+ [spiegel] Add support for nexx videos (#15285)
|
||||
* [twitch] Fix authentication and error capture (#14090, #15264)
|
||||
* [vk] Detect more errors due to copyright complaints (#15259)
|
||||
|
||||
|
||||
version 2018.01.14
|
||||
|
||||
Extractors
|
||||
* [youtube] Fix live streams extraction (#15202)
|
||||
* [wdr] Bypass geo restriction
|
||||
* [wdr] Rework extractors (#14598)
|
||||
+ [wdr] Add support for wdrmaus.de/elefantenseite (#14598)
|
||||
+ [gamestar] Add support for gamepro.de (#3384)
|
||||
* [viafree] Skip rtmp formats (#15232)
|
||||
+ [pandoratv] Add support for mobile URLs (#12441)
|
||||
+ [pandoratv] Add support for new URL format (#15131)
|
||||
+ [ximalaya] Add support for ximalaya.com (#14687)
|
||||
+ [digg] Add support for digg.com (#15214)
|
||||
* [limelight] Tolerate empty pc formats (#15150, #15151, #15207)
|
||||
* [ndr:embed:base] Make separate formats extraction non fatal (#15203)
|
||||
+ [weibo] Add extractor (#15079)
|
||||
+ [ok] Add support for live streams
|
||||
* [canalplus] Fix extraction (#15072)
|
||||
* [bilibili] Fix extraction (#15188)
|
||||
|
||||
|
||||
version 2018.01.07
|
||||
|
||||
Core
|
||||
* [utils] Fix youtube-dl under PyPy3 on Windows
|
||||
* [YoutubeDL] Output python implementation in debug header
|
||||
|
||||
Extractors
|
||||
+ [jwplatform] Add support for multiple embeds (#15192)
|
||||
* [mitele] Fix extraction (#15186)
|
||||
+ [motherless] Add support for groups (#15124)
|
||||
* [lynda] Relax URL regular expression (#15185)
|
||||
* [soundcloud] Fallback to avatar picture for thumbnail (#12878)
|
||||
* [youku] Fix list extraction (#15135)
|
||||
* [openload] Fix extraction (#15166)
|
||||
* [lynda] Skip invalid subtitles (#15159)
|
||||
* [twitch] Pass video id to url_result when extracting playlist (#15139)
|
||||
* [rtve.es:alacarta] Fix extraction of some new URLs
|
||||
* [acast] Fix extraction (#15147)
|
||||
|
||||
|
||||
version 2017.12.31
|
||||
|
||||
Core
|
||||
+ [extractor/common] Add container meta field for formats extracted
|
||||
in _parse_mpd_formats (#13616)
|
||||
+ [downloader/hls] Use HTTP headers for key request
|
||||
* [common] Use AACL as the default fourcc when AudioTag is 255
|
||||
* [extractor/common] Fix extraction of DASH formats with the same
|
||||
representation id (#15111)
|
||||
|
||||
Extractors
|
||||
+ [slutload] Add support for mobile URLs (#14806)
|
||||
* [abc:iview] Bypass geo restriction
|
||||
* [abc:iview] Fix extraction (#14711, #14782, #14838, #14917, #14963, #14985,
|
||||
#15035, #15057, #15061, #15071, #15095, #15106)
|
||||
* [openload] Fix extraction (#15118)
|
||||
- [sandia] Remove extractor
|
||||
- [collegerama] Remove extractor
|
||||
+ [mediasite] Add support for sites based on Mediasite Video Platform (#5428,
|
||||
#11185, #14343)
|
||||
+ [ufctv] Add support for ufc.tv (#14520)
|
||||
* [pluralsight] Fix missing first line of subtitles (#11118)
|
||||
* [openload] Fallback on f-page extraction (#14665, #14879)
|
||||
* [vimeo] Improve password protected videos extraction (#15114)
|
||||
* [aws] Fix canonical/signed headers generation on python 2 (#15102)
|
||||
|
||||
|
||||
version 2017.12.28
|
||||
|
||||
Extractors
|
||||
+ [internazionale] Add support for internazionale.it (#14973)
|
||||
* [playtvak] Relax video regular expression and make description optional
|
||||
(#15037)
|
||||
+ [filmweb] Add support for filmweb.no (#8773, #10368)
|
||||
+ [23video] Add support for 23video.com
|
||||
+ [espn] Add support for fivethirtyeight.com (#6864)
|
||||
+ [umg:de] Add support for universal-music.de (#11582, #11584)
|
||||
+ [espn] Add support for espnfc and extract more formats (#8053)
|
||||
* [youku] Update ccode (#14880)
|
||||
+ [openload] Add support for oload.stream (#15070)
|
||||
* [youku] Fix list extraction (#15065)
|
||||
|
||||
|
||||
version 2017.12.23
|
||||
|
||||
Core
|
||||
* [extractor/common] Move X-Forwarded-For setup code into _request_webpage
|
||||
+ [YoutubeDL] Add support for playlist_uploader and playlist_uploader_id in
|
||||
output template (#11427, #15018)
|
||||
+ [extractor/common] Introduce uploader, uploader_id and uploader_url
|
||||
meta fields for playlists (#11427, #15018)
|
||||
* [downloader/fragment] Encode filename of fragment being removed (#15020)
|
||||
+ [utils] Add another date format pattern (#14999)
|
||||
|
||||
Extractors
|
||||
+ [kaltura] Add another embed pattern for entry_id
|
||||
+ [7plus] Add support for 7plus.com.au (#15043)
|
||||
* [animeondemand] Relax login error regular expression
|
||||
+ [shahid] Add support for show pages (#7401)
|
||||
+ [youtube] Extract uploader, uploader_id and uploader_url for playlists
|
||||
(#11427, #15018)
|
||||
* [afreecatv] Improve format extraction (#15019)
|
||||
+ [cspan] Add support for audio only pages and catch page errors (#14995)
|
||||
+ [mailru] Add support for embed URLs (#14904)
|
||||
* [crunchyroll] Future-proof XML element checks (#15013)
|
||||
* [cbslocal] Fix timestamp extraction (#14999, #15000)
|
||||
* [discoverygo] Correct TTML subtitle extension
|
||||
* [vk] Make view count optional (#14979)
|
||||
* [disney] Skip Apple FairPlay formats (#14982)
|
||||
* [voot] Fix format extraction (#14758)
|
||||
|
||||
|
||||
version 2017.12.14
|
||||
|
||||
Core
|
||||
@@ -148,8 +426,8 @@ Extractors
|
||||
+ [fxnetworks] Extract series metadata (#14603)
|
||||
+ [younow] Add support for younow.com (#9255, #9432, #12436)
|
||||
* [dctptv] Fix extraction (#14599)
|
||||
* [youtube] Restrict embed regex (#14600)
|
||||
* [vimeo] Restrict iframe embed regex (#14600)
|
||||
* [youtube] Restrict embed regular expression (#14600)
|
||||
* [vimeo] Restrict iframe embed regular expression (#14600)
|
||||
* [soundgasm] Improve extraction (#14588)
|
||||
- [myvideo] Remove extractor (#8557)
|
||||
+ [nbc] Add support for classic-tv videos (#14575)
|
||||
|
@@ -1,7 +1,9 @@
|
||||
include README.md
|
||||
include test/*.py
|
||||
include test/*.json
|
||||
include LICENSE
|
||||
include AUTHORS
|
||||
include ChangeLog
|
||||
include youtube-dl.bash-completion
|
||||
include youtube-dl.fish
|
||||
include youtube-dl.1
|
||||
recursive-include docs Makefile conf.py *.rst
|
||||
recursive-include test *
|
||||
|
4
Makefile
4
Makefile
@@ -110,7 +110,7 @@ _EXTRACTOR_FILES = $(shell find youtube_dl/extractor -iname '*.py' -and -not -in
|
||||
youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
|
||||
$(PYTHON) devscripts/make_lazy_extractors.py $@
|
||||
|
||||
youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish ChangeLog
|
||||
youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish ChangeLog AUTHORS
|
||||
@tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \
|
||||
--exclude '*.DS_Store' \
|
||||
--exclude '*.kate-swp' \
|
||||
@@ -122,7 +122,7 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
|
||||
--exclude 'docs/_build' \
|
||||
-- \
|
||||
bin devscripts test youtube_dl docs \
|
||||
ChangeLog LICENSE README.md README.txt \
|
||||
ChangeLog AUTHORS LICENSE README.md README.txt \
|
||||
Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion \
|
||||
youtube-dl.zsh youtube-dl.fish setup.py setup.cfg \
|
||||
youtube-dl
|
||||
|
14
README.md
14
README.md
@@ -46,7 +46,7 @@ Or with [MacPorts](https://www.macports.org/):
|
||||
Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://rg3.github.io/youtube-dl/download.html).
|
||||
|
||||
# DESCRIPTION
|
||||
**youtube-dl** is a command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.6, 2.7, or 3.2+, and it is not platform specific. It should work on your Unix box, on Windows or on Mac OS X. It is released to the public domain, which means you can modify it, redistribute it or use it however you like.
|
||||
**youtube-dl** is a command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.6, 2.7, or 3.2+, and it is not platform specific. It should work on your Unix box, on Windows or on macOS. It is released to the public domain, which means you can modify it, redistribute it or use it however you like.
|
||||
|
||||
youtube-dl [OPTIONS] URL [URL...]
|
||||
|
||||
@@ -198,6 +198,11 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
size. By default, the buffer size is
|
||||
automatically resized from an initial value
|
||||
of SIZE.
|
||||
--http-chunk-size SIZE Size of a chunk for chunk-based HTTP
|
||||
downloading (e.g. 10485760 or 10M) (default
|
||||
is disabled). May be useful for bypassing
|
||||
bandwidth throttling imposed by a webserver
|
||||
(experimental)
|
||||
--playlist-reverse Download playlist videos in reverse order
|
||||
--playlist-random Download playlist videos in random order
|
||||
--xattr-set-filesize Set file xattribute ytdl.filesize with
|
||||
@@ -305,8 +310,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
--encoding ENCODING Force the specified encoding (experimental)
|
||||
--no-check-certificate Suppress HTTPS certificate validation
|
||||
--prefer-insecure Use an unencrypted connection to retrieve
|
||||
information about the video. (Currently
|
||||
supported only for YouTube)
|
||||
information whenever possible
|
||||
--user-agent UA Specify a custom user agent
|
||||
--referer URL Specify a custom referer, use if the video
|
||||
access is restricted to one domain
|
||||
@@ -539,6 +543,8 @@ The basic usage is not to set any template arguments when downloading a single f
|
||||
- `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the total length of the playlist
|
||||
- `playlist_id` (string): Playlist identifier
|
||||
- `playlist_title` (string): Playlist title
|
||||
- `playlist_uploader` (string): Full name of the playlist uploader
|
||||
- `playlist_uploader_id` (string): Nickname or id of the playlist uploader
|
||||
|
||||
Available for the video that belongs to some logical chapter or section:
|
||||
|
||||
@@ -861,7 +867,7 @@ Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
|
||||
|
||||
In order to extract cookies from a browser, use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/en-US/firefox/addon/export-cookies/) (for Firefox).
|
||||
|
||||
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, Mac OS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
|
||||
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, macOS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
|
||||
|
||||
Passing cookies to youtube-dl is a good way to work around login when a particular extractor does not implement it explicitly. Another use case is working around [CAPTCHA](https://en.wikipedia.org/wiki/CAPTCHA) some websites require you to solve in particular cases in order to get access (e.g. YouTube, CloudFlare).
|
||||
|
||||
|
5
devscripts/install_jython.sh
Executable file
5
devscripts/install_jython.sh
Executable file
@@ -0,0 +1,5 @@
|
||||
#!/bin/bash
|
||||
|
||||
wget http://central.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar
|
||||
java -jar jython-installer-2.7.1.jar -s -d "$HOME/jython"
|
||||
$HOME/jython/bin/jython -m pip install nose
|
@@ -3,6 +3,7 @@
|
||||
- **1up.com**
|
||||
- **20min**
|
||||
- **220.ro**
|
||||
- **23video**
|
||||
- **24video**
|
||||
- **3qsdn**: 3Q SDN
|
||||
- **3sat**
|
||||
@@ -10,6 +11,7 @@
|
||||
- **56.com**
|
||||
- **5min**
|
||||
- **6play**
|
||||
- **7plus**
|
||||
- **8tracks**
|
||||
- **91porn**
|
||||
- **9c9media**
|
||||
@@ -126,13 +128,14 @@
|
||||
- **CamdemyFolder**
|
||||
- **CamWithHer**
|
||||
- **canalc2.tv**
|
||||
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
|
||||
- **Canalplus**: mycanal.fr and piwiplus.fr
|
||||
- **Canvas**
|
||||
- **CanvasEen**: canvas.be and een.be
|
||||
- **CarambaTV**
|
||||
- **CarambaTVPage**
|
||||
- **CartoonNetwork**
|
||||
- **cbc.ca**
|
||||
- **cbc.ca:olympics**
|
||||
- **cbc.ca:player**
|
||||
- **cbc.ca:watch**
|
||||
- **cbc.ca:watch:video**
|
||||
@@ -169,7 +172,6 @@
|
||||
- **CNN**
|
||||
- **CNNArticle**
|
||||
- **CNNBlogs**
|
||||
- **CollegeRama**
|
||||
- **ComCarCoff**
|
||||
- **ComedyCentral**
|
||||
- **ComedyCentralFullEpisodes**
|
||||
@@ -188,7 +190,7 @@
|
||||
- **CSpan**: C-SPAN
|
||||
- **CtsNews**: 華視新聞
|
||||
- **CTVNews**
|
||||
- **culturebox.francetvinfo.fr**
|
||||
- **Culturebox**
|
||||
- **CultureUnplugged**
|
||||
- **curiositystream**
|
||||
- **curiositystream:collection**
|
||||
@@ -209,6 +211,7 @@
|
||||
- **defense.gouv.fr**
|
||||
- **democracynow**
|
||||
- **DHM**: Filmarchiv - Deutsches Historisches Museum
|
||||
- **Digg**
|
||||
- **DigitallySpeaking**
|
||||
- **Digiteka**
|
||||
- **Discovery**
|
||||
@@ -268,6 +271,8 @@
|
||||
- **Fczenit**
|
||||
- **filmon**
|
||||
- **filmon:channel**
|
||||
- **Filmweb**
|
||||
- **FiveThirtyEight**
|
||||
- **FiveTV**
|
||||
- **Flickr**
|
||||
- **Flipagram**
|
||||
@@ -287,6 +292,8 @@
|
||||
- **FranceTV**
|
||||
- **FranceTVEmbed**
|
||||
- **francetvinfo.fr**
|
||||
- **FranceTVJeunesse**
|
||||
- **FranceTVSite**
|
||||
- **Freesound**
|
||||
- **freespeech.org**
|
||||
- **FreshLive**
|
||||
@@ -358,6 +365,7 @@
|
||||
- **InfoQ**
|
||||
- **Instagram**
|
||||
- **instagram:user**: Instagram user profile
|
||||
- **Internazionale**
|
||||
- **InternetVideoArchive**
|
||||
- **IPrima**
|
||||
- **iqiyi**: 爱奇艺
|
||||
@@ -378,7 +386,6 @@
|
||||
- **JWPlatform**
|
||||
- **Kakao**
|
||||
- **Kaltura**
|
||||
- **Kamcord**
|
||||
- **KanalPlay**: Kanal 5/9/11 Play
|
||||
- **Kankan**
|
||||
- **Karaoketv**
|
||||
@@ -433,6 +440,8 @@
|
||||
- **m6**
|
||||
- **macgamestore**: MacGameStore trailers
|
||||
- **mailru**: Видео@Mail.Ru
|
||||
- **mailru:music**: Музыка@Mail.Ru
|
||||
- **mailru:music:search**: Музыка@Mail.Ru
|
||||
- **MakersChannel**
|
||||
- **MakerTV**
|
||||
- **mangomolo:live**
|
||||
@@ -444,6 +453,7 @@
|
||||
- **media.ccc.de**
|
||||
- **Medialaan**
|
||||
- **Mediaset**
|
||||
- **Mediasite**
|
||||
- **Medici**
|
||||
- **megaphone.fm**: megaphone.fm embedded players
|
||||
- **Meipai**: 美拍
|
||||
@@ -473,6 +483,7 @@
|
||||
- **Moniker**: allmyvideos.net and vidspot.net
|
||||
- **Morningstar**: morningstar.com
|
||||
- **Motherless**
|
||||
- **MotherlessGroup**
|
||||
- **Motorsport**: motorsport.com
|
||||
- **MovieClips**
|
||||
- **MovieFap**
|
||||
@@ -496,6 +507,7 @@
|
||||
- **MySpass**
|
||||
- **Myvi**
|
||||
- **MyVidster**
|
||||
- **MyviEmbed**
|
||||
- **n-tv.de**
|
||||
- **natgeo**
|
||||
- **natgeo:episodeguide**
|
||||
@@ -504,7 +516,8 @@
|
||||
- **NBA**
|
||||
- **NBC**
|
||||
- **NBCNews**
|
||||
- **NBCOlympics**
|
||||
- **nbcolympics**
|
||||
- **nbcolympics:stream**
|
||||
- **NBCSports**
|
||||
- **NBCSportsVPlayer**
|
||||
- **ndr**: NDR.de - Norddeutscher Rundfunk
|
||||
@@ -676,7 +689,6 @@
|
||||
- **revision**
|
||||
- **revision3:embed**
|
||||
- **RICE**
|
||||
- **RingTV**
|
||||
- **RMCDecouverte**
|
||||
- **RockstarGames**
|
||||
- **RoosterTeeth**
|
||||
@@ -697,6 +709,7 @@
|
||||
- **rtve.es:live**: RTVE.es live streams
|
||||
- **rtve.es:television**
|
||||
- **RTVNH**
|
||||
- **RTVS**
|
||||
- **Rudo**
|
||||
- **RUHD**
|
||||
- **RulePorn**
|
||||
@@ -712,7 +725,6 @@
|
||||
- **safari**: safaribooksonline.com online video
|
||||
- **safari:api**
|
||||
- **safari:course**: safaribooksonline.com online courses
|
||||
- **Sandia**: Sandia National Laboratories
|
||||
- **Sapo**: SAPO Vídeos
|
||||
- **savefrom.net**
|
||||
- **SBS**: sbs.com.au
|
||||
@@ -727,7 +739,10 @@
|
||||
- **ServingSys**
|
||||
- **Servus**
|
||||
- **Sexu**
|
||||
- **SeznamZpravy**
|
||||
- **SeznamZpravyArticle**
|
||||
- **Shahid**
|
||||
- **ShahidShow**
|
||||
- **Shared**: shared.sx
|
||||
- **ShowRoomLive**
|
||||
- **Sina**
|
||||
@@ -767,7 +782,7 @@
|
||||
- **Sport5**
|
||||
- **SportBoxEmbed**
|
||||
- **SportDeutschland**
|
||||
- **Sportschau**
|
||||
- **SpringboardPlatform**
|
||||
- **Sprout**
|
||||
- **sr:mediathek**: Saarländischer Rundfunk
|
||||
- **SRGSSR**
|
||||
@@ -807,6 +822,8 @@
|
||||
- **Telegraaf**
|
||||
- **TeleMB**
|
||||
- **TeleQuebec**
|
||||
- **TeleQuebecEmission**
|
||||
- **TeleQuebecLive**
|
||||
- **TeleTask**
|
||||
- **Telewebion**
|
||||
- **TF1**
|
||||
@@ -816,7 +833,6 @@
|
||||
- **ThePlatform**
|
||||
- **ThePlatformFeed**
|
||||
- **TheScene**
|
||||
- **TheSixtyOne**
|
||||
- **TheStar**
|
||||
- **TheSun**
|
||||
- **TheWeatherChannel**
|
||||
@@ -886,7 +902,9 @@
|
||||
- **udemy**
|
||||
- **udemy:course**
|
||||
- **UDNEmbed**: 聯合影音
|
||||
- **UFCTV**
|
||||
- **UKTVPlay**
|
||||
- **umg:de**: Universal Music Deutschland
|
||||
- **Unistra**
|
||||
- **Unity**
|
||||
- **uol.com.br**
|
||||
@@ -932,6 +950,7 @@
|
||||
- **VideoPress**
|
||||
- **videoweed**: VideoWeed
|
||||
- **Vidio**
|
||||
- **VidLii**
|
||||
- **vidme**
|
||||
- **vidme:user**
|
||||
- **vidme:user:likes**
|
||||
@@ -994,10 +1013,14 @@
|
||||
- **WatchIndianPorn**: Watch Indian Porn
|
||||
- **WDR**
|
||||
- **wdr:mobile**
|
||||
- **WDRElefant**
|
||||
- **WDRPage**
|
||||
- **Webcaster**
|
||||
- **WebcasterFeed**
|
||||
- **WebOfStories**
|
||||
- **WebOfStoriesPlaylist**
|
||||
- **Weibo**
|
||||
- **WeiboMobile**
|
||||
- **WeiqiTV**: WQTV
|
||||
- **wholecloud**: WholeCloud
|
||||
- **Wimp**
|
||||
@@ -1017,6 +1040,8 @@
|
||||
- **xiami:artist**: 虾米音乐 - 歌手
|
||||
- **xiami:collection**: 虾米音乐 - 精选集
|
||||
- **xiami:song**: 虾米音乐
|
||||
- **ximalaya**: 喜马拉雅FM
|
||||
- **ximalaya:album**: 喜马拉雅FM 专辑
|
||||
- **XMinus**
|
||||
- **XNXX**
|
||||
- **Xstream**
|
||||
|
@@ -3,4 +3,4 @@ universal = True
|
||||
|
||||
[flake8]
|
||||
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git
|
||||
ignore = E402,E501,E731
|
||||
ignore = E402,E501,E731,E741
|
||||
|
1
setup.py
1
setup.py
@@ -109,6 +109,7 @@ setup(
|
||||
author_email='ytdl@yt-dl.org',
|
||||
maintainer='Sergey M.',
|
||||
maintainer_email='dstftw@gmail.com',
|
||||
license='Unlicense',
|
||||
packages=[
|
||||
'youtube_dl',
|
||||
'youtube_dl.extractor', 'youtube_dl.downloader',
|
||||
|
@@ -493,9 +493,20 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||
_TEST_CASES = [
|
||||
(
|
||||
# https://github.com/rg3/youtube-dl/issues/13919
|
||||
# Also tests duplicate representation ids, see
|
||||
# https://github.com/rg3/youtube-dl/issues/15111
|
||||
'float_duration',
|
||||
'http://unknown/manifest.mpd',
|
||||
[{
|
||||
'manifest_url': 'http://unknown/manifest.mpd',
|
||||
'ext': 'm4a',
|
||||
'format_id': '318597',
|
||||
'format_note': 'DASH audio',
|
||||
'protocol': 'http_dash_segments',
|
||||
'acodec': 'mp4a.40.2',
|
||||
'vcodec': 'none',
|
||||
'tbr': 61.587,
|
||||
}, {
|
||||
'manifest_url': 'http://unknown/manifest.mpd',
|
||||
'ext': 'mp4',
|
||||
'format_id': '318597',
|
||||
|
@@ -92,8 +92,8 @@ class TestDownload(unittest.TestCase):
|
||||
def generator(test_case, tname):
|
||||
|
||||
def test_template(self):
|
||||
ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
|
||||
other_ies = [get_info_extractor(ie_key) for ie_key in test_case.get('add_ie', [])]
|
||||
ie = youtube_dl.extractor.get_info_extractor(test_case['name'])()
|
||||
other_ies = [get_info_extractor(ie_key)() for ie_key in test_case.get('add_ie', [])]
|
||||
is_playlist = any(k.startswith('playlist') for k in test_case)
|
||||
test_cases = test_case.get(
|
||||
'playlist', [] if is_playlist else [test_case])
|
||||
|
125
test/test_downloader_http.py
Normal file
125
test/test_downloader_http.py
Normal file
@@ -0,0 +1,125 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import try_rm
|
||||
from youtube_dl import YoutubeDL
|
||||
from youtube_dl.compat import compat_http_server
|
||||
from youtube_dl.downloader.http import HttpFD
|
||||
from youtube_dl.utils import encodeFilename
|
||||
import ssl
|
||||
import threading
|
||||
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
def http_server_port(httpd):
    """Return the port number reported by the socket of *httpd*."""
    listener = httpd.socket
    if os.name == 'java' and isinstance(listener, ssl.SSLSocket):
        # In Jython SSLSocket is not a subclass of socket.socket
        listener = listener.sock
    return listener.getsockname()[1]
|
||||
|
||||
|
||||
TEST_SIZE = 10 * 1024
|
||||
|
||||
|
||||
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
    """Test request handler that answers GET requests with '#' filler bytes.

    Four endpoints are served; they differ only in whether the response
    carries Content-Range and/or Content-Length headers.
    """

    def log_message(self, format, *args):
        # Discard the per-request log lines
        pass

    def send_content_range(self, total=None):
        """Send a Content-Range header if the request has a closed byte range.

        Returns the length of the requested range when the Range header has
        the form ``bytes=<start>-<end>``, otherwise returns *total* and sends
        no header.
        """
        requested = self.headers.get('Range')
        mobj = re.search(r'^bytes=(\d+)-(\d+)', requested) if requested else None
        if not mobj:
            return total
        first, last = int(mobj.group(1)), int(mobj.group(2))
        header_value = 'bytes %d-%d' % (first, last)
        if total:
            header_value += '/%d' % total
        self.send_header('Content-Range', header_value)
        return last - first + 1

    def serve(self, range=True, content_length=True):
        """Send a 200 response whose body consists of '#' bytes.

        range: honour the Range request header (see send_content_range)
        content_length: include a Content-Length header
        """
        self.send_response(200)
        self.send_header('Content-Type', 'video/mp4')
        body_len = self.send_content_range(TEST_SIZE) if range else TEST_SIZE
        if content_length:
            self.send_header('Content-Length', body_len)
        self.end_headers()
        self.wfile.write(b'#' * body_len)

    def do_GET(self):
        # path -> (honour Range, send Content-Length)
        endpoints = {
            '/regular': (True, True),
            '/no-content-length': (True, False),
            '/no-range': (False, True),
            '/no-range-no-content-length': (False, False),
        }
        if self.path in endpoints:
            with_range, with_length = endpoints[self.path]
            self.serve(range=with_range, content_length=with_length)
        else:
            assert False
|
||||
|
||||
|
||||
class FakeLogger(object):
    """Logger stand-in whose debug/warning/error methods drop every message."""

    def _discard(self, msg):
        pass

    debug = warning = error = _discard
|
||||
|
||||
|
||||
class TestHttpFD(unittest.TestCase):
    """Download every test endpoint through HttpFD and check the file size."""

    def setUp(self):
        # Bind to port 0 and read the actual port back from the socket
        self.httpd = compat_http_server.HTTPServer(
            ('127.0.0.1', 0), HTTPTestRequestHandler)
        self.port = http_server_port(self.httpd)
        self.server_thread = threading.Thread(target=self.httpd.serve_forever)
        self.server_thread.daemon = True
        self.server_thread.start()

    def tearDown(self):
        # Stop the serve_forever() loop and release the listening socket so
        # that servers and sockets do not accumulate across test methods
        self.httpd.shutdown()
        self.httpd.server_close()
        self.server_thread.join()

    def download(self, params, ep):
        """Download endpoint *ep* using *params* and verify the result.

        params: YoutubeDL/HttpFD parameters; a logger is added to a copy
        ep: endpoint name served by HTTPTestRequestHandler (without slash)
        """
        # Work on a copy so that the caller's dict is left untouched
        params = dict(params)
        params['logger'] = FakeLogger()
        ydl = YoutubeDL(params)
        downloader = HttpFD(ydl, params)
        filename = 'testfile.mp4'
        try_rm(encodeFilename(filename))
        try:
            self.assertTrue(downloader.real_download(filename, {
                'url': 'http://127.0.0.1:%d/%s' % (self.port, ep),
            }))
            self.assertEqual(
                os.path.getsize(encodeFilename(filename)), TEST_SIZE)
        finally:
            # Remove the downloaded file even when an assertion fails
            try_rm(encodeFilename(filename))

    def download_all(self, params):
        """Run download() against every endpoint of the test server."""
        for ep in ('regular', 'no-content-length', 'no-range', 'no-range-no-content-length'):
            self.download(params, ep)

    def test_regular(self):
        self.download_all({})

    def test_chunked(self):
        self.download_all({
            'http_chunk_size': 1000,
        })
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@@ -47,7 +47,7 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||
self.end_headers()
|
||||
return
|
||||
|
||||
new_url = 'http://localhost:%d/中文.html' % http_server_port(self.server)
|
||||
new_url = 'http://127.0.0.1:%d/中文.html' % http_server_port(self.server)
|
||||
self.send_response(302)
|
||||
self.send_header(b'Location', new_url.encode('utf-8'))
|
||||
self.end_headers()
|
||||
@@ -74,7 +74,7 @@ class FakeLogger(object):
|
||||
class TestHTTP(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.httpd = compat_http_server.HTTPServer(
|
||||
('localhost', 0), HTTPTestRequestHandler)
|
||||
('127.0.0.1', 0), HTTPTestRequestHandler)
|
||||
self.port = http_server_port(self.httpd)
|
||||
self.server_thread = threading.Thread(target=self.httpd.serve_forever)
|
||||
self.server_thread.daemon = True
|
||||
@@ -86,15 +86,15 @@ class TestHTTP(unittest.TestCase):
|
||||
return
|
||||
|
||||
ydl = YoutubeDL({'logger': FakeLogger()})
|
||||
r = ydl.extract_info('http://localhost:%d/302' % self.port)
|
||||
self.assertEqual(r['entries'][0]['url'], 'http://localhost:%d/vid.mp4' % self.port)
|
||||
r = ydl.extract_info('http://127.0.0.1:%d/302' % self.port)
|
||||
self.assertEqual(r['entries'][0]['url'], 'http://127.0.0.1:%d/vid.mp4' % self.port)
|
||||
|
||||
|
||||
class TestHTTPS(unittest.TestCase):
|
||||
def setUp(self):
|
||||
certfn = os.path.join(TEST_DIR, 'testcert.pem')
|
||||
self.httpd = compat_http_server.HTTPServer(
|
||||
('localhost', 0), HTTPTestRequestHandler)
|
||||
('127.0.0.1', 0), HTTPTestRequestHandler)
|
||||
self.httpd.socket = ssl.wrap_socket(
|
||||
self.httpd.socket, certfile=certfn, server_side=True)
|
||||
self.port = http_server_port(self.httpd)
|
||||
@@ -107,11 +107,11 @@ class TestHTTPS(unittest.TestCase):
|
||||
ydl = YoutubeDL({'logger': FakeLogger()})
|
||||
self.assertRaises(
|
||||
Exception,
|
||||
ydl.extract_info, 'https://localhost:%d/video.html' % self.port)
|
||||
ydl.extract_info, 'https://127.0.0.1:%d/video.html' % self.port)
|
||||
|
||||
ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True})
|
||||
r = ydl.extract_info('https://localhost:%d/video.html' % self.port)
|
||||
self.assertEqual(r['entries'][0]['url'], 'https://localhost:%d/vid.mp4' % self.port)
|
||||
r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port)
|
||||
self.assertEqual(r['entries'][0]['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port)
|
||||
|
||||
|
||||
def _build_proxy_handler(name):
|
||||
@@ -132,23 +132,23 @@ def _build_proxy_handler(name):
|
||||
class TestProxy(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.proxy = compat_http_server.HTTPServer(
|
||||
('localhost', 0), _build_proxy_handler('normal'))
|
||||
('127.0.0.1', 0), _build_proxy_handler('normal'))
|
||||
self.port = http_server_port(self.proxy)
|
||||
self.proxy_thread = threading.Thread(target=self.proxy.serve_forever)
|
||||
self.proxy_thread.daemon = True
|
||||
self.proxy_thread.start()
|
||||
|
||||
self.geo_proxy = compat_http_server.HTTPServer(
|
||||
('localhost', 0), _build_proxy_handler('geo'))
|
||||
('127.0.0.1', 0), _build_proxy_handler('geo'))
|
||||
self.geo_port = http_server_port(self.geo_proxy)
|
||||
self.geo_proxy_thread = threading.Thread(target=self.geo_proxy.serve_forever)
|
||||
self.geo_proxy_thread.daemon = True
|
||||
self.geo_proxy_thread.start()
|
||||
|
||||
def test_proxy(self):
|
||||
geo_proxy = 'localhost:{0}'.format(self.geo_port)
|
||||
geo_proxy = '127.0.0.1:{0}'.format(self.geo_port)
|
||||
ydl = YoutubeDL({
|
||||
'proxy': 'localhost:{0}'.format(self.port),
|
||||
'proxy': '127.0.0.1:{0}'.format(self.port),
|
||||
'geo_verification_proxy': geo_proxy,
|
||||
})
|
||||
url = 'http://foo.com/bar'
|
||||
@@ -162,7 +162,7 @@ class TestProxy(unittest.TestCase):
|
||||
|
||||
def test_proxy_with_idn(self):
|
||||
ydl = YoutubeDL({
|
||||
'proxy': 'localhost:{0}'.format(self.port),
|
||||
'proxy': '127.0.0.1:{0}'.format(self.port),
|
||||
})
|
||||
url = 'http://中文.tw/'
|
||||
response = ydl.urlopen(url).read().decode('utf-8')
|
||||
|
@@ -57,6 +57,7 @@ from youtube_dl.utils import (
|
||||
read_batch_urls,
|
||||
sanitize_filename,
|
||||
sanitize_path,
|
||||
sanitize_url,
|
||||
expand_path,
|
||||
prepend_extension,
|
||||
replace_extension,
|
||||
@@ -219,6 +220,12 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(sanitize_path('./abc'), 'abc')
|
||||
self.assertEqual(sanitize_path('./../abc'), '..\\abc')
|
||||
|
||||
def test_sanitize_url(self):
    # (input URL, expected sanitized URL)
    cases = (
        ('//foo.bar', 'http://foo.bar'),
        ('httpss://foo.bar', 'https://foo.bar'),
        ('rmtps://foo.bar', 'rtmps://foo.bar'),
        ('https://foo.bar', 'https://foo.bar'),
    )
    for url, expected in cases:
        self.assertEqual(sanitize_url(url), expected)
|
||||
|
||||
def test_expand_path(self):
|
||||
def env(var):
|
||||
return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var)
|
||||
@@ -343,6 +350,7 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100)
|
||||
self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361)
|
||||
self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
|
||||
self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140)
|
||||
|
||||
def test_determine_ext(self):
|
||||
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
|
||||
@@ -813,6 +821,9 @@ class TestUtil(unittest.TestCase):
|
||||
inp = '''{"duration": "00:01:07"}'''
|
||||
self.assertEqual(js_to_json(inp), '''{"duration": "00:01:07"}''')
|
||||
|
||||
inp = '''{segments: [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}'''
|
||||
self.assertEqual(js_to_json(inp), '''{"segments": [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}''')
|
||||
|
||||
def test_js_to_json_edgecases(self):
|
||||
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
|
||||
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
|
||||
@@ -884,6 +895,13 @@ class TestUtil(unittest.TestCase):
|
||||
on = js_to_json('{/*comment\n*/42/*comment\n*/:/*comment\n*/42/*comment\n*/}')
|
||||
self.assertEqual(json.loads(on), {'42': 42})
|
||||
|
||||
on = js_to_json('{42:4.2e1}')
|
||||
self.assertEqual(json.loads(on), {'42': 42.0})
|
||||
|
||||
def test_js_to_json_malformed(self):
    # (malformed input, expected js_to_json output)
    cases = (
        ('42a1', '42"a1"'),
        ('42a-1', '42"a"-1'),
    )
    for code, expected in cases:
        self.assertEqual(js_to_json(code), expected)
|
||||
|
||||
def test_extract_attributes(self):
|
||||
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
|
||||
self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
|
||||
|
@@ -298,7 +298,8 @@ class YoutubeDL(object):
|
||||
the downloader (see youtube_dl/downloader/common.py):
|
||||
nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
|
||||
noresizebuffer, retries, continuedl, noprogress, consoletitle,
|
||||
xattr_set_filesize, external_downloader_args, hls_use_mpegts.
|
||||
xattr_set_filesize, external_downloader_args, hls_use_mpegts,
|
||||
http_chunk_size.
|
||||
|
||||
The following options are used by the post processors:
|
||||
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
|
||||
@@ -975,6 +976,8 @@ class YoutubeDL(object):
|
||||
'playlist': playlist,
|
||||
'playlist_id': ie_result.get('id'),
|
||||
'playlist_title': ie_result.get('title'),
|
||||
'playlist_uploader': ie_result.get('uploader'),
|
||||
'playlist_uploader_id': ie_result.get('uploader_id'),
|
||||
'playlist_index': i + playliststart,
|
||||
'extractor': ie_result['extractor'],
|
||||
'webpage_url': ie_result['webpage_url'],
|
||||
@@ -1030,7 +1033,7 @@ class YoutubeDL(object):
|
||||
'!=': operator.ne,
|
||||
}
|
||||
operator_rex = re.compile(r'''(?x)\s*
|
||||
(?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
|
||||
(?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
|
||||
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
|
||||
(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
|
||||
$
|
||||
@@ -2231,8 +2234,16 @@ class YoutubeDL(object):
|
||||
sys.exc_clear()
|
||||
except Exception:
|
||||
pass
|
||||
self._write_string('[debug] Python version %s - %s\n' % (
|
||||
platform.python_version(), platform_name()))
|
||||
|
||||
def python_implementation():
    """Return the interpreter implementation name.

    On PyPy exposing sys.pypy_version_info the name is followed by
    ' version X.Y.Z' built from the first three fields of that tuple.
    """
    impl = platform.python_implementation()
    if impl != 'PyPy' or not hasattr(sys, 'pypy_version_info'):
        return impl
    major, minor, micro = sys.pypy_version_info[:3]
    return '%s version %d.%d.%d' % (impl, major, minor, micro)
|
||||
|
||||
self._write_string('[debug] Python version %s (%s) - %s\n' % (
|
||||
platform.python_version(), python_implementation(),
|
||||
platform_name()))
|
||||
|
||||
exe_versions = FFmpegPostProcessor.get_versions(self)
|
||||
exe_versions['rtmpdump'] = rtmpdump_version()
|
||||
|
@@ -191,6 +191,11 @@ def _real_main(argv=None):
|
||||
if numeric_buffersize is None:
|
||||
parser.error('invalid buffer size specified')
|
||||
opts.buffersize = numeric_buffersize
|
||||
if opts.http_chunk_size is not None:
|
||||
numeric_chunksize = FileDownloader.parse_bytes(opts.http_chunk_size)
|
||||
if not numeric_chunksize:
|
||||
parser.error('invalid http chunk size specified')
|
||||
opts.http_chunk_size = numeric_chunksize
|
||||
if opts.playliststart <= 0:
|
||||
raise ValueError('Playlist start must be positive')
|
||||
if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
|
||||
@@ -346,6 +351,7 @@ def _real_main(argv=None):
|
||||
'keep_fragments': opts.keep_fragments,
|
||||
'buffersize': opts.buffersize,
|
||||
'noresizebuffer': opts.noresizebuffer,
|
||||
'http_chunk_size': opts.http_chunk_size,
|
||||
'continuedl': opts.continue_dl,
|
||||
'noprogress': opts.noprogress,
|
||||
'progress_with_newline': opts.progress_with_newline,
|
||||
@@ -432,7 +438,7 @@ def _real_main(argv=None):
|
||||
with YoutubeDL(ydl_opts) as ydl:
|
||||
# Update version
|
||||
if opts.update_self:
|
||||
update_self(ydl.to_screen, opts.verbose, ydl._opener)
|
||||
update_self(ydl.to_screen, opts.verbose, ydl._opener, opts.prefer_insecure)
|
||||
|
||||
# Remove cache dir
|
||||
if opts.rm_cachedir:
|
||||
|
@@ -1,8 +1,8 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
from math import ceil
|
||||
|
||||
from .compat import compat_b64decode
|
||||
from .utils import bytes_to_intlist, intlist_to_bytes
|
||||
|
||||
BLOCK_SIZE_BYTES = 16
|
||||
@@ -180,7 +180,7 @@ def aes_decrypt_text(data, password, key_size_bytes):
|
||||
"""
|
||||
NONCE_LENGTH_BYTES = 8
|
||||
|
||||
data = bytes_to_intlist(base64.b64decode(data.encode('utf-8')))
|
||||
data = bytes_to_intlist(compat_b64decode(data))
|
||||
password = bytes_to_intlist(password.encode('utf-8'))
|
||||
|
||||
key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password))
|
||||
|
@@ -1,14 +1,17 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import binascii
|
||||
import collections
|
||||
import ctypes
|
||||
import email
|
||||
import getpass
|
||||
import io
|
||||
import itertools
|
||||
import optparse
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
import shlex
|
||||
import shutil
|
||||
@@ -2894,9 +2897,24 @@ except TypeError:
|
||||
if isinstance(spec, compat_str):
|
||||
spec = spec.encode('ascii')
|
||||
return struct.unpack(spec, *args)
|
||||
|
||||
class compat_Struct(struct.Struct):
|
||||
def __init__(self, fmt):
|
||||
if isinstance(fmt, compat_str):
|
||||
fmt = fmt.encode('ascii')
|
||||
super(compat_Struct, self).__init__(fmt)
|
||||
else:
|
||||
compat_struct_pack = struct.pack
|
||||
compat_struct_unpack = struct.unpack
|
||||
if platform.python_implementation() == 'IronPython' and sys.version_info < (2, 7, 8):
|
||||
class compat_Struct(struct.Struct):
|
||||
def unpack(self, string):
|
||||
if not isinstance(string, buffer): # noqa: F821
|
||||
string = buffer(string) # noqa: F821
|
||||
return super(compat_Struct, self).unpack(string)
|
||||
else:
|
||||
compat_Struct = struct.Struct
|
||||
|
||||
|
||||
try:
|
||||
from future_builtins import zip as compat_zip
|
||||
@@ -2906,14 +2924,45 @@ except ImportError: # not 2.6+ or is 3.x
|
||||
except ImportError:
|
||||
compat_zip = zip
|
||||
|
||||
|
||||
if sys.version_info >= (3, 3):
    compat_b64decode = base64.b64decode
else:
    def compat_b64decode(s, *args, **kwargs):
        """Call base64.b64decode, first encoding a text string *s* as ASCII."""
        data = s.encode('ascii') if isinstance(s, compat_str) else s
        return base64.b64decode(data, *args, **kwargs)
|
||||
|
||||
|
||||
if platform.python_implementation() != 'PyPy' or sys.pypy_version_info >= (5, 4, 0):
    def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
        """Return ctypes.WINFUNCTYPE(*args, **kwargs) unchanged."""
        return ctypes.WINFUNCTYPE(*args, **kwargs)
else:
    # PyPy2 prior to version 5.4.0 expects byte strings as Windows function
    # names, see the original PyPy issue [1] and the youtube-dl one [2].
    # 1. https://bitbucket.org/pypy/pypy/issues/2360/windows-ctypescdll-typeerror-function-name
    # 2. https://github.com/rg3/youtube-dl/pull/4392
    def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
        """Wrap ctypes.WINFUNCTYPE so the function name is passed as str."""
        prototype = ctypes.WINFUNCTYPE(*args, **kwargs)

        def resf(tpl, *args, **kwargs):
            funcname, dll = tpl
            # Convert the function name with str() before handing it over
            return prototype((str(funcname), dll), *args, **kwargs)

        return resf
|
||||
|
||||
|
||||
__all__ = [
|
||||
'compat_HTMLParseError',
|
||||
'compat_HTMLParser',
|
||||
'compat_HTTPError',
|
||||
'compat_Struct',
|
||||
'compat_b64decode',
|
||||
'compat_basestring',
|
||||
'compat_chr',
|
||||
'compat_cookiejar',
|
||||
'compat_cookies',
|
||||
'compat_ctypes_WINFUNCTYPE',
|
||||
'compat_etree_fromstring',
|
||||
'compat_etree_register_namespace',
|
||||
'compat_expanduser',
|
||||
|
@@ -49,6 +49,9 @@ class FileDownloader(object):
|
||||
external_downloader_args: A list of additional command-line arguments for the
|
||||
external downloader.
|
||||
hls_use_mpegts: Use the mpegts container for HLS videos.
|
||||
http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be
|
||||
useful for bypassing bandwidth throttling imposed by
|
||||
a webserver (experimental)
|
||||
|
||||
Subclasses of this one must re-define the real_download method.
|
||||
"""
|
||||
|
@@ -1,12 +1,12 @@
|
||||
from __future__ import division, unicode_literals
|
||||
|
||||
import base64
|
||||
import io
|
||||
import itertools
|
||||
import time
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_etree_fromstring,
|
||||
compat_urlparse,
|
||||
compat_urllib_error,
|
||||
@@ -312,7 +312,7 @@ class F4mFD(FragmentFD):
|
||||
boot_info = self._get_bootstrap_from_url(bootstrap_url)
|
||||
else:
|
||||
bootstrap_url = None
|
||||
bootstrap = base64.b64decode(node.text.encode('ascii'))
|
||||
bootstrap = compat_b64decode(node.text)
|
||||
boot_info = read_bootstrap_info(bootstrap)
|
||||
return boot_info, bootstrap_url
|
||||
|
||||
@@ -349,7 +349,7 @@ class F4mFD(FragmentFD):
|
||||
live = boot_info['live']
|
||||
metadata_node = media.find(_add_ns('metadata'))
|
||||
if metadata_node is not None:
|
||||
metadata = base64.b64decode(metadata_node.text.encode('ascii'))
|
||||
metadata = compat_b64decode(metadata_node.text)
|
||||
else:
|
||||
metadata = None
|
||||
|
||||
|
@@ -112,7 +112,7 @@ class FragmentFD(FileDownloader):
|
||||
if self.__do_ytdl_file(ctx):
|
||||
self._write_ytdl_file(ctx)
|
||||
if not self.params.get('keep_fragments', False):
|
||||
os.remove(ctx['fragment_filename_sanitized'])
|
||||
os.remove(encodeFilename(ctx['fragment_filename_sanitized']))
|
||||
del ctx['fragment_filename_sanitized']
|
||||
|
||||
def _prepare_frag_download(self, ctx):
|
||||
|
@@ -163,7 +163,8 @@ class HlsFD(FragmentFD):
|
||||
return False
|
||||
if decrypt_info['METHOD'] == 'AES-128':
|
||||
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
|
||||
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(decrypt_info['URI']).read()
|
||||
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
|
||||
self._prepare_url(info_dict, decrypt_info['URI'])).read()
|
||||
frag_content = AES.new(
|
||||
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
|
||||
self._append_fragment(ctx, frag_content)
|
||||
|
@@ -4,13 +4,18 @@ import errno
|
||||
import os
|
||||
import socket
|
||||
import time
|
||||
import random
|
||||
import re
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..compat import compat_urllib_error
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
)
|
||||
from ..utils import (
|
||||
ContentTooShortError,
|
||||
encodeFilename,
|
||||
int_or_none,
|
||||
sanitize_open,
|
||||
sanitized_Request,
|
||||
write_xattr,
|
||||
@@ -38,21 +43,26 @@ class HttpFD(FileDownloader):
|
||||
add_headers = info_dict.get('http_headers')
|
||||
if add_headers:
|
||||
headers.update(add_headers)
|
||||
basic_request = sanitized_Request(url, None, headers)
|
||||
request = sanitized_Request(url, None, headers)
|
||||
|
||||
is_test = self.params.get('test', False)
|
||||
|
||||
if is_test:
|
||||
request.add_header('Range', 'bytes=0-%s' % str(self._TEST_FILE_SIZE - 1))
|
||||
chunk_size = self._TEST_FILE_SIZE if is_test else (
|
||||
info_dict.get('downloader_options', {}).get('http_chunk_size') or
|
||||
self.params.get('http_chunk_size') or 0)
|
||||
|
||||
ctx.open_mode = 'wb'
|
||||
ctx.resume_len = 0
|
||||
ctx.data_len = None
|
||||
ctx.block_size = self.params.get('buffersize', 1024)
|
||||
ctx.start_time = time.time()
|
||||
ctx.chunk_size = None
|
||||
|
||||
if self.params.get('continuedl', True):
|
||||
# Establish possible resume length
|
||||
if os.path.isfile(encodeFilename(ctx.tmpfilename)):
|
||||
ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
|
||||
ctx.resume_len = os.path.getsize(
|
||||
encodeFilename(ctx.tmpfilename))
|
||||
|
||||
ctx.is_resume = ctx.resume_len > 0
|
||||
|
||||
count = 0
|
||||
retries = self.params.get('retries', 0)
|
||||
@@ -64,11 +74,36 @@ class HttpFD(FileDownloader):
|
||||
def __init__(self, source_error):
|
||||
self.source_error = source_error
|
||||
|
||||
class NextFragment(Exception):
    """Raised by the download loop when a chunked download has more bytes left."""
|
||||
|
||||
def set_range(req, start, end):
    """Add a Range header for bytes *start* to *end* to the request *req*.

    A falsy *end* produces an open-ended range ('bytes=<start>-').
    """
    last_byte = compat_str(end) if end else ''
    req.add_header('Range', ('bytes=%d-' % start) + last_byte)
|
||||
|
||||
def establish_connection():
|
||||
if ctx.resume_len != 0:
|
||||
self.report_resuming_byte(ctx.resume_len)
|
||||
request.add_header('Range', 'bytes=%d-' % ctx.resume_len)
|
||||
ctx.chunk_size = (random.randint(int(chunk_size * 0.95), chunk_size)
|
||||
if not is_test and chunk_size else chunk_size)
|
||||
if ctx.resume_len > 0:
|
||||
range_start = ctx.resume_len
|
||||
if ctx.is_resume:
|
||||
self.report_resuming_byte(ctx.resume_len)
|
||||
ctx.open_mode = 'ab'
|
||||
elif ctx.chunk_size > 0:
|
||||
range_start = 0
|
||||
else:
|
||||
range_start = None
|
||||
ctx.is_resume = False
|
||||
range_end = range_start + ctx.chunk_size - 1 if ctx.chunk_size else None
|
||||
if range_end and ctx.data_len is not None and range_end >= ctx.data_len:
|
||||
range_end = ctx.data_len - 1
|
||||
has_range = range_start is not None
|
||||
ctx.has_range = has_range
|
||||
request = sanitized_Request(url, None, headers)
|
||||
if has_range:
|
||||
set_range(request, range_start, range_end)
|
||||
# Establish connection
|
||||
try:
|
||||
ctx.data = self.ydl.urlopen(request)
|
||||
@@ -77,29 +112,40 @@ class HttpFD(FileDownloader):
|
||||
# that don't support resuming and serve a whole file with no Content-Range
|
||||
# set in response despite the requested Range (see
|
||||
# https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799)
|
||||
if ctx.resume_len > 0:
|
||||
if has_range:
|
||||
content_range = ctx.data.headers.get('Content-Range')
|
||||
if content_range:
|
||||
content_range_m = re.search(r'bytes (\d+)-', content_range)
|
||||
content_range_m = re.search(r'bytes (\d+)-(\d+)?(?:/(\d+))?', content_range)
|
||||
# Content-Range is present and matches requested Range, resume is possible
|
||||
if content_range_m and ctx.resume_len == int(content_range_m.group(1)):
|
||||
return
|
||||
if content_range_m:
|
||||
if range_start == int(content_range_m.group(1)):
|
||||
content_range_end = int_or_none(content_range_m.group(2))
|
||||
content_len = int_or_none(content_range_m.group(3))
|
||||
accept_content_len = (
|
||||
# Non-chunked download
|
||||
not ctx.chunk_size or
|
||||
# Chunked download and requested piece or
|
||||
# its part is promised to be served
|
||||
content_range_end == range_end or
|
||||
content_len < range_end)
|
||||
if accept_content_len:
|
||||
ctx.data_len = content_len
|
||||
return
|
||||
# Content-Range is either not present or invalid. Assuming remote webserver is
|
||||
# trying to send the whole file, resume is not possible, so wiping the local file
|
||||
# and performing entire redownload
|
||||
self.report_unable_to_resume()
|
||||
ctx.resume_len = 0
|
||||
ctx.open_mode = 'wb'
|
||||
ctx.data_len = int_or_none(ctx.data.info().get('Content-length', None))
|
||||
return
|
||||
except (compat_urllib_error.HTTPError, ) as err:
|
||||
if (err.code < 500 or err.code >= 600) and err.code != 416:
|
||||
# Unexpected HTTP error
|
||||
raise
|
||||
elif err.code == 416:
|
||||
if err.code == 416:
|
||||
# Unable to resume (requested range not satisfiable)
|
||||
try:
|
||||
# Open the connection again without the range header
|
||||
ctx.data = self.ydl.urlopen(basic_request)
|
||||
ctx.data = self.ydl.urlopen(
|
||||
sanitized_Request(url, None, headers))
|
||||
content_length = ctx.data.info()['Content-Length']
|
||||
except (compat_urllib_error.HTTPError, ) as err:
|
||||
if err.code < 500 or err.code >= 600:
|
||||
@@ -130,6 +176,9 @@ class HttpFD(FileDownloader):
|
||||
ctx.resume_len = 0
|
||||
ctx.open_mode = 'wb'
|
||||
return
|
||||
elif err.code < 500 or err.code >= 600:
|
||||
# Unexpected HTTP error
|
||||
raise
|
||||
raise RetryDownload(err)
|
||||
except socket.error as err:
|
||||
if err.errno != errno.ECONNRESET:
|
||||
@@ -160,7 +209,7 @@ class HttpFD(FileDownloader):
|
||||
return False
|
||||
|
||||
byte_counter = 0 + ctx.resume_len
|
||||
block_size = self.params.get('buffersize', 1024)
|
||||
block_size = ctx.block_size
|
||||
start = time.time()
|
||||
|
||||
# measure time over whole while-loop, so slow_down() and best_block_size() work together properly
|
||||
@@ -233,25 +282,30 @@ class HttpFD(FileDownloader):
|
||||
|
||||
# Progress message
|
||||
speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
|
||||
if data_len is None:
|
||||
if ctx.data_len is None:
|
||||
eta = None
|
||||
else:
|
||||
eta = self.calc_eta(start, time.time(), data_len - ctx.resume_len, byte_counter - ctx.resume_len)
|
||||
eta = self.calc_eta(start, time.time(), ctx.data_len - ctx.resume_len, byte_counter - ctx.resume_len)
|
||||
|
||||
self._hook_progress({
|
||||
'status': 'downloading',
|
||||
'downloaded_bytes': byte_counter,
|
||||
'total_bytes': data_len,
|
||||
'total_bytes': ctx.data_len,
|
||||
'tmpfilename': ctx.tmpfilename,
|
||||
'filename': ctx.filename,
|
||||
'eta': eta,
|
||||
'speed': speed,
|
||||
'elapsed': now - start,
|
||||
'elapsed': now - ctx.start_time,
|
||||
})
|
||||
|
||||
if is_test and byte_counter == data_len:
|
||||
break
|
||||
|
||||
if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len:
|
||||
ctx.resume_len = byte_counter
|
||||
# ctx.block_size = block_size
|
||||
raise NextFragment()
|
||||
|
||||
if ctx.stream is None:
|
||||
self.to_stderr('\n')
|
||||
self.report_error('Did not get any data blocks')
|
||||
@@ -276,7 +330,7 @@ class HttpFD(FileDownloader):
|
||||
'total_bytes': byte_counter,
|
||||
'filename': ctx.filename,
|
||||
'status': 'finished',
|
||||
'elapsed': time.time() - start,
|
||||
'elapsed': time.time() - ctx.start_time,
|
||||
})
|
||||
|
||||
return True
|
||||
@@ -290,6 +344,8 @@ class HttpFD(FileDownloader):
|
||||
if count <= retries:
|
||||
self.report_retry(e.source_error, count, retries)
|
||||
continue
|
||||
except NextFragment:
|
||||
continue
|
||||
except SucceedDownload:
|
||||
return True
|
||||
|
||||
|
@@ -1,25 +1,27 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import time
|
||||
import struct
|
||||
import binascii
|
||||
import io
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..compat import compat_urllib_error
|
||||
from ..compat import (
|
||||
compat_Struct,
|
||||
compat_urllib_error,
|
||||
)
|
||||
|
||||
|
||||
u8 = struct.Struct(b'>B')
|
||||
u88 = struct.Struct(b'>Bx')
|
||||
u16 = struct.Struct(b'>H')
|
||||
u1616 = struct.Struct(b'>Hxx')
|
||||
u32 = struct.Struct(b'>I')
|
||||
u64 = struct.Struct(b'>Q')
|
||||
u8 = compat_Struct('>B')
|
||||
u88 = compat_Struct('>Bx')
|
||||
u16 = compat_Struct('>H')
|
||||
u1616 = compat_Struct('>Hxx')
|
||||
u32 = compat_Struct('>I')
|
||||
u64 = compat_Struct('>Q')
|
||||
|
||||
s88 = struct.Struct(b'>bx')
|
||||
s16 = struct.Struct(b'>h')
|
||||
s1616 = struct.Struct(b'>hxx')
|
||||
s32 = struct.Struct(b'>i')
|
||||
s88 = compat_Struct('>bx')
|
||||
s16 = compat_Struct('>h')
|
||||
s1616 = compat_Struct('>hxx')
|
||||
s32 = compat_Struct('>i')
|
||||
|
||||
unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000)
|
||||
|
||||
@@ -139,7 +141,7 @@ def write_piff_header(stream, params):
|
||||
sample_entry_payload += u16.pack(0x18) # depth
|
||||
sample_entry_payload += s16.pack(-1) # pre defined
|
||||
|
||||
codec_private_data = binascii.unhexlify(params['codec_private_data'])
|
||||
codec_private_data = binascii.unhexlify(params['codec_private_data'].encode('utf-8'))
|
||||
if fourcc in ('H264', 'AVC1'):
|
||||
sps, pps = codec_private_data.split(u32.pack(1))[1:]
|
||||
avcc_payload = u8.pack(1) # configuration version
|
||||
|
@@ -1,6 +1,9 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import hmac
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
@@ -10,6 +13,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
@@ -101,21 +105,24 @@ class ABCIE(InfoExtractor):
|
||||
class ABCIViewIE(InfoExtractor):
|
||||
IE_NAME = 'abc.net.au:iview'
|
||||
_VALID_URL = r'https?://iview\.abc\.net\.au/programs/[^/]+/(?P<id>[^/?#]+)'
|
||||
_GEO_COUNTRIES = ['AU']
|
||||
|
||||
# ABC iview programs are normally available for 14 days only.
|
||||
_TESTS = [{
|
||||
'url': 'http://iview.abc.net.au/programs/diaries-of-a-broken-mind/ZX9735A001S00',
|
||||
'url': 'http://iview.abc.net.au/programs/call-the-midwife/ZW0898A003S00',
|
||||
'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
|
||||
'info_dict': {
|
||||
'id': 'ZX9735A001S00',
|
||||
'id': 'ZW0898A003S00',
|
||||
'ext': 'mp4',
|
||||
'title': 'Diaries Of A Broken Mind',
|
||||
'description': 'md5:7de3903874b7a1be279fe6b68718fc9e',
|
||||
'upload_date': '20161010',
|
||||
'uploader_id': 'abc2',
|
||||
'timestamp': 1476064920,
|
||||
'title': 'Series 5 Ep 3',
|
||||
'description': 'md5:e0ef7d4f92055b86c4f33611f180ed79',
|
||||
'upload_date': '20171228',
|
||||
'uploader_id': 'abc1',
|
||||
'timestamp': 1514499187,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Video gone',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -126,20 +133,30 @@ class ABCIViewIE(InfoExtractor):
|
||||
title = video_params.get('title') or video_params['seriesTitle']
|
||||
stream = next(s for s in video_params['playlist'] if s.get('type') == 'program')
|
||||
|
||||
format_urls = [
|
||||
try_get(stream, lambda x: x['hds-unmetered'], compat_str)]
|
||||
house_number = video_params.get('episodeHouseNumber')
|
||||
path = '/auth/hls/sign?ts={0}&hn={1}&d=android-mobile'.format(
|
||||
int(time.time()), house_number)
|
||||
sig = hmac.new(
|
||||
'android.content.res.Resources'.encode('utf-8'),
|
||||
path.encode('utf-8'), hashlib.sha256).hexdigest()
|
||||
token = self._download_webpage(
|
||||
'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id)
|
||||
|
||||
# May have higher quality video
|
||||
sd_url = try_get(
|
||||
stream, lambda x: x['streams']['hds']['sd'], compat_str)
|
||||
if sd_url:
|
||||
format_urls.append(sd_url.replace('metered', 'um'))
|
||||
def tokenize_url(url, token):
|
||||
return update_url_query(url, {
|
||||
'hdnea': token,
|
||||
})
|
||||
|
||||
formats = []
|
||||
for format_url in format_urls:
|
||||
if format_url:
|
||||
formats.extend(
|
||||
self._extract_akamai_formats(format_url, video_id))
|
||||
for sd in ('sd', 'sd-low'):
|
||||
sd_url = try_get(
|
||||
stream, lambda x: x['streams']['hls'][sd], compat_str)
|
||||
if not sd_url:
|
||||
continue
|
||||
formats = self._extract_m3u8_formats(
|
||||
tokenize_url(sd_url, token), video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
|
||||
if formats:
|
||||
break
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
|
@@ -66,7 +66,7 @@ class AbcNewsIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
|
||||
'info_dict': {
|
||||
'id': '10498713',
|
||||
'id': '10505354',
|
||||
'ext': 'flv',
|
||||
'display_id': 'dramatic-video-rare-death-job-america',
|
||||
'title': 'Occupational Hazards',
|
||||
@@ -79,7 +79,7 @@ class AbcNewsIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
|
||||
'info_dict': {
|
||||
'id': '39125818',
|
||||
'id': '38897857',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016',
|
||||
'title': 'Justin Timberlake Drops Hints For Secret Single',
|
||||
|
@@ -8,7 +8,7 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
unified_timestamp,
|
||||
OnDemandPagedList,
|
||||
)
|
||||
|
||||
@@ -32,7 +32,7 @@ class ACastIE(InfoExtractor):
|
||||
}, {
|
||||
# test with multiple blings
|
||||
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
|
||||
'md5': '55c0097badd7095f494c99a172f86501',
|
||||
'md5': 'e87d5b8516cd04c0d81b6ee1caca28d0',
|
||||
'info_dict': {
|
||||
'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
|
||||
'ext': 'mp3',
|
||||
@@ -40,23 +40,24 @@ class ACastIE(InfoExtractor):
|
||||
'timestamp': 1477346700,
|
||||
'upload_date': '20161024',
|
||||
'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4',
|
||||
'duration': 2797,
|
||||
'duration': 2766,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel, display_id = re.match(self._VALID_URL, url).groups()
|
||||
cast_data = self._download_json(
|
||||
'https://embed.acast.com/api/acasts/%s/%s' % (channel, display_id), display_id)
|
||||
'https://play-api.acast.com/splash/%s/%s' % (channel, display_id), display_id)
|
||||
e = cast_data['result']['episode']
|
||||
return {
|
||||
'id': compat_str(cast_data['id']),
|
||||
'id': compat_str(e['id']),
|
||||
'display_id': display_id,
|
||||
'url': [b['audio'] for b in cast_data['blings'] if b['type'] == 'BlingAudio'][0],
|
||||
'title': cast_data['name'],
|
||||
'description': cast_data.get('description'),
|
||||
'thumbnail': cast_data.get('image'),
|
||||
'timestamp': parse_iso8601(cast_data.get('publishingDate')),
|
||||
'duration': int_or_none(cast_data.get('duration')),
|
||||
'url': e['mediaUrl'],
|
||||
'title': e['name'],
|
||||
'description': e.get('description'),
|
||||
'thumbnail': e.get('image'),
|
||||
'timestamp': unified_timestamp(e.get('publishingDate')),
|
||||
'duration': int_or_none(e.get('duration')),
|
||||
}
|
||||
|
||||
|
||||
|
@@ -1,13 +1,15 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_cbc_decrypt
|
||||
from ..compat import compat_ord
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_ord,
|
||||
)
|
||||
from ..utils import (
|
||||
bytes_to_intlist,
|
||||
ExtractorError,
|
||||
@@ -48,9 +50,9 @@ class ADNIE(InfoExtractor):
|
||||
|
||||
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
||||
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
|
||||
bytes_to_intlist(base64.b64decode(enc_subtitles[24:])),
|
||||
bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
|
||||
bytes_to_intlist(b'\x1b\xe0\x29\x61\x38\x94\x24\x00\x12\xbd\xc5\x80\xac\xce\xbe\xb0'),
|
||||
bytes_to_intlist(base64.b64decode(enc_subtitles[:24]))
|
||||
bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
|
||||
))
|
||||
subtitles_json = self._parse_json(
|
||||
dec_subtitles[:-compat_ord(dec_subtitles[-1])].decode(),
|
||||
|
@@ -122,7 +122,8 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'assetTypes': 'high_video_s3'
|
||||
'assetTypes': 'high_video_ak',
|
||||
'switch': 'hls_high_ak',
|
||||
}
|
||||
video_id = self._html_search_meta('aetn:VideoID', webpage)
|
||||
media_url = self._search_regex(
|
||||
|
@@ -175,10 +175,23 @@ class AfreecaTVIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
station_id = self._search_regex(
|
||||
r'nStationNo\s*=\s*(\d+)', webpage, 'station')
|
||||
bbs_id = self._search_regex(
|
||||
r'nBbsNo\s*=\s*(\d+)', webpage, 'bbs')
|
||||
video_id = self._search_regex(
|
||||
r'nTitleNo\s*=\s*(\d+)', webpage, 'title', default=video_id)
|
||||
|
||||
video_xml = self._download_xml(
|
||||
'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
|
||||
video_id, query={
|
||||
video_id, headers={
|
||||
'Referer': 'http://vod.afreecatv.com/embed.php',
|
||||
}, query={
|
||||
'nTitleNo': video_id,
|
||||
'nStationNo': station_id,
|
||||
'nBbsNo': bbs_id,
|
||||
'partialView': 'SKIP_ADULT',
|
||||
})
|
||||
|
||||
@@ -228,10 +241,19 @@ class AfreecaTVIE(InfoExtractor):
|
||||
r'^(\d{8})_', key, 'upload date', default=None)
|
||||
file_duration = int_or_none(file_element.get('duration'))
|
||||
format_id = key if key else '%s_%s' % (video_id, file_num)
|
||||
formats = self._extract_m3u8_formats(
|
||||
file_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls',
|
||||
note='Downloading part %d m3u8 information' % file_num)
|
||||
if determine_ext(file_url) == 'm3u8':
|
||||
formats = self._extract_m3u8_formats(
|
||||
file_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls',
|
||||
note='Downloading part %d m3u8 information' % file_num)
|
||||
else:
|
||||
formats = [{
|
||||
'url': file_url,
|
||||
'format_id': 'http',
|
||||
}]
|
||||
if not formats:
|
||||
continue
|
||||
self._sort_formats(formats)
|
||||
file_info = common_entry.copy()
|
||||
file_info.update({
|
||||
'id': format_id,
|
||||
|
@@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class AMCNetworksIE(ThePlatformIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies|shows(?:/[^/]+)+)/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?:movies|shows(?:/[^/]+)+)/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1',
|
||||
'md5': '',
|
||||
@@ -51,6 +51,9 @@ class AMCNetworksIE(ThePlatformIE):
|
||||
}, {
|
||||
'url': 'http://www.wetv.com/shows/la-hair/videos/season-05/episode-09-episode-9-2/episode-9-sneak-peek-3',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sundancetv.com/shows/riviera/full-episodes/season-1/episode-01-episode-1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -85,8 +85,8 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
|
||||
if all(p not in response for p in ('>Logout<', 'href="/users/sign_out"')):
|
||||
error = self._search_regex(
|
||||
r'<p class="alert alert-danger">(.+?)</p>',
|
||||
response, 'error', default=None)
|
||||
r'<p[^>]+\bclass=(["\'])(?:(?!\1).)*\balert\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</p>',
|
||||
response, 'error', default=None, group='error')
|
||||
if error:
|
||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
@@ -24,57 +24,30 @@ class ARDMediathekIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ardmediathek.de/tv/Dokumentation-und-Reportage/Ich-liebe-das-Leben-trotzdem/rbb-Fernsehen/Video?documentId=29582122&bcastId=3822114',
|
||||
# available till 26.07.2022
|
||||
'url': 'http://www.ardmediathek.de/tv/S%C3%9CDLICHT/Was-ist-die-Kunst-der-Zukunft-liebe-Ann/BR-Fernsehen/Video?bcastId=34633636&documentId=44726822',
|
||||
'info_dict': {
|
||||
'id': '29582122',
|
||||
'id': '44726822',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ich liebe das Leben trotzdem',
|
||||
'description': 'md5:45e4c225c72b27993314b31a84a5261c',
|
||||
'duration': 4557,
|
||||
'title': 'Was ist die Kunst der Zukunft, liebe Anna McCarthy?',
|
||||
'description': 'md5:4ada28b3e3b5df01647310e41f3a62f5',
|
||||
'duration': 1740,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}, {
|
||||
'url': 'http://www.ardmediathek.de/tv/Tatort/Tatort-Scheinwelten-H%C3%B6rfassung-Video/Das-Erste/Video?documentId=29522730&bcastId=602916',
|
||||
'md5': 'f4d98b10759ac06c0072bbcd1f0b9e3e',
|
||||
'info_dict': {
|
||||
'id': '29522730',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tatort: Scheinwelten - Hörfassung (Video tgl. ab 20 Uhr)',
|
||||
'description': 'md5:196392e79876d0ac94c94e8cdb2875f1',
|
||||
'duration': 5252,
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}
|
||||
}, {
|
||||
# audio
|
||||
'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
|
||||
'md5': '219d94d8980b4f538c7fcb0865eb7f2c',
|
||||
'info_dict': {
|
||||
'id': '28488308',
|
||||
'ext': 'mp3',
|
||||
'title': 'Tod eines Fußballers',
|
||||
'description': 'md5:f6e39f3461f0e1f54bfa48c8875c86ef',
|
||||
'duration': 3240,
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# audio
|
||||
'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
|
||||
'md5': '4e8f00631aac0395fee17368ac0e9867',
|
||||
'info_dict': {
|
||||
'id': '30796318',
|
||||
'ext': 'mp3',
|
||||
'title': 'Vor dem Fest',
|
||||
'description': 'md5:c0c1c8048514deaed2a73b3a60eecacb',
|
||||
'duration': 3287,
|
||||
},
|
||||
'skip': 'Video is no longer available',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_media_info(self, media_info_url, webpage, video_id):
|
||||
@@ -252,20 +225,23 @@ class ARDMediathekIE(InfoExtractor):
|
||||
|
||||
class ARDIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
|
||||
'md5': 'd216c3a86493f9322545e045ddc3eb35',
|
||||
_TESTS = [{
|
||||
# available till 14.02.2019
|
||||
'url': 'http://www.daserste.de/information/talk/maischberger/videos/das-groko-drama-zerlegen-sich-die-volksparteien-video-102.html',
|
||||
'md5': '8e4ec85f31be7c7fc08a26cdbc5a1f49',
|
||||
'info_dict': {
|
||||
'display_id': 'die-story-im-ersten-mission-unter-falscher-flagge',
|
||||
'id': '100',
|
||||
'display_id': 'das-groko-drama-zerlegen-sich-die-volksparteien-video',
|
||||
'id': '102',
|
||||
'ext': 'mp4',
|
||||
'duration': 2600,
|
||||
'title': 'Die Story im Ersten: Mission unter falscher Flagge',
|
||||
'upload_date': '20140804',
|
||||
'duration': 4435.0,
|
||||
'title': 'Das GroKo-Drama: Zerlegen sich die Volksparteien?',
|
||||
'upload_date': '20180214',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
78
youtube_dl/extractor/aws.py
Normal file
78
youtube_dl/extractor/aws.py
Normal file
@@ -0,0 +1,78 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
import hashlib
|
||||
import hmac
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlencode
|
||||
|
||||
|
||||
class AWSIE(InfoExtractor):
    """Base extractor for APIs that need AWS Signature Version 4 signed requests.

    ``_aws_execute_api`` reads ``self._AWS_PROXY_HOST`` and
    ``self._AWS_API_KEY``; neither is defined on this class, so subclasses
    have to provide them.
    """

    _AWS_ALGORITHM = 'AWS4-HMAC-SHA256'
    _AWS_REGION = 'us-east-1'

    def _aws_execute_api(self, aws_dict, video_id, query=None):
        """Perform a SigV4-signed GET request and return the parsed JSON.

        aws_dict -- mapping with the keys 'uri' (request path appended to
                    the proxy host), 'access_key' and 'secret_key'; an
                    optional 'session_token' is sent as the
                    X-Amz-Security-Token header
        video_id -- passed through to _download_json
        query    -- optional query parameters, either a mapping or a
                    sequence of (name, value) pairs
        """
        query = query or {}
        amz_date = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
        date = amz_date[:8]
        headers = {
            'Accept': 'application/json',
            'Host': self._AWS_PROXY_HOST,
            'X-Amz-Date': amz_date,
            'X-Api-Key': self._AWS_API_KEY
        }
        session_token = aws_dict.get('session_token')
        if session_token:
            headers['X-Amz-Security-Token'] = session_token

        def aws_hash(s):
            return hashlib.sha256(s.encode('utf-8')).hexdigest()

        # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
        # The canonical query string must list parameters sorted by name;
        # relying on dict iteration order yields a signature the server
        # cannot reproduce as soon as more than one parameter is passed.
        query_items = query.items() if hasattr(query, 'items') else query
        canonical_querystring = compat_urllib_parse_urlencode(
            sorted(query_items, key=lambda item: item[0]))
        # Header names are lowercased first and sorted afterwards so that
        # the ordering is the one of the lowercase names, as SigV4 demands.
        canonical_header_items = sorted(
            (header_name.lower(), header_value)
            for header_name, header_value in headers.items())
        canonical_headers = ''.join(
            '%s:%s\n' % item for item in canonical_header_items)
        signed_headers = ';'.join(
            header_name for header_name, _ in canonical_header_items)
        canonical_request = '\n'.join([
            'GET',
            aws_dict['uri'],
            canonical_querystring,
            canonical_headers,
            signed_headers,
            aws_hash('')  # hash of the (empty) GET payload
        ])

        # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html
        credential_scope_list = [date, self._AWS_REGION, 'execute-api', 'aws4_request']
        credential_scope = '/'.join(credential_scope_list)
        string_to_sign = '\n'.join([
            self._AWS_ALGORITHM, amz_date, credential_scope,
            aws_hash(canonical_request)])

        # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
        def aws_hmac(key, msg):
            return hmac.new(key, msg.encode('utf-8'), hashlib.sha256)

        def aws_hmac_digest(key, msg):
            return aws_hmac(key, msg).digest()

        def aws_hmac_hexdigest(key, msg):
            return aws_hmac(key, msg).hexdigest()

        # The signing key is derived by chaining HMACs over each element
        # of the credential scope, seeded with 'AWS4' + secret key.
        k_signing = ('AWS4' + aws_dict['secret_key']).encode('utf-8')
        for value in credential_scope_list:
            k_signing = aws_hmac_digest(k_signing, value)

        signature = aws_hmac_hexdigest(k_signing, string_to_sign)

        # Task 4: http://docs.aws.amazon.com/general/latest/gr/sigv4-add-signature-to-request.html
        # Authorization is added only now so it is not part of the
        # signed header set computed above.
        headers['Authorization'] = ', '.join([
            '%s Credential=%s/%s' % (self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope),
            'SignedHeaders=%s' % signed_headers,
            'Signature=%s' % signature,
        ])

        return self._download_json(
            'https://%s%s%s' % (
                self._AWS_PROXY_HOST, aws_dict['uri'],
                '?' + canonical_querystring if canonical_querystring else ''),
            video_id, headers=headers)
|
@@ -1,11 +1,13 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
|
||||
|
||||
class BigflixIE(InfoExtractor):
|
||||
@@ -39,8 +41,8 @@ class BigflixIE(InfoExtractor):
|
||||
webpage, 'title')
|
||||
|
||||
def decode_url(quoted_b64_url):
|
||||
return base64.b64decode(compat_urllib_parse_unquote(
|
||||
quoted_b64_url).encode('ascii')).decode('utf-8')
|
||||
return compat_b64decode(compat_urllib_parse_unquote(
|
||||
quoted_b64_url)).decode('utf-8')
|
||||
|
||||
formats = []
|
||||
for height, encoded_url in re.findall(
|
||||
|
@@ -102,6 +102,7 @@ class BiliBiliIE(InfoExtractor):
|
||||
video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id)))
|
||||
headers = {
|
||||
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||
'Referer': url
|
||||
}
|
||||
headers.update(self.geo_verification_headers())
|
||||
|
||||
@@ -116,10 +117,15 @@ class BiliBiliIE(InfoExtractor):
|
||||
payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid)
|
||||
sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()
|
||||
|
||||
headers = {
|
||||
'Referer': url
|
||||
}
|
||||
headers.update(self.geo_verification_headers())
|
||||
|
||||
video_info = self._download_json(
|
||||
'http://interface.bilibili.com/playurl?%s&sign=%s' % (payload, sign),
|
||||
video_id, note='Downloading video info page',
|
||||
headers=self.geo_verification_headers())
|
||||
headers=headers)
|
||||
|
||||
if 'durl' not in video_info:
|
||||
self._report_error(video_info)
|
||||
|
@@ -464,7 +464,7 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
'timestamp': 1441391203,
|
||||
'upload_date': '20150904',
|
||||
'uploader_id': '929656772001',
|
||||
'formats': 'mincount:22',
|
||||
'formats': 'mincount:20',
|
||||
},
|
||||
}, {
|
||||
# with rtmp streams
|
||||
@@ -478,7 +478,7 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
'timestamp': 1433556729,
|
||||
'upload_date': '20150606',
|
||||
'uploader_id': '4036320279001',
|
||||
'formats': 'mincount:41',
|
||||
'formats': 'mincount:39',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
@@ -564,59 +564,7 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
|
||||
return entries
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
|
||||
|
||||
account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://players.brightcove.net/%s/%s_%s/index.min.js'
|
||||
% (account_id, player_id, embed), video_id)
|
||||
|
||||
policy_key = None
|
||||
|
||||
catalog = self._search_regex(
|
||||
r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
|
||||
if catalog:
|
||||
catalog = self._parse_json(
|
||||
js_to_json(catalog), video_id, fatal=False)
|
||||
if catalog:
|
||||
policy_key = catalog.get('policyKey')
|
||||
|
||||
if not policy_key:
|
||||
policy_key = self._search_regex(
|
||||
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
|
||||
webpage, 'policy key', group='pk')
|
||||
|
||||
api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id)
|
||||
try:
|
||||
json_data = self._download_json(api_url, video_id, headers={
|
||||
'Accept': 'application/json;pk=%s' % policy_key
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
|
||||
message = json_data.get('message') or json_data['error_code']
|
||||
if json_data.get('error_subcode') == 'CLIENT_GEO':
|
||||
self.raise_geo_restricted(msg=message)
|
||||
raise ExtractorError(message, expected=True)
|
||||
raise
|
||||
|
||||
errors = json_data.get('errors')
|
||||
if errors and errors[0].get('error_subcode') == 'TVE_AUTH':
|
||||
custom_fields = json_data['custom_fields']
|
||||
tve_token = self._extract_mvpd_auth(
|
||||
smuggled_data['source_url'], video_id,
|
||||
custom_fields['bcadobepassrequestorid'],
|
||||
custom_fields['bcadobepassresourceid'])
|
||||
json_data = self._download_json(
|
||||
api_url, video_id, headers={
|
||||
'Accept': 'application/json;pk=%s' % policy_key
|
||||
}, query={
|
||||
'tveToken': tve_token,
|
||||
})
|
||||
|
||||
def _parse_brightcove_metadata(self, json_data, video_id, headers={}):
|
||||
title = json_data['name'].strip()
|
||||
|
||||
formats = []
|
||||
@@ -682,6 +630,7 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
})
|
||||
formats.append(f)
|
||||
|
||||
errors = json_data.get('errors')
|
||||
if not formats and errors:
|
||||
error = errors[0]
|
||||
raise ExtractorError(
|
||||
@@ -689,6 +638,9 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
for f in formats:
|
||||
f.setdefault('http_headers', {}).update(headers)
|
||||
|
||||
subtitles = {}
|
||||
for text_track in json_data.get('text_tracks', []):
|
||||
if text_track.get('src'):
|
||||
@@ -708,9 +660,72 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
'thumbnail': json_data.get('thumbnail') or json_data.get('poster'),
|
||||
'duration': duration,
|
||||
'timestamp': parse_iso8601(json_data.get('published_at')),
|
||||
'uploader_id': account_id,
|
||||
'uploader_id': json_data.get('account_id'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'tags': json_data.get('tags', []),
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
    """Fetch playback metadata for a Brightcove player URL and parse it.

    The policy key is read from the player's index.min.js (first from the
    ``catalog(...)`` call, then from a bare ``policyKey`` assignment) and
    sent in the Accept header of the playback API request. A 'referrer'
    entry in the smuggled data, when present, is forwarded as
    Referer/Origin. The JSON response is handed to
    _parse_brightcove_metadata together with the request headers.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    self._initialize_geo_bypass(smuggled_data.get('geo_countries'))

    account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()

    webpage = self._download_webpage(
        'http://players.brightcove.net/%s/%s_%s/index.min.js'
        % (account_id, player_id, embed), video_id)

    policy_key = None

    catalog = self._search_regex(
        r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
    if catalog:
        catalog = self._parse_json(
            js_to_json(catalog), video_id, fatal=False)
        if catalog:
            policy_key = catalog.get('policyKey')

    if not policy_key:
        policy_key = self._search_regex(
            r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
            webpage, 'policy key', group='pk')

    api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id)
    headers = {
        'Accept': 'application/json;pk=%s' % policy_key,
    }
    referrer = smuggled_data.get('referrer')
    if referrer:
        headers['Referer'] = referrer
        # Only derive Origin when the referrer really is an absolute
        # http(s) URL; otherwise re.search returns None and calling
        # .group() on it would abort extraction with AttributeError.
        origin = re.search(r'https?://[^/]+', referrer)
        if origin:
            headers['Origin'] = origin.group(0)

    try:
        json_data = self._download_json(api_url, video_id, headers=headers)
    except ExtractorError as e:
        if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
            # The 403 body is parsed as a JSON list; its first entry
            # carries the error message/code.
            json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
            message = json_data.get('message') or json_data['error_code']
            if json_data.get('error_subcode') == 'CLIENT_GEO':
                self.raise_geo_restricted(msg=message)
            raise ExtractorError(message, expected=True)
        raise

    errors = json_data.get('errors')
    if errors and errors[0].get('error_subcode') == 'TVE_AUTH':
        custom_fields = json_data['custom_fields']
        tve_token = self._extract_mvpd_auth(
            smuggled_data['source_url'], video_id,
            custom_fields['bcadobepassrequestorid'],
            custom_fields['bcadobepassresourceid'])
        # Retry with the same headers as the first request so that the
        # Referer/Origin pair is not silently dropped on the second call.
        json_data = self._download_json(
            api_url, video_id, headers=headers, query={
                'tveToken': tve_token,
            })

    return self._parse_brightcove_metadata(
        json_data, video_id, headers=headers)
|
||||
|
@@ -4,59 +4,36 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlparse
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
# ExtractorError,
|
||||
# HEADRequest,
|
||||
int_or_none,
|
||||
qualities,
|
||||
remove_end,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class CanalplusIE(InfoExtractor):
|
||||
IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:
|
||||
(?:(?:www|m)\.)?canalplus\.fr|
|
||||
(?:www\.)?piwiplus\.fr|
|
||||
(?:www\.)?d8\.tv|
|
||||
(?:www\.)?c8\.fr|
|
||||
(?:www\.)?d17\.tv|
|
||||
(?:(?:football|www)\.)?cstar\.fr|
|
||||
(?:www\.)?itele\.fr
|
||||
)/(?:(?:[^/]+/)*(?P<display_id>[^/?#&]+))?(?:\?.*\bvid=(?P<vid>\d+))?|
|
||||
player\.canalplus\.fr/#/(?P<id>\d+)
|
||||
)
|
||||
|
||||
'''
|
||||
IE_DESC = 'mycanal.fr and piwiplus.fr'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<site>mycanal|piwiplus)\.fr/(?:[^/]+/)*(?P<display_id>[^?/]+)(?:\.html\?.*\bvid=|/p/)(?P<id>\d+)'
|
||||
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s?format=json'
|
||||
_SITE_ID_MAP = {
|
||||
'canalplus': 'cplus',
|
||||
'mycanal': 'cplus',
|
||||
'piwiplus': 'teletoon',
|
||||
'd8': 'd8',
|
||||
'c8': 'd8',
|
||||
'd17': 'd17',
|
||||
'cstar': 'd17',
|
||||
'itele': 'itele',
|
||||
}
|
||||
|
||||
# Only works for direct mp4 URLs
|
||||
_GEO_COUNTRIES = ['FR']
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1192814',
|
||||
'url': 'https://www.mycanal.fr/d17-emissions/lolywood/p/1397061',
|
||||
'info_dict': {
|
||||
'id': '1405510',
|
||||
'display_id': 'pid1830-c-zapping',
|
||||
'id': '1397061',
|
||||
'display_id': 'lolywood',
|
||||
'ext': 'mp4',
|
||||
'title': 'Zapping - 02/07/2016',
|
||||
'description': 'Le meilleur de toutes les chaînes, tous les jours',
|
||||
'upload_date': '20160702',
|
||||
'title': 'Euro 2016 : Je préfère te prévenir - Lolywood - Episode 34',
|
||||
'description': 'md5:7d97039d455cb29cdba0d652a0efaa5e',
|
||||
'upload_date': '20160602',
|
||||
},
|
||||
}, {
|
||||
# geo restricted, bypassed
|
||||
@@ -70,64 +47,12 @@ class CanalplusIE(InfoExtractor):
|
||||
'upload_date': '20140724',
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 403: Forbidden'],
|
||||
}, {
|
||||
# geo restricted, bypassed
|
||||
'url': 'http://www.c8.fr/c8-divertissement/ms-touche-pas-a-mon-poste/pid6318-videos-integrales.html?vid=1443684',
|
||||
'md5': 'bb6f9f343296ab7ebd88c97b660ecf8d',
|
||||
'info_dict': {
|
||||
'id': '1443684',
|
||||
'display_id': 'pid6318-videos-integrales',
|
||||
'ext': 'mp4',
|
||||
'title': 'Guess my iep ! - TPMP - 07/04/2017',
|
||||
'description': 'md5:6f005933f6e06760a9236d9b3b5f17fa',
|
||||
'upload_date': '20170407',
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 403: Forbidden'],
|
||||
}, {
|
||||
'url': 'http://www.itele.fr/chroniques/invite-michael-darmon/rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510',
|
||||
'info_dict': {
|
||||
'id': '1420176',
|
||||
'display_id': 'rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510',
|
||||
'ext': 'mp4',
|
||||
'title': 'L\'invité de Michaël Darmon du 14/10/2016 - ',
|
||||
'description': 'Chaque matin du lundi au vendredi, Michaël Darmon reçoit un invité politique à 8h25.',
|
||||
'upload_date': '20161014',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://football.cstar.fr/cstar-minisite-foot/pid7566-feminines-videos.html?vid=1416769',
|
||||
'info_dict': {
|
||||
'id': '1416769',
|
||||
'display_id': 'pid7566-feminines-videos',
|
||||
'ext': 'mp4',
|
||||
'title': 'France - Albanie : les temps forts de la soirée - 20/09/2016',
|
||||
'description': 'md5:c3f30f2aaac294c1c969b3294de6904e',
|
||||
'upload_date': '20160921',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://m.canalplus.fr/?vid=1398231',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.d17.tv/emissions/pid8303-lolywood.html?vid=1397061',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
site, display_id, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
site_id = self._SITE_ID_MAP[compat_urllib_parse_urlparse(url).netloc.rsplit('.', 2)[-2]]
|
||||
|
||||
# Beware, some subclasses do not define an id group
|
||||
display_id = remove_end(dict_get(mobj.groupdict(), ('display_id', 'id', 'vid')), '.html')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(
|
||||
[r'<canal:player[^>]+?videoId=(["\'])(?P<id>\d+)',
|
||||
r'id=["\']canal_video_player(?P<id>\d+)',
|
||||
r'data-video=["\'](?P<id>\d+)'],
|
||||
webpage, 'video id', default=mobj.group('vid'), group='id')
|
||||
site_id = self._SITE_ID_MAP[site]
|
||||
|
||||
info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id)
|
||||
video_data = self._download_json(info_url, video_id, 'Downloading video JSON')
|
||||
@@ -161,7 +86,7 @@ class CanalplusIE(InfoExtractor):
|
||||
format_url + '?hdcore=2.11.3', video_id, f4m_id=format_id, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
# the secret extracted ya function in http://player.canalplus.fr/common/js/canalPlayer.js
|
||||
# the secret extracted from ya function in http://player.canalplus.fr/common/js/canalPlayer.js
|
||||
'url': format_url + '?secret=pqzerjlsmdkjfoiuerhsdlfknaes',
|
||||
'format_id': format_id,
|
||||
'preference': preference(format_id),
|
||||
|
@@ -1,6 +1,7 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -13,6 +14,7 @@ from ..utils import (
|
||||
xpath_element,
|
||||
xpath_with_ns,
|
||||
find_xpath_attr,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
parse_age_limit,
|
||||
int_or_none,
|
||||
@@ -359,3 +361,63 @@ class CBCWatchIE(CBCWatchBaseIE):
|
||||
video_id = self._match_id(url)
|
||||
rss = self._call_api('web/browse/' + video_id, video_id)
|
||||
return self._parse_rss_feed(rss)
|
||||
|
||||
|
||||
class CBCOlympicsIE(InfoExtractor):
|
||||
IE_NAME = 'cbc.ca:olympics'
|
||||
_VALID_URL = r'https?://olympics\.cbc\.ca/video/[^/]+/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://olympics.cbc.ca/video/whats-on-tv/olympic-morning-featuring-the-opening-ceremony/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._hidden_inputs(webpage)['videoId']
|
||||
video_doc = self._download_xml(
|
||||
'https://olympics.cbc.ca/videodata/%s.xml' % video_id, video_id)
|
||||
title = xpath_text(video_doc, 'title', fatal=True)
|
||||
is_live = xpath_text(video_doc, 'kind') == 'Live'
|
||||
if is_live:
|
||||
title = self._live_title(title)
|
||||
|
||||
formats = []
|
||||
for video_source in video_doc.findall('videoSources/videoSource'):
|
||||
uri = xpath_text(video_source, 'uri')
|
||||
if not uri:
|
||||
continue
|
||||
tokenize = self._download_json(
|
||||
'https://olympics.cbc.ca/api/api-akamai/tokenize',
|
||||
video_id, data=json.dumps({
|
||||
'VideoSource': uri,
|
||||
}).encode(), headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Referer': url,
|
||||
# d3.VideoPlayer._init in https://olympics.cbc.ca/components/script/base.js
|
||||
'Cookie': '_dvp=TK:C0ObxjerU', # AKAMAI CDN cookie
|
||||
}, fatal=False)
|
||||
if not tokenize:
|
||||
continue
|
||||
content_url = tokenize['ContentUrl']
|
||||
video_source_format = video_source.get('format')
|
||||
if video_source_format == 'IIS':
|
||||
formats.extend(self._extract_ism_formats(
|
||||
content_url, video_id, ism_id=video_source_format, fatal=False))
|
||||
else:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
content_url, video_id, 'mp4',
|
||||
'm3u8' if is_live else 'm3u8_native',
|
||||
m3u8_id=video_source_format, fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': xpath_text(video_doc, 'description'),
|
||||
'thumbnail': xpath_text(video_doc, 'thumbnailUrl'),
|
||||
'duration': parse_duration(xpath_text(video_doc, 'duration')),
|
||||
'formats': formats,
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
@@ -75,10 +75,10 @@ class CBSInteractiveIE(CBSIE):
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
data_json = self._html_search_regex(
|
||||
r"data-(?:cnet|zdnet)-video(?:-uvp(?:js)?)?-options='([^']+)'",
|
||||
r"data(?:-(?:cnet|zdnet))?-video(?:-(?:uvp(?:js)?|player))?-options='([^']+)'",
|
||||
webpage, 'data json')
|
||||
data = self._parse_json(data_json, display_id)
|
||||
vdata = data.get('video') or data['videos'][0]
|
||||
vdata = data.get('video') or (data.get('videos') or data.get('playlist'))[0]
|
||||
|
||||
video_id = vdata['mpxRefId']
|
||||
|
||||
|
@@ -91,12 +91,10 @@ class CBSLocalIE(AnvatoIE):
|
||||
|
||||
info_dict = self._extract_anvato_videos(webpage, display_id)
|
||||
|
||||
time_str = self._html_search_regex(
|
||||
r'class="entry-date">([^<]+)<', webpage, 'released date', default=None)
|
||||
if time_str:
|
||||
timestamp = unified_timestamp(time_str)
|
||||
else:
|
||||
timestamp = parse_iso8601(self._html_search_meta('uploadDate', webpage))
|
||||
timestamp = unified_timestamp(self._html_search_regex(
|
||||
r'class="(?:entry|post)-date"[^>]*>([^<]+)', webpage,
|
||||
'released date', default=None)) or parse_iso8601(
|
||||
self._html_search_meta('uploadDate', webpage))
|
||||
|
||||
info_dict.update({
|
||||
'display_id': display_id,
|
||||
|
@@ -1,11 +1,11 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import base64
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..compat import compat_b64decode
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError
|
||||
@@ -58,7 +58,7 @@ class ChilloutzoneIE(InfoExtractor):
|
||||
|
||||
base64_video_info = self._html_search_regex(
|
||||
r'var cozVidData = "(.+?)";', webpage, 'video data')
|
||||
decoded_video_info = base64.b64decode(base64_video_info.encode('utf-8')).decode('utf-8')
|
||||
decoded_video_info = compat_b64decode(base64_video_info).decode('utf-8')
|
||||
video_info_dict = json.loads(decoded_video_info)
|
||||
|
||||
# get video information from dict
|
||||
|
@@ -1,10 +1,10 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_b64decode
|
||||
from ..utils import parse_duration
|
||||
|
||||
|
||||
@@ -44,8 +44,7 @@ class ChirbitIE(InfoExtractor):
|
||||
|
||||
# Reverse engineered from https://chirb.it/js/chirbit.player.js (look
|
||||
# for soundURL)
|
||||
audio_url = base64.b64decode(
|
||||
data_fd[::-1].encode('ascii')).decode('utf-8')
|
||||
audio_url = compat_b64decode(data_fd[::-1]).decode('utf-8')
|
||||
|
||||
title = self._search_regex(
|
||||
r'class=["\']chirbit-title["\'][^>]*>([^<]+)', webpage, 'title')
|
||||
|
@@ -1,93 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
class CollegeRamaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://collegerama\.tudelft\.nl/Mediasite/Play/(?P<id>[\da-f]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://collegerama.tudelft.nl/Mediasite/Play/585a43626e544bdd97aeb71a0ec907a01d',
|
||||
'md5': '481fda1c11f67588c0d9d8fbdced4e39',
|
||||
'info_dict': {
|
||||
'id': '585a43626e544bdd97aeb71a0ec907a01d',
|
||||
'ext': 'mp4',
|
||||
'title': 'Een nieuwe wereld: waarden, bewustzijn en techniek van de mensheid 2.0.',
|
||||
'description': '',
|
||||
'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$',
|
||||
'duration': 7713.088,
|
||||
'timestamp': 1413309600,
|
||||
'upload_date': '20141014',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://collegerama.tudelft.nl/Mediasite/Play/86a9ea9f53e149079fbdb4202b521ed21d?catalog=fd32fd35-6c99-466c-89d4-cd3c431bc8a4',
|
||||
'md5': 'ef1fdded95bdf19b12c5999949419c92',
|
||||
'info_dict': {
|
||||
'id': '86a9ea9f53e149079fbdb4202b521ed21d',
|
||||
'ext': 'wmv',
|
||||
'title': '64ste Vakantiecursus: Afvalwater',
|
||||
'description': 'md5:7fd774865cc69d972f542b157c328305',
|
||||
'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$',
|
||||
'duration': 10853,
|
||||
'timestamp': 1326446400,
|
||||
'upload_date': '20120113',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
player_options_request = {
|
||||
'getPlayerOptionsRequest': {
|
||||
'ResourceId': video_id,
|
||||
'QueryString': '',
|
||||
}
|
||||
}
|
||||
|
||||
request = sanitized_Request(
|
||||
'http://collegerama.tudelft.nl/Mediasite/PlayerService/PlayerService.svc/json/GetPlayerOptions',
|
||||
json.dumps(player_options_request))
|
||||
request.add_header('Content-Type', 'application/json')
|
||||
|
||||
player_options = self._download_json(request, video_id)
|
||||
|
||||
presentation = player_options['d']['Presentation']
|
||||
title = presentation['Title']
|
||||
description = presentation.get('Description')
|
||||
thumbnail = None
|
||||
duration = float_or_none(presentation.get('Duration'), 1000)
|
||||
timestamp = int_or_none(presentation.get('UnixTime'), 1000)
|
||||
|
||||
formats = []
|
||||
for stream in presentation['Streams']:
|
||||
for video in stream['VideoUrls']:
|
||||
thumbnail_url = stream.get('ThumbnailUrl')
|
||||
if thumbnail_url:
|
||||
thumbnail = 'http://collegerama.tudelft.nl' + thumbnail_url
|
||||
format_id = video['MediaType']
|
||||
if format_id == 'SS':
|
||||
continue
|
||||
formats.append({
|
||||
'url': video['Location'],
|
||||
'format_id': format_id,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'formats': formats,
|
||||
}
|
@@ -174,6 +174,8 @@ class InfoExtractor(object):
|
||||
width : height ratio as float.
|
||||
* no_resume The server does not support resuming the
|
||||
(HTTP or RTMP) download. Boolean.
|
||||
* downloader_options A dictionary of downloader options as
|
||||
described in FileDownloader
|
||||
|
||||
url: Final video URL.
|
||||
ext: Video filename extension.
|
||||
@@ -301,8 +303,9 @@ class InfoExtractor(object):
|
||||
There must be a key "entries", which is a list, an iterable, or a PagedList
|
||||
object, each element of which is a valid dictionary by this specification.
|
||||
|
||||
Additionally, playlists can have "title", "description" and "id" attributes
|
||||
with the same semantics as videos (see above).
|
||||
Additionally, playlists can have "id", "title", "description", "uploader",
|
||||
"uploader_id", "uploader_url" attributes with the same semantics as videos
|
||||
(see above).
|
||||
|
||||
|
||||
_type "multi_video" indicates that there are multiple videos that
|
||||
@@ -494,6 +497,16 @@ class InfoExtractor(object):
|
||||
self.to_screen('%s' % (note,))
|
||||
else:
|
||||
self.to_screen('%s: %s' % (video_id, note))
|
||||
|
||||
# Some sites check X-Forwarded-For HTTP header in order to figure out
|
||||
# the origin of the client behind proxy. This allows bypassing geo
|
||||
# restriction by faking this header's value to IP that belongs to some
|
||||
# geo unrestricted country. We will do so once we encounter any
|
||||
# geo restriction error.
|
||||
if self._x_forwarded_for_ip:
|
||||
if 'X-Forwarded-For' not in headers:
|
||||
headers['X-Forwarded-For'] = self._x_forwarded_for_ip
|
||||
|
||||
if isinstance(url_or_request, compat_urllib_request.Request):
|
||||
url_or_request = update_Request(
|
||||
url_or_request, data=data, headers=headers, query=query)
|
||||
@@ -523,15 +536,6 @@ class InfoExtractor(object):
|
||||
if isinstance(url_or_request, (compat_str, str)):
|
||||
url_or_request = url_or_request.partition('#')[0]
|
||||
|
||||
# Some sites check X-Forwarded-For HTTP header in order to figure out
|
||||
# the origin of the client behind proxy. This allows bypassing geo
|
||||
# restriction by faking this header's value to IP that belongs to some
|
||||
# geo unrestricted country. We will do so once we encounter any
|
||||
# geo restriction error.
|
||||
if self._x_forwarded_for_ip:
|
||||
if 'X-Forwarded-For' not in headers:
|
||||
headers['X-Forwarded-For'] = self._x_forwarded_for_ip
|
||||
|
||||
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query)
|
||||
if urlh is False:
|
||||
assert not fatal
|
||||
@@ -1025,7 +1029,7 @@ class InfoExtractor(object):
|
||||
part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
|
||||
if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
|
||||
info['series'] = unescapeHTML(part_of_series.get('name'))
|
||||
elif item_type == 'Article':
|
||||
elif item_type in ('Article', 'NewsArticle'):
|
||||
info.update({
|
||||
'timestamp': parse_iso8601(e.get('datePublished')),
|
||||
'title': unescapeHTML(e.get('headline')),
|
||||
@@ -1878,6 +1882,7 @@ class InfoExtractor(object):
|
||||
'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
|
||||
'format_note': 'DASH %s' % content_type,
|
||||
'filesize': filesize,
|
||||
'container': mimetype2ext(mime_type) + '_dash',
|
||||
}
|
||||
f.update(parse_codecs(representation_attrib.get('codecs')))
|
||||
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
||||
@@ -2005,16 +2010,14 @@ class InfoExtractor(object):
|
||||
f['url'] = initialization_url
|
||||
f['fragments'].append({location_key(initialization_url): initialization_url})
|
||||
f['fragments'].extend(representation_ms_info['fragments'])
|
||||
try:
|
||||
existing_format = next(
|
||||
fo for fo in formats
|
||||
if fo['format_id'] == representation_id)
|
||||
except StopIteration:
|
||||
full_info = formats_dict.get(representation_id, {}).copy()
|
||||
full_info.update(f)
|
||||
formats.append(full_info)
|
||||
else:
|
||||
existing_format.update(f)
|
||||
# According to [1, 5.3.5.2, Table 7, page 35] @id of Representation
|
||||
# is not necessarily unique within a Period thus formats with
|
||||
# the same `format_id` are quite possible. There are numerous examples
|
||||
# of such manifests (see https://github.com/rg3/youtube-dl/issues/15111,
|
||||
# https://github.com/rg3/youtube-dl/issues/13919)
|
||||
full_info = formats_dict.get(representation_id, {}).copy()
|
||||
full_info.update(f)
|
||||
formats.append(full_info)
|
||||
else:
|
||||
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
||||
return formats
|
||||
@@ -2054,7 +2057,7 @@ class InfoExtractor(object):
|
||||
stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
|
||||
stream_name = stream.get('Name')
|
||||
for track in stream.findall('QualityLevel'):
|
||||
fourcc = track.get('FourCC')
|
||||
fourcc = track.get('FourCC', 'AACL' if track.get('AudioTag') == '255' else None)
|
||||
# TODO: add support for WVC1 and WMAP
|
||||
if fourcc not in ('H264', 'AVC1', 'AACL'):
|
||||
self.report_warning('%s is not a supported codec' % fourcc)
|
||||
@@ -2247,9 +2250,10 @@ class InfoExtractor(object):
|
||||
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
|
||||
query = compat_urlparse.urlparse(url).query
|
||||
url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
|
||||
url_base = self._search_regex(
|
||||
r'(?:(?:https?|rtmp|rtsp):)?(//[^?]+)', url, 'format url')
|
||||
http_base_url = '%s:%s' % ('http', url_base)
|
||||
mobj = re.search(
|
||||
r'(?:(?:http|rtmp|rtsp)(?P<s>s)?:)?(?P<url>//[^?]+)', url)
|
||||
url_base = mobj.group('url')
|
||||
http_base_url = '%s%s:%s' % ('http', mobj.group('s') or '', url_base)
|
||||
formats = []
|
||||
|
||||
def manifest_url(manifest):
|
||||
@@ -2349,7 +2353,10 @@ class InfoExtractor(object):
|
||||
for track in tracks:
|
||||
if not isinstance(track, dict):
|
||||
continue
|
||||
if track.get('kind') != 'captions':
|
||||
track_kind = track.get('kind')
|
||||
if not track_kind or not isinstance(track_kind, compat_str):
|
||||
continue
|
||||
if track_kind.lower() not in ('captions', 'subtitles'):
|
||||
continue
|
||||
track_url = urljoin(base_url, track.get('file'))
|
||||
if not track_url:
|
||||
@@ -2403,7 +2410,7 @@ class InfoExtractor(object):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=m3u8_id, fatal=False))
|
||||
elif ext == 'mpd':
|
||||
elif source_type == 'dash' or ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
source_url, video_id, mpd_id=mpd_id, fatal=False))
|
||||
elif ext == 'smil':
|
||||
|
@@ -3,13 +3,13 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
import base64
|
||||
import zlib
|
||||
|
||||
from hashlib import sha1
|
||||
from math import pow, sqrt, floor
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_etree_fromstring,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urllib_request,
|
||||
@@ -272,8 +272,8 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
}
|
||||
|
||||
def _decrypt_subtitles(self, data, iv, id):
|
||||
data = bytes_to_intlist(base64.b64decode(data.encode('utf-8')))
|
||||
iv = bytes_to_intlist(base64.b64decode(iv.encode('utf-8')))
|
||||
data = bytes_to_intlist(compat_b64decode(data))
|
||||
iv = bytes_to_intlist(compat_b64decode(iv))
|
||||
id = int(id)
|
||||
|
||||
def obfuscate_key_aux(count, modulo, start):
|
||||
@@ -392,7 +392,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
'Downloading subtitles for ' + sub_name, data={
|
||||
'subtitle_script_id': sub_id,
|
||||
})
|
||||
if not sub_doc:
|
||||
if sub_doc is None:
|
||||
continue
|
||||
sid = sub_doc.get('id')
|
||||
iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
|
||||
@@ -479,9 +479,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
'video_quality': stream_quality,
|
||||
'current_page': url,
|
||||
})
|
||||
if streamdata:
|
||||
if streamdata is not None:
|
||||
stream_info = streamdata.find('./{default}preload/stream_info')
|
||||
if stream_info:
|
||||
if stream_info is not None:
|
||||
stream_infos.append(stream_info)
|
||||
stream_info = self._call_rpc_api(
|
||||
'VideoEncode_GetStreamInfo', video_id,
|
||||
@@ -490,7 +490,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
'video_format': stream_format,
|
||||
'video_encode_quality': stream_quality,
|
||||
})
|
||||
if stream_info:
|
||||
if stream_info is not None:
|
||||
stream_infos.append(stream_info)
|
||||
for stream_info in stream_infos:
|
||||
video_encode_id = xpath_text(stream_info, './video_encode_id')
|
||||
|
@@ -4,13 +4,14 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unescapeHTML,
|
||||
find_xpath_attr,
|
||||
smuggle_url,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
find_xpath_attr,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
smuggle_url,
|
||||
unescapeHTML,
|
||||
)
|
||||
from .senateisvp import SenateISVPIE
|
||||
from .ustream import UstreamIE
|
||||
@@ -68,6 +69,10 @@ class CSpanIE(InfoExtractor):
|
||||
'uploader': 'HouseCommittee',
|
||||
'uploader_id': '12987475',
|
||||
},
|
||||
}, {
|
||||
# Audio Only
|
||||
'url': 'https://www.c-span.org/video/?437336-1/judiciary-antitrust-competition-policy-consumer-rights',
|
||||
'only_matching': True,
|
||||
}]
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
|
||||
|
||||
@@ -111,7 +116,15 @@ class CSpanIE(InfoExtractor):
|
||||
title = self._og_search_title(webpage)
|
||||
surl = smuggle_url(senate_isvp_url, {'force_title': title})
|
||||
return self.url_result(surl, 'SenateISVP', video_id, title)
|
||||
video_id = self._search_regex(
|
||||
r'jwsetup\.clipprog\s*=\s*(\d+);',
|
||||
webpage, 'jwsetup program id', default=None)
|
||||
if video_id:
|
||||
video_type = 'program'
|
||||
if video_type is None or video_id is None:
|
||||
error_message = get_element_by_class('VLplayer-error-message', webpage)
|
||||
if error_message:
|
||||
raise ExtractorError(error_message)
|
||||
raise ExtractorError('unable to find video id and type')
|
||||
|
||||
def get_text_attr(d, attr):
|
||||
@@ -138,7 +151,7 @@ class CSpanIE(InfoExtractor):
|
||||
entries = []
|
||||
for partnum, f in enumerate(files):
|
||||
formats = []
|
||||
for quality in f['qualities']:
|
||||
for quality in f.get('qualities', []):
|
||||
formats.append({
|
||||
'format_id': '%s-%sp' % (get_text_attr(quality, 'bitrate'), get_text_attr(quality, 'height')),
|
||||
'url': unescapeHTML(get_text_attr(quality, 'file')),
|
||||
|
@@ -10,6 +10,7 @@ from ..aes import (
|
||||
aes_cbc_decrypt,
|
||||
aes_cbc_encrypt,
|
||||
)
|
||||
from ..compat import compat_b64decode
|
||||
from ..utils import (
|
||||
bytes_to_intlist,
|
||||
bytes_to_long,
|
||||
@@ -93,7 +94,7 @@ class DaisukiMottoIE(InfoExtractor):
|
||||
|
||||
rtn = self._parse_json(
|
||||
intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist(
|
||||
base64.b64decode(encrypted_rtn)),
|
||||
compat_b64decode(encrypted_rtn)),
|
||||
aes_key, iv)).decode('utf-8').rstrip('\0'),
|
||||
video_id)
|
||||
|
||||
|
56
youtube_dl/extractor/digg.py
Normal file
56
youtube_dl/extractor/digg.py
Normal file
@@ -0,0 +1,56 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json
|
||||
|
||||
|
||||
class DiggIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?digg\.com/video/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
# JWPlatform via provider
|
||||
'url': 'http://digg.com/video/sci-fi-short-jonah-daniel-kaluuya-get-out',
|
||||
'info_dict': {
|
||||
'id': 'LcqvmS0b',
|
||||
'ext': 'mp4',
|
||||
'title': "'Get Out' Star Daniel Kaluuya Goes On 'Moby Dick'-Like Journey In Sci-Fi Short 'Jonah'",
|
||||
'description': 'md5:541bb847648b6ee3d6514bc84b82efda',
|
||||
'upload_date': '20180109',
|
||||
'timestamp': 1515530551,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# Youtube via provider
|
||||
'url': 'http://digg.com/video/dog-boat-seal-play',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# vimeo as regular embed
|
||||
'url': 'http://digg.com/video/dream-girl-short-film',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
info = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)video_info\s*=\s*({.+?});\n', webpage, 'video info',
|
||||
default='{}'), display_id, transform_source=js_to_json,
|
||||
fatal=False)
|
||||
|
||||
video_id = info.get('video_id')
|
||||
|
||||
if video_id:
|
||||
provider = info.get('provider_name')
|
||||
if provider == 'youtube':
|
||||
return self.url_result(
|
||||
video_id, ie='Youtube', video_id=video_id)
|
||||
elif provider == 'jwplayer':
|
||||
return self.url_result(
|
||||
'jwplatform:%s' % video_id, ie='JWPlatform',
|
||||
video_id=video_id)
|
||||
|
||||
return self.url_result(url, 'Generic')
|
@@ -5,15 +5,16 @@ import re
|
||||
import string
|
||||
|
||||
from .discoverygo import DiscoveryGoBaseIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
update_url_query,
|
||||
try_get,
|
||||
)
|
||||
from ..compat import compat_HTTPError
|
||||
|
||||
|
||||
class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?(?:
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?(?P<site>
|
||||
discovery|
|
||||
investigationdiscovery|
|
||||
discoverylife|
|
||||
@@ -44,7 +45,7 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
_GEO_BYPASS = False
|
||||
|
||||
def _real_extract(self, url):
|
||||
path, display_id = re.match(self._VALID_URL, url).groups()
|
||||
site, path, display_id = re.match(self._VALID_URL, url).groups()
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
react_data = self._parse_json(self._search_regex(
|
||||
@@ -55,14 +56,13 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
video_id = video['id']
|
||||
|
||||
access_token = self._download_json(
|
||||
'https://www.discovery.com/anonymous', display_id, query={
|
||||
'authLink': update_url_query(
|
||||
'https://login.discovery.com/v1/oauth2/authorize', {
|
||||
'client_id': react_data['application']['apiClientId'],
|
||||
'redirect_uri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html',
|
||||
'response_type': 'anonymous',
|
||||
'state': 'nonce,' + ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
|
||||
})
|
||||
'https://www.%s.com/anonymous' % site, display_id, query={
|
||||
'authRel': 'authorization',
|
||||
'client_id': try_get(
|
||||
react_data, lambda x: x['application']['apiClientId'],
|
||||
compat_str) or '3020a40c2356a645b4b4',
|
||||
'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
|
||||
'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site,
|
||||
})['access_token']
|
||||
|
||||
try:
|
||||
|
@@ -5,6 +5,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
@@ -73,7 +74,11 @@ class DiscoveryGoBaseIE(InfoExtractor):
|
||||
not subtitle_url.startswith('http')):
|
||||
continue
|
||||
lang = caption.get('fileLang', 'en')
|
||||
subtitles.setdefault(lang, []).append({'url': subtitle_url})
|
||||
ext = determine_ext(subtitle_url)
|
||||
subtitles.setdefault(lang, []).append({
|
||||
'url': subtitle_url,
|
||||
'ext': 'ttml' if ext == 'xml' else ext,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -10,6 +10,7 @@ from ..utils import (
|
||||
compat_str,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
@@ -108,9 +109,16 @@ class DisneyIE(InfoExtractor):
|
||||
continue
|
||||
tbr = int_or_none(flavor.get('bitrate'))
|
||||
if tbr == 99999:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
# wrong ks(Kaltura Signature) causes 404 Error
|
||||
flavor_url = update_url_query(flavor_url, {'ks': ''})
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
flavor_url, video_id, 'mp4',
|
||||
m3u8_id=flavor_format, fatal=False))
|
||||
m3u8_id=flavor_format, fatal=False)
|
||||
for f in m3u8_formats:
|
||||
# Apple FairPlay
|
||||
if '/fpshls/' in f['url']:
|
||||
continue
|
||||
formats.append(f)
|
||||
continue
|
||||
format_id = []
|
||||
if flavor_format:
|
||||
|
@@ -12,25 +12,28 @@ from ..compat import (
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
remove_end,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
USER_AGENTS,
|
||||
)
|
||||
|
||||
|
||||
class DPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?P<domain>www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?P<domain>www\.(?P<host>dplay\.(?P<country>dk|se|no)))/(?:video(?:er|s)/)?(?P<id>[^/]+/[^/?#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# non geo restricted, via secure api, unsigned download hls URL
|
||||
'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/',
|
||||
'info_dict': {
|
||||
'id': '3172',
|
||||
'display_id': 'season-1-svensken-lar-sig-njuta-av-livet',
|
||||
'display_id': 'nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet',
|
||||
'ext': 'mp4',
|
||||
'title': 'Svensken lär sig njuta av livet',
|
||||
'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8',
|
||||
@@ -48,7 +51,7 @@ class DPlayIE(InfoExtractor):
|
||||
'url': 'http://www.dplay.dk/mig-og-min-mor/season-6-episode-12/',
|
||||
'info_dict': {
|
||||
'id': '70816',
|
||||
'display_id': 'season-6-episode-12',
|
||||
'display_id': 'mig-og-min-mor/season-6-episode-12',
|
||||
'ext': 'mp4',
|
||||
'title': 'Episode 12',
|
||||
'description': 'md5:9c86e51a93f8a4401fc9641ef9894c90',
|
||||
@@ -65,6 +68,33 @@ class DPlayIE(InfoExtractor):
|
||||
# geo restricted, via direct unsigned hls URL
|
||||
'url': 'http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# disco-api
|
||||
'url': 'https://www.dplay.no/videoer/i-kongens-klr/sesong-1-episode-7',
|
||||
'info_dict': {
|
||||
'id': '40206',
|
||||
'display_id': 'i-kongens-klr/sesong-1-episode-7',
|
||||
'ext': 'mp4',
|
||||
'title': 'Episode 7',
|
||||
'description': 'md5:e3e1411b2b9aebeea36a6ec5d50c60cf',
|
||||
'duration': 2611.16,
|
||||
'timestamp': 1516726800,
|
||||
'upload_date': '20180123',
|
||||
'series': 'I kongens klær',
|
||||
'season_number': 1,
|
||||
'episode_number': 7,
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
|
||||
'url': 'https://www.dplay.dk/videoer/singleliv/season-5-episode-3',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.dplay.se/videos/sofias-anglar/sofias-anglar-1001',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -72,10 +102,81 @@ class DPlayIE(InfoExtractor):
|
||||
display_id = mobj.group('id')
|
||||
domain = mobj.group('domain')
|
||||
|
||||
self._initialize_geo_bypass([mobj.group('country').upper()])
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'data-video-id=["\'](\d+)', webpage, 'video id')
|
||||
r'data-video-id=["\'](\d+)', webpage, 'video id', default=None)
|
||||
|
||||
if not video_id:
|
||||
host = mobj.group('host')
|
||||
disco_base = 'https://disco-api.%s' % host
|
||||
self._download_json(
|
||||
'%s/token' % disco_base, display_id, 'Downloading token',
|
||||
query={
|
||||
'realm': host.replace('.', ''),
|
||||
})
|
||||
video = self._download_json(
|
||||
'%s/content/videos/%s' % (disco_base, display_id), display_id,
|
||||
headers={
|
||||
'Referer': url,
|
||||
'x-disco-client': 'WEB:UNKNOWN:dplay-client:0.0.1',
|
||||
}, query={
|
||||
'include': 'show'
|
||||
})
|
||||
video_id = video['data']['id']
|
||||
info = video['data']['attributes']
|
||||
title = info['name']
|
||||
formats = []
|
||||
for format_id, format_dict in self._download_json(
|
||||
'%s/playback/videoPlaybackInfo/%s' % (disco_base, video_id),
|
||||
display_id)['data']['attributes']['streaming'].items():
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
format_url = format_dict.get('url')
|
||||
if not format_url:
|
||||
continue
|
||||
ext = determine_ext(format_url)
|
||||
if format_id == 'dash' or ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, display_id, mpd_id='dash', fatal=False))
|
||||
elif format_id == 'hls' or ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, display_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls',
|
||||
fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': format_id,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
series = None
|
||||
try:
|
||||
included = video.get('included')
|
||||
if isinstance(included, list):
|
||||
show = next(e for e in included if e.get('type') == 'show')
|
||||
series = try_get(
|
||||
show, lambda x: x['attributes']['name'], compat_str)
|
||||
except StopIteration:
|
||||
pass
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': info.get('description'),
|
||||
'duration': float_or_none(
|
||||
info.get('videoDuration'), scale=1000),
|
||||
'timestamp': unified_timestamp(info.get('publishStart')),
|
||||
'series': series,
|
||||
'season_number': int_or_none(info.get('seasonNumber')),
|
||||
'episode_number': int_or_none(info.get('episodeNumber')),
|
||||
'age_limit': int_or_none(info.get('minimum_age')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
info = self._download_json(
|
||||
'http://%s/api/v2/ajax/videos?video_id=%s' % (domain, video_id),
|
||||
|
@@ -1,10 +1,10 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_b64decode
|
||||
from ..utils import (
|
||||
qualities,
|
||||
sanitized_Request,
|
||||
@@ -42,7 +42,7 @@ class DumpertIE(InfoExtractor):
|
||||
r'data-files="([^"]+)"', webpage, 'data files')
|
||||
|
||||
files = self._parse_json(
|
||||
base64.b64decode(files_base64.encode('utf-8')).decode('utf-8'),
|
||||
compat_b64decode(files_base64).decode('utf-8'),
|
||||
video_id)
|
||||
|
||||
quality = qualities(['flv', 'mobile', 'tablet', '720p'])
|
||||
|
@@ -32,7 +32,7 @@ class DVTVIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/',
|
||||
'info_dict': {
|
||||
'title': 'DVTV 16. 12. 2014: útok Talibanu, boj o kliniku, uprchlíci',
|
||||
'title': r're:^DVTV 16\. 12\. 2014: útok Talibanu, boj o kliniku, uprchlíci',
|
||||
'id': '973eb3bc854e11e498be002590604f2e',
|
||||
},
|
||||
'playlist': [{
|
||||
@@ -91,10 +91,24 @@ class DVTVIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.aktualne.cz/dvtv/babis-a-zeman-nesou-vinu-za-to-ze-nemame-jasno-v-tom-kdo-bud/r~026afb54fad711e79704ac1f6b220ee8/',
|
||||
'md5': '87defe16681b1429c91f7a74809823c6',
|
||||
'info_dict': {
|
||||
'id': 'f5ae72f6fad611e794dbac1f6b220ee8',
|
||||
'ext': 'mp4',
|
||||
'title': 'Babiš a Zeman nesou vinu za to, že nemáme jasno v tom, kdo bude vládnout, říká Pekarová Adamová',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _parse_video_metadata(self, js, video_id):
|
||||
def _parse_video_metadata(self, js, video_id, live_js=None):
|
||||
data = self._parse_json(js, video_id, transform_source=js_to_json)
|
||||
if live_js:
|
||||
data.update(self._parse_json(
|
||||
live_js, video_id, transform_source=js_to_json))
|
||||
|
||||
title = unescapeHTML(data['title'])
|
||||
|
||||
@@ -142,13 +156,18 @@ class DVTVIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# live content
|
||||
live_item = self._search_regex(
|
||||
r'(?s)embedData[0-9a-f]{32}\.asset\.liveStarter\s*=\s*(\{.+?\});',
|
||||
webpage, 'video', default=None)
|
||||
|
||||
# single video
|
||||
item = self._search_regex(
|
||||
r'(?s)embedData[0-9a-f]{32}\[["\']asset["\']\]\s*=\s*(\{.+?\});',
|
||||
webpage, 'video', default=None, fatal=False)
|
||||
webpage, 'video', default=None)
|
||||
|
||||
if item:
|
||||
return self._parse_video_metadata(item, video_id)
|
||||
return self._parse_video_metadata(item, video_id, live_item)
|
||||
|
||||
# playlist
|
||||
items = re.findall(
|
||||
|
@@ -1,13 +1,13 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
compat_b64decode,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
@@ -36,9 +36,9 @@ class EinthusanIE(InfoExtractor):
|
||||
|
||||
# reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js
|
||||
def _decrypt(self, encrypted_data, video_id):
|
||||
return self._parse_json(base64.b64decode((
|
||||
return self._parse_json(compat_b64decode((
|
||||
encrypted_data[:10] + encrypted_data[-1] + encrypted_data[12:-1]
|
||||
).encode('ascii')).decode('utf-8'), video_id)
|
||||
)).decode('utf-8'), video_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@@ -1,6 +1,9 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .once import OnceIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
@@ -9,22 +12,27 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class ESPNIE(InfoExtractor):
|
||||
class ESPNIE(OnceIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:(?:\w+\.)+)?espn\.go|
|
||||
(?:www\.)?espn
|
||||
)\.com/
|
||||
(?:
|
||||
(?:
|
||||
video/clip|
|
||||
watch/player
|
||||
)
|
||||
(?:
|
||||
\?.*?\bid=|
|
||||
/_/id/
|
||||
)
|
||||
(?:
|
||||
(?:(?:\w+\.)+)?espn\.go|
|
||||
(?:www\.)?espn
|
||||
)\.com/
|
||||
(?:
|
||||
(?:
|
||||
video/(?:clip|iframe/twitter)|
|
||||
watch/player
|
||||
)
|
||||
(?:
|
||||
.*?\?.*?\bid=|
|
||||
/_/id/
|
||||
)
|
||||
)
|
||||
)|
|
||||
(?:www\.)espnfc\.(?:com|us)/(?:video/)?[^/]+/\d+/video/
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
@@ -77,6 +85,15 @@ class ESPNIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.espn.com/video/clip/_/id/17989860',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.espnfc.us/video/espn-fc-tv/86/video/3319154/nashville-unveiled-as-the-newest-club-in-mls',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.espnfc.com/english-premier-league/23/video/3324163/premier-league-in-90-seconds-golden-tweets',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -93,7 +110,9 @@ class ESPNIE(InfoExtractor):
|
||||
|
||||
def traverse_source(source, base_source_id=None):
|
||||
for source_id, source in source.items():
|
||||
if isinstance(source, compat_str):
|
||||
if source_id == 'alert':
|
||||
continue
|
||||
elif isinstance(source, compat_str):
|
||||
extract_source(source, base_source_id)
|
||||
elif isinstance(source, dict):
|
||||
traverse_source(
|
||||
@@ -106,7 +125,9 @@ class ESPNIE(InfoExtractor):
|
||||
return
|
||||
format_urls.add(source_url)
|
||||
ext = determine_ext(source_url)
|
||||
if ext == 'smil':
|
||||
if OnceIE.suitable(source_url):
|
||||
formats.extend(self._extract_once_formats(source_url))
|
||||
elif ext == 'smil':
|
||||
formats.extend(self._extract_smil_formats(
|
||||
source_url, video_id, fatal=False))
|
||||
elif ext == 'f4m':
|
||||
@@ -117,12 +138,24 @@ class ESPNIE(InfoExtractor):
|
||||
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=source_id, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
f = {
|
||||
'url': source_url,
|
||||
'format_id': source_id,
|
||||
})
|
||||
}
|
||||
mobj = re.search(r'(\d+)p(\d+)_(\d+)k\.', source_url)
|
||||
if mobj:
|
||||
f.update({
|
||||
'height': int(mobj.group(1)),
|
||||
'fps': int(mobj.group(2)),
|
||||
'tbr': int(mobj.group(3)),
|
||||
})
|
||||
if source_id == 'mezzanine':
|
||||
f['preference'] = 1
|
||||
formats.append(f)
|
||||
|
||||
traverse_source(clip['links']['source'])
|
||||
links = clip.get('links', {})
|
||||
traverse_source(links.get('source', {}))
|
||||
traverse_source(links.get('mobile', {}))
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = clip.get('caption') or clip.get('description')
|
||||
@@ -144,9 +177,6 @@ class ESPNIE(InfoExtractor):
|
||||
class ESPNArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://espn.go.com/nba/recap?gameId=400793786',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
@@ -175,3 +205,34 @@ class ESPNArticleIE(InfoExtractor):
|
||||
|
||||
return self.url_result(
|
||||
'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key())
|
||||
|
||||
|
||||
class FiveThirtyEightIE(InfoExtractor):
    # Matches fivethirtyeight.com feature pages. The page itself is only
    # scanned for a numeric data-video-id attribute; the result is a
    # url_result for an ESPN clip URL tagged with ESPNIE's key.
    _VALID_URL = r'https?://(?:www\.)?fivethirtyeight\.com/features/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://fivethirtyeight.com/features/how-the-6-8-raiders-can-still-make-the-playoffs/',
        'info_dict': {
            'id': '21846851',
            'ext': 'mp4',
            'title': 'FiveThirtyEight: The Raiders can still make the playoffs',
            'description': 'Neil Paine breaks down the simplest scenario that will put the Raiders into the playoffs at 8-8.',
            'timestamp': 1513960621,
            'upload_date': '20171222',
        },
        'params': {
            'skip_download': True,
        },
        'expected_warnings': ['Unable to download f4m manifest'],
    }

    def _real_extract(self, url):
        # The URL slug is only used to label the page download.
        page_slug = self._match_id(url)
        webpage = self._download_webpage(url, page_slug)

        # The numeric id embedded in the page is what the ESPN clip URL needs.
        espn_video_id = self._search_regex(
            r'data-video-id=["\'](?P<id>\d+)',
            webpage, 'video id', group='id')
        clip_url = 'http://espn.go.com/video/clip?id=%s' % espn_video_id

        return self.url_result(clip_url, ESPNIE.ie_key())
|
||||
|
@@ -162,6 +162,7 @@ from .cbc import (
|
||||
CBCPlayerIE,
|
||||
CBCWatchVideoIE,
|
||||
CBCWatchIE,
|
||||
CBCOlympicsIE,
|
||||
)
|
||||
from .cbs import CBSIE
|
||||
from .cbslocal import CBSLocalIE
|
||||
@@ -205,7 +206,6 @@ from .cnn import (
|
||||
CNNArticleIE,
|
||||
)
|
||||
from .coub import CoubIE
|
||||
from .collegerama import CollegeRamaIE
|
||||
from .comedycentral import (
|
||||
ComedyCentralFullEpisodesIE,
|
||||
ComedyCentralIE,
|
||||
@@ -260,6 +260,7 @@ from .deezer import DeezerPlaylistIE
|
||||
from .democracynow import DemocracynowIE
|
||||
from .dfb import DFBIE
|
||||
from .dhm import DHMIE
|
||||
from .digg import DiggIE
|
||||
from .dotsub import DotsubIE
|
||||
from .douyutv import (
|
||||
DouyuShowIE,
|
||||
@@ -322,6 +323,7 @@ from .escapist import EscapistIE
|
||||
from .espn import (
|
||||
ESPNIE,
|
||||
ESPNArticleIE,
|
||||
FiveThirtyEightIE,
|
||||
)
|
||||
from .esri import EsriVideoIE
|
||||
from .etonline import ETOnlineIE
|
||||
@@ -344,6 +346,7 @@ from .filmon import (
|
||||
FilmOnIE,
|
||||
FilmOnChannelIE,
|
||||
)
|
||||
from .filmweb import FilmwebIE
|
||||
from .firsttv import FirstTVIE
|
||||
from .fivemin import FiveMinIE
|
||||
from .fivetv import FiveTVIE
|
||||
@@ -371,8 +374,10 @@ from .franceculture import FranceCultureIE
|
||||
from .franceinter import FranceInterIE
|
||||
from .francetv import (
|
||||
FranceTVIE,
|
||||
FranceTVSiteIE,
|
||||
FranceTVEmbedIE,
|
||||
FranceTVInfoIE,
|
||||
FranceTVJeunesseIE,
|
||||
GenerationWhatIE,
|
||||
CultureboxIE,
|
||||
)
|
||||
@@ -464,6 +469,7 @@ from .indavideo import (
|
||||
)
|
||||
from .infoq import InfoQIE
|
||||
from .instagram import InstagramIE, InstagramUserIE
|
||||
from .internazionale import InternazionaleIE
|
||||
from .internetvideoarchive import InternetVideoArchiveIE
|
||||
from .iprima import IPrimaIE
|
||||
from .iqiyi import IqiyiIE
|
||||
@@ -487,7 +493,6 @@ from .jwplatform import JWPlatformIE
|
||||
from .jpopsukitv import JpopsukiIE
|
||||
from .kakao import KakaoIE
|
||||
from .kaltura import KalturaIE
|
||||
from .kamcord import KamcordIE
|
||||
from .kanalplay import KanalPlayIE
|
||||
from .kankan import KankanIE
|
||||
from .karaoketv import KaraoketvIE
|
||||
@@ -561,7 +566,11 @@ from .lynda import (
|
||||
)
|
||||
from .m6 import M6IE
|
||||
from .macgamestore import MacGameStoreIE
|
||||
from .mailru import MailRuIE
|
||||
from .mailru import (
|
||||
MailRuIE,
|
||||
MailRuMusicIE,
|
||||
MailRuMusicSearchIE,
|
||||
)
|
||||
from .makerschannel import MakersChannelIE
|
||||
from .makertv import MakerTVIE
|
||||
from .mangomolo import (
|
||||
@@ -573,6 +582,7 @@ from .massengeschmacktv import MassengeschmackTVIE
|
||||
from .matchtv import MatchTVIE
|
||||
from .mdr import MDRIE
|
||||
from .mediaset import MediasetIE
|
||||
from .mediasite import MediasiteIE
|
||||
from .medici import MediciIE
|
||||
from .megaphone import MegaphoneIE
|
||||
from .meipai import MeipaiIE
|
||||
@@ -606,7 +616,10 @@ from .mofosex import MofosexIE
|
||||
from .mojvideo import MojvideoIE
|
||||
from .moniker import MonikerIE
|
||||
from .morningstar import MorningstarIE
|
||||
from .motherless import MotherlessIE
|
||||
from .motherless import (
|
||||
MotherlessIE,
|
||||
MotherlessGroupIE
|
||||
)
|
||||
from .motorsport import MotorsportIE
|
||||
from .movieclips import MovieClipsIE
|
||||
from .moviezine import MoviezineIE
|
||||
@@ -624,7 +637,10 @@ from .musicplayon import MusicPlayOnIE
|
||||
from .mwave import MwaveIE, MwaveMeetGreetIE
|
||||
from .myspace import MySpaceIE, MySpaceAlbumIE
|
||||
from .myspass import MySpassIE
|
||||
from .myvi import MyviIE
|
||||
from .myvi import (
|
||||
MyviIE,
|
||||
MyviEmbedIE,
|
||||
)
|
||||
from .myvidster import MyVidsterIE
|
||||
from .nationalgeographic import (
|
||||
NationalGeographicVideoIE,
|
||||
@@ -638,6 +654,7 @@ from .nbc import (
|
||||
NBCIE,
|
||||
NBCNewsIE,
|
||||
NBCOlympicsIE,
|
||||
NBCOlympicsStreamIE,
|
||||
NBCSportsIE,
|
||||
NBCSportsVPlayerIE,
|
||||
)
|
||||
@@ -875,7 +892,6 @@ from .revision3 import (
|
||||
Revision3IE,
|
||||
)
|
||||
from .rice import RICEIE
|
||||
from .ringtv import RingTVIE
|
||||
from .rmcdecouverte import RMCDecouverteIE
|
||||
from .ro220 import Ro220IE
|
||||
from .rockstargames import RockstarGamesIE
|
||||
@@ -895,6 +911,7 @@ from .rtp import RTPIE
|
||||
from .rts import RTSIE
|
||||
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE
|
||||
from .rtvnh import RTVNHIE
|
||||
from .rtvs import RTVSIE
|
||||
from .rudo import RudoIE
|
||||
from .ruhd import RUHDIE
|
||||
from .ruleporn import RulePornIE
|
||||
@@ -909,7 +926,6 @@ from .rutube import (
|
||||
from .rutv import RUTVIE
|
||||
from .ruutu import RuutuIE
|
||||
from .ruv import RuvIE
|
||||
from .sandia import SandiaIE
|
||||
from .safari import (
|
||||
SafariIE,
|
||||
SafariApiIE,
|
||||
@@ -926,8 +942,16 @@ from .senateisvp import SenateISVPIE
|
||||
from .sendtonews import SendtoNewsIE
|
||||
from .servingsys import ServingSysIE
|
||||
from .servus import ServusIE
|
||||
from .sevenplus import SevenPlusIE
|
||||
from .sexu import SexuIE
|
||||
from .shahid import ShahidIE
|
||||
from .seznamzpravy import (
|
||||
SeznamZpravyIE,
|
||||
SeznamZpravyArticleIE,
|
||||
)
|
||||
from .shahid import (
|
||||
ShahidIE,
|
||||
ShahidShowIE,
|
||||
)
|
||||
from .shared import (
|
||||
SharedIE,
|
||||
VivoIE,
|
||||
@@ -981,7 +1005,7 @@ from .stitcher import StitcherIE
|
||||
from .sport5 import Sport5IE
|
||||
from .sportbox import SportBoxEmbedIE
|
||||
from .sportdeutschland import SportDeutschlandIE
|
||||
from .sportschau import SportschauIE
|
||||
from .springboardplatform import SpringboardPlatformIE
|
||||
from .sprout import SproutIE
|
||||
from .srgssr import (
|
||||
SRGSSRIE,
|
||||
@@ -1025,7 +1049,11 @@ from .telebruxelles import TeleBruxellesIE
|
||||
from .telecinco import TelecincoIE
|
||||
from .telegraaf import TelegraafIE
|
||||
from .telemb import TeleMBIE
|
||||
from .telequebec import TeleQuebecIE
|
||||
from .telequebec import (
|
||||
TeleQuebecIE,
|
||||
TeleQuebecEmissionIE,
|
||||
TeleQuebecLiveIE,
|
||||
)
|
||||
from .teletask import TeleTaskIE
|
||||
from .telewebion import TelewebionIE
|
||||
from .testurl import TestURLIE
|
||||
@@ -1037,7 +1065,6 @@ from .theplatform import (
|
||||
ThePlatformFeedIE,
|
||||
)
|
||||
from .thescene import TheSceneIE
|
||||
from .thesixtyone import TheSixtyOneIE
|
||||
from .thestar import TheStarIE
|
||||
from .thesun import TheSunIE
|
||||
from .theweatherchannel import TheWeatherChannelIE
|
||||
@@ -1115,6 +1142,7 @@ from .tvplayer import TVPlayerIE
|
||||
from .tweakers import TweakersIE
|
||||
from .twentyfourvideo import TwentyFourVideoIE
|
||||
from .twentymin import TwentyMinutenIE
|
||||
from .twentythreevideo import TwentyThreeVideoIE
|
||||
from .twitch import (
|
||||
TwitchVideoIE,
|
||||
TwitchChapterIE,
|
||||
@@ -1137,8 +1165,10 @@ from .udemy import (
|
||||
UdemyCourseIE
|
||||
)
|
||||
from .udn import UDNEmbedIE
|
||||
from .ufctv import UFCTVIE
|
||||
from .uktvplay import UKTVPlayIE
|
||||
from .digiteka import DigitekaIE
|
||||
from .umg import UMGDeIE
|
||||
from .unistra import UnistraIE
|
||||
from .unity import UnityIE
|
||||
from .uol import UOLIE
|
||||
@@ -1195,6 +1225,7 @@ from .videomore import (
|
||||
from .videopremium import VideoPremiumIE
|
||||
from .videopress import VideoPressIE
|
||||
from .vidio import VidioIE
|
||||
from .vidlii import VidLiiIE
|
||||
from .vidme import (
|
||||
VidmeIE,
|
||||
VidmeUserIE,
|
||||
@@ -1276,6 +1307,8 @@ from .watchbox import WatchBoxIE
|
||||
from .watchindianporn import WatchIndianPornIE
|
||||
from .wdr import (
|
||||
WDRIE,
|
||||
WDRPageIE,
|
||||
WDRElefantIE,
|
||||
WDRMobileIE,
|
||||
)
|
||||
from .webcaster import (
|
||||
@@ -1286,6 +1319,10 @@ from .webofstories import (
|
||||
WebOfStoriesIE,
|
||||
WebOfStoriesPlaylistIE,
|
||||
)
|
||||
from .weibo import (
|
||||
WeiboIE,
|
||||
WeiboMobileIE
|
||||
)
|
||||
from .weiqitv import WeiqiTVIE
|
||||
from .wimp import WimpIE
|
||||
from .wistia import WistiaIE
|
||||
@@ -1311,6 +1348,10 @@ from .xiami import (
|
||||
XiamiArtistIE,
|
||||
XiamiCollectionIE
|
||||
)
|
||||
from .ximalaya import (
|
||||
XimalayaIE,
|
||||
XimalayaAlbumIE
|
||||
)
|
||||
from .xminus import XMinusIE
|
||||
from .xnxx import XNXXIE
|
||||
from .xstream import XstreamIE
|
||||
|
42
youtube_dl/extractor/filmweb.py
Normal file
42
youtube_dl/extractor/filmweb.py
Normal file
@@ -0,0 +1,42 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class FilmwebIE(InfoExtractor):
    # Handles filmweb.no "trailere" and "filmnytt" article pages and returns
    # a url_transparent result pointing at the iframe found in the embed code.
    _VALID_URL = r'https?://(?:www\.)?filmweb\.no/(?P<type>trailere|filmnytt)/article(?P<id>\d+)\.ece'
    _TEST = {
        'url': 'http://www.filmweb.no/trailere/article1264921.ece',
        'md5': 'e353f47df98e557d67edaceda9dece89',
        'info_dict': {
            'id': '13033574',
            'ext': 'mp4',
            'title': 'Det som en gang var',
            'upload_date': '20160316',
            'timestamp': 1458140101,
            'uploader_id': '12639966',
            'uploader': 'Live Roaldset',
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        article_type = mobj.group('type')
        article_id = mobj.group('id')

        # "filmnytt" pages carry the id to query in a data-videoid attribute;
        # for "trailere" pages the id taken from the URL is used as is.
        if article_type == 'filmnytt':
            webpage = self._download_webpage(url, article_id)
            article_id = self._search_regex(
                r'data-videoid="(\d+)"', webpage, 'article id')

        embed_info = self._download_json(
            'https://www.filmweb.no/template_v2/ajax/json_trailerEmbed.jsp',
            article_id, query={
                'articleId': article_id,
            })
        embed_code = embed_info['embedCode']

        # The embed code is an HTML snippet; pull the iframe src out of it.
        iframe_src = self._search_regex(
            r'<iframe[^>]+src="([^"]+)', embed_code, 'iframe url')
        iframe_url = self._proto_relative_url(iframe_src)

        return {
            '_type': 'url_transparent',
            'id': article_id,
            'url': iframe_url,
            'ie_key': 'TwentyThreeVideo',
        }
|
@@ -33,7 +33,7 @@ class FranceInterIE(InfoExtractor):
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
upload_date_str = self._search_regex(
|
||||
r'class=["\']cover-emission-period["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
|
||||
r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
|
||||
webpage, 'upload date', fatal=False)
|
||||
if upload_date_str:
|
||||
upload_date_list = upload_date_str.split()
|
||||
|
@@ -5,19 +5,89 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
determine_ext,
|
||||
try_get,
|
||||
)
|
||||
from .dailymotion import DailymotionIE
|
||||
|
||||
|
||||
class FranceTVBaseInfoExtractor(InfoExtractor):
    def _make_url_result(self, video_or_full_id, catalog=None):
        # Build an internal "francetv:<id>[@<catalog>]" URL and wrap it in a
        # url_result tagged with FranceTVIE's key. The catalog is appended
        # only when the given id does not already contain an "@" part.
        already_has_catalog = '@' in video_or_full_id
        internal_url = 'francetv:%s' % video_or_full_id
        if not already_has_catalog and catalog:
            internal_url = internal_url + '@%s' % catalog

        # The reported video id never includes the catalog suffix.
        bare_video_id = video_or_full_id.split('@')[0]
        return self.url_result(
            internal_url, ie=FranceTVIE.ie_key(), video_id=bare_video_id)
|
||||
|
||||
|
||||
class FranceTVIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
https?://
|
||||
sivideo\.webservices\.francetelevisions\.fr/tools/getInfosOeuvre/v2/\?
|
||||
.*?\bidDiffusion=[^&]+|
|
||||
(?:
|
||||
https?://videos\.francetv\.fr/video/|
|
||||
francetv:
|
||||
)
|
||||
(?P<id>[^@]+)(?:@(?P<catalog>.+))?
|
||||
)
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
# without catalog
|
||||
'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=162311093&callback=_jsonp_loader_callback_request_0',
|
||||
'md5': 'c2248a8de38c4e65ea8fae7b5df2d84f',
|
||||
'info_dict': {
|
||||
'id': '162311093',
|
||||
'ext': 'mp4',
|
||||
'title': '13h15, le dimanche... - Les mystères de Jésus',
|
||||
'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
|
||||
'timestamp': 1502623500,
|
||||
'upload_date': '20170813',
|
||||
},
|
||||
}, {
|
||||
# with catalog
|
||||
'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=NI_1004933&catalogue=Zouzous&callback=_jsonp_loader_callback_request_4',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://videos.francetv.fr/video/NI_657393@Regions',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'francetv:162311093',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'francetv:NI_1004933@Zouzous',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'francetv:NI_983319@Info-web',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'francetv:NI_983319',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'francetv:NI_657393@Regions',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# france-3 live
|
||||
'url': 'francetv:SIM_France3',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_video(self, video_id, catalogue=None):
|
||||
# Videos are identified by idDiffusion so catalogue part is optional.
|
||||
# However when provided, some extra formats may be returned so we pass
|
||||
# it if available.
|
||||
info = self._download_json(
|
||||
'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/',
|
||||
video_id, 'Downloading video JSON', query={
|
||||
@@ -27,7 +97,8 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
if info.get('status') == 'NOK':
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, info['message']), expected=True)
|
||||
'%s returned error: %s' % (self.IE_NAME, info['message']),
|
||||
expected=True)
|
||||
allowed_countries = info['videos'][0].get('geoblocage')
|
||||
if allowed_countries:
|
||||
georestricted = True
|
||||
@@ -42,6 +113,21 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
else:
|
||||
georestricted = False
|
||||
|
||||
def sign(manifest_url, manifest_id):
    # Ask each signing host in turn for a signed variant of manifest_url.
    # The first non-empty string response that starts like an http(s) or
    # protocol-relative URL is returned; otherwise the original URL is
    # returned unchanged. Relies on `self` and `video_id` from the
    # enclosing scope.
    signing_hosts = ('hdfauthftv-a.akamaihd.net', 'hdfauth.francetv.fr')
    for signing_host in signing_hosts:
        candidate = self._download_webpage(
            'https://%s/esi/TA' % signing_host, video_id,
            'Downloading signed %s manifest URL' % manifest_id,
            fatal=False, query={
                'url': manifest_url,
            })
        # Downloads are non-fatal, so the response may be missing or empty.
        if not candidate or not isinstance(candidate, compat_str):
            continue
        if re.search(r'^(?:https?:)?//', candidate):
            return candidate
    return manifest_url
|
||||
|
||||
is_live = None
|
||||
|
||||
formats = []
|
||||
for video in info['videos']:
|
||||
if video['statut'] != 'ONLINE':
|
||||
@@ -49,6 +135,10 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
video_url = video['url']
|
||||
if not video_url:
|
||||
continue
|
||||
if is_live is None:
|
||||
is_live = (try_get(
|
||||
video, lambda x: x['plages_ouverture'][0]['direct'],
|
||||
bool) is True) or '/live.francetv.fr/' in video_url
|
||||
format_id = video['format']
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'f4m':
|
||||
@@ -56,17 +146,14 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
# See https://github.com/rg3/youtube-dl/issues/3963
|
||||
# m3u8 urls work fine
|
||||
continue
|
||||
f4m_url = self._download_webpage(
|
||||
'http://hdfauth.francetv.fr/esi/TA?url=%s' % video_url,
|
||||
video_id, 'Downloading f4m manifest token', fatal=False)
|
||||
if f4m_url:
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
f4m_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44',
|
||||
video_id, f4m_id=format_id, fatal=False))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
sign(video_url, format_id) + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44',
|
||||
video_id, f4m_id=format_id, fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False))
|
||||
sign(video_url, format_id), video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id=format_id,
|
||||
fatal=False))
|
||||
elif video_url.startswith('rtmp'):
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
@@ -97,33 +184,48 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
'description': clean_html(info['synopsis']),
|
||||
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
|
||||
'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']),
|
||||
'timestamp': int_or_none(info['diffusion']['timestamp']),
|
||||
'is_live': is_live,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
catalog = mobj.group('catalog')
|
||||
|
||||
class FranceTVIE(FranceTVBaseInfoExtractor):
|
||||
if not video_id:
|
||||
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
video_id = qs.get('idDiffusion', [None])[0]
|
||||
catalog = qs.get('catalogue', [None])[0]
|
||||
if not video_id:
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
|
||||
return self._extract_video(video_id, catalog)
|
||||
|
||||
|
||||
class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?france\.tv|mobile\.france\.tv)/(?:[^/]+/)*(?P<id>[^/]+)\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
|
||||
'info_dict': {
|
||||
'id': '157550144',
|
||||
'id': '162311093',
|
||||
'ext': 'mp4',
|
||||
'title': '13h15, le dimanche... - Les mystères de Jésus',
|
||||
'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
|
||||
'timestamp': 1494156300,
|
||||
'upload_date': '20170507',
|
||||
'timestamp': 1502623500,
|
||||
'upload_date': '20170813',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 downloads
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [FranceTVIE.ie_key()],
|
||||
}, {
|
||||
# france3
|
||||
'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html',
|
||||
@@ -156,6 +258,10 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.france.tv/142749-rouge-sang.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# france-3 live
|
||||
'url': 'https://www.france.tv/france-3/direct.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -172,13 +278,14 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
|
||||
video_id, catalogue = self._html_search_regex(
|
||||
r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"',
|
||||
webpage, 'video ID').split('@')
|
||||
return self._extract_video(video_id, catalogue)
|
||||
|
||||
return self._make_url_result(video_id, catalogue)
|
||||
|
||||
|
||||
class FranceTVEmbedIE(FranceTVBaseInfoExtractor):
|
||||
_VALID_URL = r'https?://embed\.francetv\.fr/*\?.*?\bue=(?P<id>[^&]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://embed.francetv.fr/?ue=7fd581a2ccf59d2fc5719c5c13cf6961',
|
||||
'info_dict': {
|
||||
'id': 'NI_983319',
|
||||
@@ -188,7 +295,11 @@ class FranceTVEmbedIE(FranceTVBaseInfoExtractor):
|
||||
'timestamp': 1493981780,
|
||||
'duration': 16,
|
||||
},
|
||||
}
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [FranceTVIE.ie_key()],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -197,12 +308,12 @@ class FranceTVEmbedIE(FranceTVBaseInfoExtractor):
|
||||
'http://api-embed.webservices.francetelevisions.fr/key/%s' % video_id,
|
||||
video_id)
|
||||
|
||||
return self._extract_video(video['video_id'], video.get('catalog'))
|
||||
return self._make_url_result(video['video_id'], video.get('catalog'))
|
||||
|
||||
|
||||
class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
||||
IE_NAME = 'francetvinfo.fr'
|
||||
_VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<title>[^/?#&.]+)'
|
||||
_VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&.]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
|
||||
@@ -217,51 +328,18 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
# m3u8 downloads
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [FranceTVIE.ie_key()],
|
||||
}, {
|
||||
'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html',
|
||||
'info_dict': {
|
||||
'id': 'EV_20019',
|
||||
'ext': 'mp4',
|
||||
'title': 'Débat des candidats à la Commission européenne',
|
||||
'description': 'Débat des candidats à la Commission européenne',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'HLS (reqires ffmpeg)'
|
||||
},
|
||||
'skip': 'Ce direct est terminé et sera disponible en rattrapage dans quelques minutes.',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html',
|
||||
'md5': 'f485bda6e185e7d15dbc69b72bae993e',
|
||||
'info_dict': {
|
||||
'id': 'NI_173343',
|
||||
'ext': 'mp4',
|
||||
'title': 'Les entreprises familiales : le secret de la réussite',
|
||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||
'timestamp': 1433273139,
|
||||
'upload_date': '20150602',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 downloads
|
||||
'skip_download': True,
|
||||
},
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://france3-regions.francetvinfo.fr/bretagne/cotes-d-armor/thalassa-echappee-breizh-ce-venredi-dans-les-cotes-d-armor-954961.html',
|
||||
'md5': 'f485bda6e185e7d15dbc69b72bae993e',
|
||||
'info_dict': {
|
||||
'id': 'NI_657393',
|
||||
'ext': 'mp4',
|
||||
'title': 'Olivier Monthus, réalisateur de "Bretagne, le choix de l’Armor"',
|
||||
'description': 'md5:a3264114c9d29aeca11ced113c37b16c',
|
||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||
'timestamp': 1458300695,
|
||||
'upload_date': '20160318',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Dailymotion embed
|
||||
'url': 'http://www.francetvinfo.fr/politique/notre-dame-des-landes/video-sur-france-inter-cecile-duflot-denonce-le-regard-meprisant-de-patrick-cohen_1520091.html',
|
||||
@@ -283,9 +361,9 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
page_title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, page_title)
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
dailymotion_urls = DailymotionIE._extract_urls(webpage)
|
||||
if dailymotion_urls:
|
||||
@@ -297,12 +375,13 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
||||
(r'id-video=([^@]+@[^"]+)',
|
||||
r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"'),
|
||||
webpage, 'video id').split('@')
|
||||
return self._extract_video(video_id, catalogue)
|
||||
|
||||
return self._make_url_result(video_id, catalogue)
|
||||
|
||||
|
||||
class GenerationWhatIE(InfoExtractor):
|
||||
IE_NAME = 'france2.fr:generation-what'
|
||||
_VALID_URL = r'https?://generation-what\.francetv\.fr/[^/]+/video/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://generation-what\.francetv\.fr/[^/]+/video/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://generation-what.francetv.fr/portrait/video/present-arms',
|
||||
@@ -314,6 +393,10 @@ class GenerationWhatIE(InfoExtractor):
|
||||
'uploader_id': 'UCHH9p1eetWCgt4kXBYCb3_w',
|
||||
'upload_date': '20160411',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
'url': 'http://generation-what.francetv.fr/europe/video/present-arms',
|
||||
'only_matching': True,
|
||||
@@ -321,42 +404,87 @@ class GenerationWhatIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
youtube_id = self._search_regex(
|
||||
r"window\.videoURL\s*=\s*'([0-9A-Za-z_-]{11})';",
|
||||
webpage, 'youtube id')
|
||||
return self.url_result(youtube_id, 'Youtube', youtube_id)
|
||||
|
||||
return self.url_result(youtube_id, ie='Youtube', video_id=youtube_id)
|
||||
|
||||
|
||||
class CultureboxIE(FranceTVBaseInfoExtractor):
|
||||
IE_NAME = 'culturebox.francetvinfo.fr'
|
||||
_VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
|
||||
_VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://culturebox.francetvinfo.fr/live/musique/musique-classique/le-livre-vermeil-de-montserrat-a-la-cathedrale-delne-214511',
|
||||
'md5': '9b88dc156781c4dbebd4c3e066e0b1d6',
|
||||
_TESTS = [{
|
||||
'url': 'https://culturebox.francetvinfo.fr/opera-classique/musique-classique/c-est-baroque/concerts/cantates-bwv-4-106-et-131-de-bach-par-raphael-pichon-57-268689',
|
||||
'info_dict': {
|
||||
'id': 'EV_50111',
|
||||
'ext': 'flv',
|
||||
'title': "Le Livre Vermeil de Montserrat à la Cathédrale d'Elne",
|
||||
'description': 'md5:f8a4ad202e8fe533e2c493cc12e739d9',
|
||||
'upload_date': '20150320',
|
||||
'timestamp': 1426892400,
|
||||
'duration': 2760.9,
|
||||
'id': 'EV_134885',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cantates BWV 4, 106 et 131 de Bach par Raphaël Pichon 5/7',
|
||||
'description': 'md5:19c44af004b88219f4daa50fa9a351d4',
|
||||
'upload_date': '20180206',
|
||||
'timestamp': 1517945220,
|
||||
'duration': 5981,
|
||||
},
|
||||
}
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [FranceTVIE.ie_key()],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
name = mobj.group('name')
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, name)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
if ">Ce live n'est plus disponible en replay<" in webpage:
|
||||
raise ExtractorError('Video %s is not available' % name, expected=True)
|
||||
raise ExtractorError(
|
||||
'Video %s is not available' % display_id, expected=True)
|
||||
|
||||
video_id, catalogue = self._search_regex(
|
||||
r'["\'>]https?://videos\.francetv\.fr/video/([^@]+@.+?)["\'<]',
|
||||
webpage, 'video id').split('@')
|
||||
|
||||
return self._extract_video(video_id, catalogue)
|
||||
return self._make_url_result(video_id, catalogue)
|
||||
|
||||
|
||||
class FranceTVJeunesseIE(FranceTVBaseInfoExtractor):
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?(?:zouzous|ludo)\.fr/heros/(?P<id>[^/?#&]+))'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.zouzous.fr/heros/simon',
|
||||
'info_dict': {
|
||||
'id': 'simon',
|
||||
},
|
||||
'playlist_count': 9,
|
||||
}, {
|
||||
'url': 'https://www.ludo.fr/heros/ninjago',
|
||||
'info_dict': {
|
||||
'id': 'ninjago',
|
||||
},
|
||||
'playlist_count': 10,
|
||||
}, {
|
||||
'url': 'https://www.zouzous.fr/heros/simon?abc',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
playlist_id = mobj.group('id')
|
||||
|
||||
playlist = self._download_json(
|
||||
'%s/%s' % (mobj.group('url'), 'playlist'), playlist_id)
|
||||
|
||||
if not playlist.get('count'):
|
||||
raise ExtractorError(
|
||||
'%s is not available' % playlist_id, expected=True)
|
||||
|
||||
entries = []
|
||||
for item in playlist['items']:
|
||||
identity = item.get('identity')
|
||||
if identity and isinstance(identity, compat_str):
|
||||
entries.append(self._make_url_result(identity))
|
||||
|
||||
return self.playlist_result(entries, playlist_id)
|
||||
|
@@ -5,9 +5,9 @@ from .ooyala import OoyalaIE
|
||||
|
||||
|
||||
class FusionIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?fusion\.net/video/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?fusion\.(?:net|tv)/video/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://fusion.net/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/',
|
||||
'url': 'http://fusion.tv/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/',
|
||||
'info_dict': {
|
||||
'id': 'ZpcWNoMTE6x6uVIIWYpHh0qQDjxBuq5P',
|
||||
'ext': 'mp4',
|
||||
@@ -20,7 +20,7 @@ class FusionIE(InfoExtractor):
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
'url': 'http://fusion.net/video/201781',
|
||||
'url': 'http://fusion.tv/video/201781',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
@@ -23,6 +23,11 @@ class GameInformerIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
brightcove_id = self._search_regex(r"getVideo\('[^']+video_id=(\d+)", webpage, 'brightcove id')
|
||||
return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
|
||||
webpage = self._download_webpage(
|
||||
url, display_id, headers=self.geo_verification_headers())
|
||||
brightcove_id = self._search_regex(
|
||||
[r'<[^>]+\bid=["\']bc_(\d+)', r"getVideo\('[^']+video_id=(\d+)"],
|
||||
webpage, 'brightcove id')
|
||||
return self.url_result(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew',
|
||||
brightcove_id)
|
||||
|
@@ -1,6 +1,8 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
@@ -9,27 +11,34 @@ from ..utils import (
|
||||
|
||||
|
||||
class GameStarIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?game(?P<site>pro|star)\.de/videos/.*,(?P<id>[0-9]+)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.gamestar.de/videos/trailer,3/hobbit-3-die-schlacht-der-fuenf-heere,76110.html',
|
||||
'md5': '96974ecbb7fd8d0d20fca5a00810cea7',
|
||||
'md5': 'ee782f1f8050448c95c5cacd63bc851c',
|
||||
'info_dict': {
|
||||
'id': '76110',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hobbit 3: Die Schlacht der Fünf Heere - Teaser-Trailer zum dritten Teil',
|
||||
'description': 'Der Teaser-Trailer zu Hobbit 3: Die Schlacht der Fünf Heere zeigt einige Szenen aus dem dritten Teil der Saga und kündigt den...',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'timestamp': 1406542020,
|
||||
'timestamp': 1406542380,
|
||||
'upload_date': '20140728',
|
||||
'duration': 17
|
||||
'duration': 17,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.gamepro.de/videos/top-10-indie-spiele-fuer-nintendo-switch-video-tolle-nindies-games-zum-download,95316.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.gamestar.de/videos/top-10-indie-spiele-fuer-nintendo-switch-video-tolle-nindies-games-zum-download,95316.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
site = mobj.group('site')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
url = 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# TODO: there are multiple ld+json objects in the webpage,
|
||||
# while _search_json_ld finds only the first one
|
||||
@@ -37,16 +46,17 @@ class GameStarIE(InfoExtractor):
|
||||
r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>[^<]+VideoObject[^<]+)</script>',
|
||||
webpage, 'JSON-LD', group='json_ld'), video_id)
|
||||
info_dict = self._json_ld(json_ld, video_id)
|
||||
info_dict['title'] = remove_end(info_dict['title'], ' - GameStar')
|
||||
info_dict['title'] = remove_end(
|
||||
info_dict['title'], ' - Game%s' % site.title())
|
||||
|
||||
view_count = json_ld.get('interactionCount')
|
||||
view_count = int_or_none(json_ld.get('interactionCount'))
|
||||
comment_count = int_or_none(self._html_search_regex(
|
||||
r'([0-9]+) Kommentare</span>', webpage, 'comment_count',
|
||||
fatal=False))
|
||||
r'<span>Kommentare</span>\s*<span[^>]+class=["\']count[^>]+>\s*\(\s*([0-9]+)',
|
||||
webpage, 'comment count', fatal=False))
|
||||
|
||||
info_dict.update({
|
||||
'id': video_id,
|
||||
'url': url,
|
||||
'url': 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id,
|
||||
'ext': 'mp4',
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count
|
||||
|
@@ -100,6 +100,8 @@ from .megaphone import MegaphoneIE
|
||||
from .vzaar import VzaarIE
|
||||
from .channel9 import Channel9IE
|
||||
from .vshare import VShareIE
|
||||
from .mediasite import MediasiteIE
|
||||
from .springboardplatform import SpringboardPlatformIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -1925,6 +1927,49 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'vl14062007715967',
|
||||
'ext': 'mp4',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.heidelberg-laureate-forum.org/blog/video/lecture-friday-september-23-2016-sir-c-antony-r-hoare/',
|
||||
'md5': 'aecd089f55b1cb5a59032cb049d3a356',
|
||||
'info_dict': {
|
||||
'id': '90227f51a80c4d8f86c345a7fa62bd9a1d',
|
||||
'ext': 'mp4',
|
||||
'title': 'Lecture: Friday, September 23, 2016 - Sir Tony Hoare',
|
||||
'description': 'md5:5a51db84a62def7b7054df2ade403c6c',
|
||||
'timestamp': 1474354800,
|
||||
'upload_date': '20160920',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.kidzworld.com/article/30935-trolls-the-beat-goes-on-interview-skylar-astin-and-amanda-leighton',
|
||||
'info_dict': {
|
||||
'id': '1731611',
|
||||
'ext': 'mp4',
|
||||
'title': 'Official Trailer | TROLLS: THE BEAT GOES ON!',
|
||||
'description': 'md5:eb5f23826a027ba95277d105f248b825',
|
||||
'timestamp': 1516100691,
|
||||
'upload_date': '20180116',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [SpringboardPlatformIE.ie_key()],
|
||||
},
|
||||
{
|
||||
'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
|
||||
'info_dict': {
|
||||
'id': 'uPDB5I9wfp8',
|
||||
'ext': 'webm',
|
||||
'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
|
||||
'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
|
||||
'upload_date': '20160219',
|
||||
'uploader': 'Pocoyo - Português (BR)',
|
||||
'uploader_id': 'PocoyoBrazil',
|
||||
},
|
||||
'add_ie': [YoutubeIE.ie_key()],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
# {
|
||||
# # TODO: find another test
|
||||
@@ -2251,7 +2296,10 @@ class GenericIE(InfoExtractor):
|
||||
# Look for Brightcove New Studio embeds
|
||||
bc_urls = BrightcoveNewIE._extract_urls(self, webpage)
|
||||
if bc_urls:
|
||||
return self.playlist_from_matches(bc_urls, video_id, video_title, ie='BrightcoveNew')
|
||||
return self.playlist_from_matches(
|
||||
bc_urls, video_id, video_title,
|
||||
getter=lambda x: smuggle_url(x, {'referrer': url}),
|
||||
ie='BrightcoveNew')
|
||||
|
||||
# Look for Nexx embeds
|
||||
nexx_urls = NexxIE._extract_urls(webpage)
|
||||
@@ -2695,9 +2743,9 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(viewlift_url)
|
||||
|
||||
# Look for JWPlatform embeds
|
||||
jwplatform_url = JWPlatformIE._extract_url(webpage)
|
||||
if jwplatform_url:
|
||||
return self.url_result(jwplatform_url, 'JWPlatform')
|
||||
jwplatform_urls = JWPlatformIE._extract_urls(webpage)
|
||||
if jwplatform_urls:
|
||||
return self.playlist_from_matches(jwplatform_urls, video_id, video_title, ie=JWPlatformIE.ie_key())
|
||||
|
||||
# Look for Digiteka embeds
|
||||
digiteka_url = DigitekaIE._extract_url(webpage)
|
||||
@@ -2883,6 +2931,22 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_from_matches(
|
||||
vshare_urls, video_id, video_title, ie=VShareIE.ie_key())
|
||||
|
||||
# Look for Mediasite embeds
|
||||
mediasite_urls = MediasiteIE._extract_urls(webpage)
|
||||
if mediasite_urls:
|
||||
entries = [
|
||||
self.url_result(smuggle_url(
|
||||
compat_urlparse.urljoin(url, mediasite_url),
|
||||
{'UrlReferrer': url}), ie=MediasiteIE.ie_key())
|
||||
for mediasite_url in mediasite_urls]
|
||||
return self.playlist_result(entries, video_id, video_title)
|
||||
|
||||
springboardplatform_urls = SpringboardPlatformIE._extract_urls(webpage)
|
||||
if springboardplatform_urls:
|
||||
return self.playlist_from_matches(
|
||||
springboardplatform_urls, video_id, video_title,
|
||||
ie=SpringboardPlatformIE.ie_key())
|
||||
|
||||
def merge_dicts(dict1, dict2):
|
||||
merged = {}
|
||||
for k, v in dict1.items():
|
||||
|
@@ -1,8 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_b64decode
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
@@ -48,7 +47,7 @@ class HotNewHipHopIE(InfoExtractor):
|
||||
if 'mediaKey' not in mkd:
|
||||
raise ExtractorError('Did not get a media key')
|
||||
|
||||
redirect_url = base64.b64decode(video_url_base64).decode('utf-8')
|
||||
redirect_url = compat_b64decode(video_url_base64).decode('utf-8')
|
||||
redirect_req = HEADRequest(redirect_url)
|
||||
req = self._request_webpage(
|
||||
redirect_req, video_id,
|
||||
|
@@ -2,9 +2,8 @@
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse,
|
||||
)
|
||||
@@ -61,7 +60,7 @@ class InfoQIE(BokeCCBaseIE):
|
||||
encoded_id = self._search_regex(
|
||||
r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id', default=None)
|
||||
|
||||
real_id = compat_urllib_parse_unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
|
||||
real_id = compat_urllib_parse_unquote(compat_b64decode(encoded_id).decode('utf-8'))
|
||||
playpath = 'mp4:' + real_id
|
||||
|
||||
return [{
|
||||
|
64
youtube_dl/extractor/internazionale.py
Normal file
64
youtube_dl/extractor/internazionale.py
Normal file
@@ -0,0 +1,64 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_timestamp
|
||||
|
||||
|
||||
class InternazionaleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?internazionale\.it/video/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.internazionale.it/video/2015/02/19/richard-linklater-racconta-una-scena-di-boyhood',
|
||||
'md5': '3e39d32b66882c1218e305acbf8348ca',
|
||||
'info_dict': {
|
||||
'id': '265968',
|
||||
'display_id': 'richard-linklater-racconta-una-scena-di-boyhood',
|
||||
'ext': 'mp4',
|
||||
'title': 'Richard Linklater racconta una scena di Boyhood',
|
||||
'description': 'md5:efb7e5bbfb1a54ae2ed5a4a015f0e665',
|
||||
'timestamp': 1424354635,
|
||||
'upload_date': '20150219',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
DATA_RE = r'data-%s=(["\'])(?P<value>(?:(?!\1).)+)\1'
|
||||
|
||||
title = self._search_regex(
|
||||
DATA_RE % 'video-title', webpage, 'title', default=None,
|
||||
group='value') or self._og_search_title(webpage)
|
||||
|
||||
video_id = self._search_regex(
|
||||
DATA_RE % 'job-id', webpage, 'video id', group='value')
|
||||
video_path = self._search_regex(
|
||||
DATA_RE % 'video-path', webpage, 'video path', group='value')
|
||||
|
||||
video_base = 'https://video.internazionale.it/%s/%s.' % (video_path, video_id)
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_base + 'm3u8', display_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
video_base + 'mpd', display_id, mpd_id='dash', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
timestamp = unified_timestamp(self._html_search_meta(
|
||||
'article:published_time', webpage, 'timestamp'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'timestamp': timestamp,
|
||||
'formats': formats,
|
||||
}
|
@@ -23,11 +23,14 @@ class JWPlatformIE(InfoExtractor):
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
mobj = re.search(
|
||||
r'<(?:script|iframe)[^>]+?src=["\'](?P<url>(?:https?:)?//content.jwplatform.com/players/[a-zA-Z0-9]{8})',
|
||||
urls = JWPlatformIE._extract_urls(webpage)
|
||||
return urls[0] if urls else None
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
r'<(?:script|iframe)[^>]+?src=["\']((?:https?:)?//content\.jwplatform\.com/players/[a-zA-Z0-9]{8})',
|
||||
webpage)
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@@ -125,9 +125,12 @@ class KalturaIE(InfoExtractor):
|
||||
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
|
||||
(?P=q1).*?
|
||||
(?:
|
||||
entry_?[Ii]d|
|
||||
(?P<q2>["'])entry_?[Ii]d(?P=q2)
|
||||
)\s*:\s*
|
||||
(?:
|
||||
entry_?[Ii]d|
|
||||
(?P<q2>["'])entry_?[Ii]d(?P=q2)
|
||||
)\s*:\s*|
|
||||
\[\s*(?P<q2_1>["'])entry_?[Ii]d(?P=q2_1)\s*\]\s*=\s*
|
||||
)
|
||||
(?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
|
||||
''', webpage) or
|
||||
re.search(
|
||||
|
@@ -1,71 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class KamcordIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?kamcord\.com/v/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.kamcord.com/v/hNYRduDgWb4',
|
||||
'md5': 'c3180e8a9cfac2e86e1b88cb8751b54c',
|
||||
'info_dict': {
|
||||
'id': 'hNYRduDgWb4',
|
||||
'ext': 'mp4',
|
||||
'title': 'Drinking Madness',
|
||||
'uploader': 'jacksfilms',
|
||||
'uploader_id': '3044562',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video = self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\.__props\s*=\s*({.+?});?(?:\n|\s*</script)',
|
||||
webpage, 'video'),
|
||||
video_id)['video']
|
||||
|
||||
title = video['title']
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
video['play']['hls'], video_id, 'mp4', entry_protocol='m3u8_native')
|
||||
self._sort_formats(formats)
|
||||
|
||||
uploader = video.get('user', {}).get('username')
|
||||
uploader_id = video.get('user', {}).get('id')
|
||||
|
||||
view_count = int_or_none(video.get('viewCount'))
|
||||
like_count = int_or_none(video.get('heartCount'))
|
||||
comment_count = int_or_none(video.get('messageCount'))
|
||||
|
||||
preference_key = qualities(('small', 'medium', 'large'))
|
||||
|
||||
thumbnails = [{
|
||||
'url': thumbnail_url,
|
||||
'id': thumbnail_id,
|
||||
'preference': preference_key(thumbnail_id),
|
||||
} for thumbnail_id, thumbnail_url in (video.get('thumbnail') or {}).items()
|
||||
if isinstance(thumbnail_id, compat_str) and isinstance(thumbnail_url, compat_str)]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'comment_count': comment_count,
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
}
|
@@ -49,7 +49,9 @@ class LA7IE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
player_data = self._parse_json(
|
||||
self._search_regex(r'videoLa7\(({[^;]+})\);', webpage, 'player data'),
|
||||
self._search_regex(
|
||||
[r'(?s)videoParams\s*=\s*({.+?});', r'videoLa7\(({[^;]+})\);'],
|
||||
webpage, 'player data'),
|
||||
video_id, transform_source=js_to_json)
|
||||
|
||||
return {
|
||||
|
@@ -1,7 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import datetime
|
||||
import hashlib
|
||||
import re
|
||||
@@ -9,6 +8,7 @@ import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_ord,
|
||||
compat_str,
|
||||
compat_urllib_parse_urlencode,
|
||||
@@ -329,7 +329,7 @@ class LetvCloudIE(InfoExtractor):
|
||||
raise ExtractorError('Letv cloud returned an unknwon error')
|
||||
|
||||
def b64decode(s):
|
||||
return base64.b64decode(s.encode('utf-8')).decode('utf-8')
|
||||
return compat_b64decode(s).decode('utf-8')
|
||||
|
||||
formats = []
|
||||
for media in play_json['data']['video_info']['media'].values():
|
||||
|
@@ -10,6 +10,7 @@ from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
smuggle_url,
|
||||
try_get,
|
||||
unsmuggle_url,
|
||||
ExtractorError,
|
||||
)
|
||||
@@ -220,6 +221,12 @@ class LimelightBaseIE(InfoExtractor):
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _extract_info_helper(self, pc, mobile, i, metadata):
|
||||
return self._extract_info(
|
||||
try_get(pc, lambda x: x['playlistItems'][i]['streams'], list) or [],
|
||||
try_get(mobile, lambda x: x['mediaList'][i]['mobileUrls'], list) or [],
|
||||
metadata)
|
||||
|
||||
|
||||
class LimelightMediaIE(LimelightBaseIE):
|
||||
IE_NAME = 'limelight'
|
||||
@@ -282,10 +289,7 @@ class LimelightMediaIE(LimelightBaseIE):
|
||||
'getMobilePlaylistByMediaId', 'properties',
|
||||
smuggled_data.get('source_url'))
|
||||
|
||||
return self._extract_info(
|
||||
pc['playlistItems'][0].get('streams', []),
|
||||
mobile['mediaList'][0].get('mobileUrls', []) if mobile else [],
|
||||
metadata)
|
||||
return self._extract_info_helper(pc, mobile, 0, metadata)
|
||||
|
||||
|
||||
class LimelightChannelIE(LimelightBaseIE):
|
||||
@@ -326,10 +330,7 @@ class LimelightChannelIE(LimelightBaseIE):
|
||||
'media', smuggled_data.get('source_url'))
|
||||
|
||||
entries = [
|
||||
self._extract_info(
|
||||
pc['playlistItems'][i].get('streams', []),
|
||||
mobile['mediaList'][i].get('mobileUrls', []) if mobile else [],
|
||||
medias['media_list'][i])
|
||||
self._extract_info_helper(pc, mobile, i, medias['media_list'][i])
|
||||
for i in range(len(medias['media_list']))]
|
||||
|
||||
return self.playlist_result(entries, channel_id, pc['title'])
|
||||
|
@@ -94,7 +94,15 @@ class LyndaBaseIE(InfoExtractor):
|
||||
class LyndaIE(LyndaBaseIE):
|
||||
IE_NAME = 'lynda'
|
||||
IE_DESC = 'lynda.com videos'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:lynda\.com|educourse\.ga)/(?:[^/]+/[^/]+/(?P<course_id>\d+)|player/embed)/(?P<id>\d+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?(?:lynda\.com|educourse\.ga)/
|
||||
(?:
|
||||
(?:[^/]+/){2,3}(?P<course_id>\d+)|
|
||||
player/embed
|
||||
)/
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
|
||||
_TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'
|
||||
|
||||
@@ -113,6 +121,9 @@ class LyndaIE(LyndaBaseIE):
|
||||
}, {
|
||||
'url': 'https://educourse.ga/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Willkommen-Grundlagen-guten-Gestaltung/393570/393572-4.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _raise_unavailable(self, video_id):
|
||||
@@ -244,8 +255,9 @@ class LyndaIE(LyndaBaseIE):
|
||||
def _get_subtitles(self, video_id):
|
||||
url = 'https://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
|
||||
subs = self._download_json(url, None, False)
|
||||
if subs:
|
||||
return {'en': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]}
|
||||
fixed_subs = self._fix_subtitles(subs)
|
||||
if fixed_subs:
|
||||
return {'en': [{'ext': 'srt', 'data': fixed_subs}]}
|
||||
else:
|
||||
return {}
|
||||
|
||||
@@ -256,7 +268,15 @@ class LyndaCourseIE(LyndaBaseIE):
|
||||
|
||||
# A course link is identical to the welcome/introduction video link of the same course.
|
||||
# Such a link is recognized as a course link.
|
||||
_VALID_URL = r'https?://(?:www|m)\.(?:lynda\.com|educourse\.ga)/(?P<coursepath>[^/]+/[^/]+/(?P<courseid>\d+))-\d\.html'
|
||||
_VALID_URL = r'https?://(?:www|m)\.(?:lynda\.com|educourse\.ga)/(?P<coursepath>(?:[^/]+/){2,3}(?P<courseid>\d+))-2\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.lynda.com/Graphic-Design-tutorials/Grundlagen-guten-Gestaltung/393570-2.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Grundlagen-guten-Gestaltung/393570-2.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
@@ -1,20 +1,32 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
remove_end,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class MailRuIE(InfoExtractor):
|
||||
IE_NAME = 'mailru'
|
||||
IE_DESC = 'Видео@Mail.Ru'
|
||||
_VALID_URL = r'https?://(?:(?:www|m)\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)'
|
||||
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:(?:www|m)\.)?my\.mail\.ru/
|
||||
(?:
|
||||
video/.*\#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|
|
||||
(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html|
|
||||
(?:video/embed|\+/video/meta)/(?P<metaid>\d+)
|
||||
)
|
||||
'''
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76',
|
||||
@@ -23,7 +35,7 @@ class MailRuIE(InfoExtractor):
|
||||
'id': '46301138_76',
|
||||
'ext': 'mp4',
|
||||
'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро',
|
||||
'timestamp': 1393232740,
|
||||
'timestamp': 1393235077,
|
||||
'upload_date': '20140224',
|
||||
'uploader': 'sonypicturesrus',
|
||||
'uploader_id': 'sonypicturesrus@mail.ru',
|
||||
@@ -40,7 +52,7 @@ class MailRuIE(InfoExtractor):
|
||||
'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion',
|
||||
'timestamp': 1397039888,
|
||||
'upload_date': '20140409',
|
||||
'uploader': 'hitech@corp.mail.ru',
|
||||
'uploader': 'hitech',
|
||||
'uploader_id': 'hitech@corp.mail.ru',
|
||||
'duration': 245,
|
||||
},
|
||||
@@ -65,28 +77,42 @@ class MailRuIE(InfoExtractor):
|
||||
{
|
||||
'url': 'http://m.my.mail.ru/mail/3sktvtr/video/_myvideo/138.html',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://my.mail.ru/video/embed/7949340477499637815',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://my.mail.ru/+/video/meta/7949340477499637815',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('idv1')
|
||||
meta_id = mobj.group('metaid')
|
||||
|
||||
if not video_id:
|
||||
video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_id = None
|
||||
if meta_id:
|
||||
meta_url = 'https://my.mail.ru/+/video/meta/%s' % meta_id
|
||||
else:
|
||||
video_id = mobj.group('idv1')
|
||||
if not video_id:
|
||||
video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
page_config = self._parse_json(self._search_regex(
|
||||
r'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>',
|
||||
webpage, 'page config', default='{}'), video_id, fatal=False)
|
||||
if page_config:
|
||||
meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl')
|
||||
else:
|
||||
meta_url = None
|
||||
|
||||
video_data = None
|
||||
|
||||
page_config = self._parse_json(self._search_regex(
|
||||
r'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>',
|
||||
webpage, 'page config', default='{}'), video_id, fatal=False)
|
||||
if page_config:
|
||||
meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl')
|
||||
if meta_url:
|
||||
video_data = self._download_json(
|
||||
meta_url, video_id, 'Downloading video meta JSON', fatal=False)
|
||||
if meta_url:
|
||||
video_data = self._download_json(
|
||||
meta_url, video_id or meta_id, 'Downloading video meta JSON',
|
||||
fatal=not video_id)
|
||||
|
||||
# Fallback old approach
|
||||
if not video_data:
|
||||
@@ -136,3 +162,153 @@ class MailRuIE(InfoExtractor):
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class MailRuMusicSearchBaseIE(InfoExtractor):
    """Search and track-parsing helpers shared by the my.mail.ru music extractors."""

    def _search(self, query, url, audio_id, limit=100, offset=0):
        """Run one ``music.search`` AJAX request and return its dict payload.

        ``url`` is sent as the Referer header and ``audio_id`` is passed to
        the downloader as the item id.  ``limit`` and ``offset`` select the
        result page.  The decoded JSON is iterated and the first element
        that is a ``dict`` is returned.
        """
        page_number = offset // limit + 1
        paging = json.dumps({
            'music': {
                'limit': limit,
                'offset': offset,
            },
        })
        request_headers = {
            'Referer': url,
            'X-Requested-With': 'XMLHttpRequest',
        }
        request_query = {
            'xemail': '',
            'ajax_call': '1',
            'func_name': 'music.search',
            'mna': '',
            'mnb': '',
            'arg_query': query,
            'arg_extended': '1',
            'arg_search_params': paging,
            'arg_limit': limit,
            'arg_offset': offset,
        }
        response = self._download_json(
            'https://my.mail.ru/cgi-bin/my/ajax', audio_id,
            'Downloading songs JSON page %d' % page_number,
            headers=request_headers, query=request_query)
        return next(item for item in response if isinstance(item, dict))

    @staticmethod
    def _extract_track(t, fatal=True):
        """Convert one track dict ``t`` into an info dict.

        With ``fatal=True`` the ``URL`` and ``File`` keys are read by
        subscription (a missing key raises ``KeyError``); otherwise they are
        read with ``.get``.  ``None`` is returned when either value is falsy.
        """
        def required(key):
            # Strict lookup in fatal mode, lenient lookup otherwise.
            return t[key] if fatal else t.get(key)

        audio_url = required('URL')
        if not audio_url:
            return None

        audio_id = required('File')
        if not audio_id:
            return None

        track = t.get('Name') or t.get('Name_Text_HTML')
        artist = t.get('Author') or t.get('Author_Text_HTML')

        # Prefer "artist - track", then the bare track name, then the file id.
        if not track:
            title = audio_id
        elif artist:
            title = '%s - %s' % (artist, track)
        else:
            title = track

        duration = int_or_none(t.get('DurationInSeconds'))
        if not duration:
            duration = parse_duration(
                t.get('Duration') or t.get('DurationStr'))

        return {
            'extractor_key': MailRuMusicIE.ie_key(),
            'id': audio_id,
            'title': title,
            'thumbnail': t.get('AlbumCoverURL') or t.get('FiledAlbumCover'),
            'uploader': t.get('OwnerName') or t.get('OwnerName_Text_HTML'),
            'uploader_id': t.get('UploaderID'),
            'duration': duration,
            'view_count': int_or_none(
                t.get('PlayCount') or t.get('PlayCount_hr')),
            'vcodec': 'none',
            'abr': int_or_none(t.get('BitRate')),
            'track': track,
            'artist': artist,
            'album': t.get('Album'),
            'url': audio_url,
        }
|
||||
|
||||
|
||||
class MailRuMusicIE(MailRuMusicSearchBaseIE):
    """Extractor for a single song page under my.mail.ru/music/songs/."""

    IE_NAME = 'mailru:music'
    IE_DESC = 'Музыка@Mail.Ru'
    _VALID_URL = r'https?://my\.mail\.ru/music/songs/[^/?#&]+-(?P<id>[\da-f]+)'
    _TESTS = [{
        'url': 'https://my.mail.ru/music/songs/%D0%BC8%D0%BB8%D1%82%D1%85-l-a-h-luciferian-aesthetics-of-herrschaft-single-2017-4e31f7125d0dfaef505d947642366893',
        'md5': '0f8c22ef8c5d665b13ac709e63025610',
        'info_dict': {
            'id': '4e31f7125d0dfaef505d947642366893',
            'ext': 'mp3',
            'title': 'L.A.H. (Luciferian Aesthetics of Herrschaft) single, 2017 - М8Л8ТХ',
            'uploader': 'Игорь Мудрый',
            'uploader_id': '1459196328',
            'duration': 280,
            'view_count': int,
            'vcodec': 'none',
            'abr': 320,
            'track': 'L.A.H. (Luciferian Aesthetics of Herrschaft) single, 2017',
            'artist': 'М8Л8ТХ',
        },
    }]

    def _real_extract(self, url):
        """Find the song by searching for the page's og:title.

        The search result whose ``File`` equals the id from the URL is
        converted with ``_extract_track``; its title is then replaced by the
        og:title of the page.
        """
        audio_id = self._match_id(url)
        page = self._download_webpage(url, audio_id)
        title = self._og_search_title(page)

        found_tracks = self._search(title, url, audio_id)['MusicData']
        matching = (
            candidate for candidate in found_tracks
            if candidate.get('File') == audio_id)

        info = self._extract_track(next(matching))
        info['title'] = title
        return info
|
||||
|
||||
|
||||
class MailRuMusicSearchIE(MailRuMusicSearchBaseIE):
    """Playlist extractor for my.mail.ru/music/search/<query> pages."""

    IE_NAME = 'mailru:music:search'
    IE_DESC = 'Музыка@Mail.Ru'
    _VALID_URL = r'https?://my\.mail\.ru/music/search/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://my.mail.ru/music/search/black%20shadow',
        'info_dict': {
            'id': 'black shadow',
        },
        'playlist_mincount': 532,
    }]

    def _real_extract(self, url):
        """Page through the search results and collect every usable track.

        Paging stops when a response carries no non-empty ``MusicData``
        list, or when the current offset exceeds the reported total.
        """
        query = compat_urllib_parse_unquote(self._match_id(url))

        page_size = 100
        offset = 0
        entries = []

        while True:
            search = self._search(query, url, query, page_size, offset)

            music_data = search.get('MusicData')
            if not (music_data and isinstance(music_data, list)):
                break

            # Non-fatal extraction yields None for unusable tracks; drop those.
            extracted = (
                self._extract_track(t, fatal=False) for t in music_data)
            entries.extend(track for track in extracted if track)

            total = try_get(
                search, lambda x: x['Results']['music']['Total'], int)
            if total is not None and offset > total:
                break

            offset += page_size

        return self.playlist_result(entries, query)
|
||||
|
@@ -1,13 +1,12 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class MangomoloBaseIE(InfoExtractor):
|
||||
@@ -51,4 +50,4 @@ class MangomoloLiveIE(MangomoloBaseIE):
|
||||
_IS_LIVE = True
|
||||
|
||||
def _get_real_id(self, page_id):
|
||||
return base64.b64decode(compat_urllib_parse_unquote(page_id).encode()).decode()
|
||||
return compat_b64decode(compat_urllib_parse_unquote(page_id)).decode()
|
||||
|
214
youtube_dl/extractor/mediasite.py
Normal file
214
youtube_dl/extractor/mediasite.py
Normal file
@@ -0,0 +1,214 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
mimetype2ext,
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class MediasiteIE(InfoExtractor):
    """Extractor for Mediasite ``/Mediasite/Play/<id>`` presentation pages."""

    _VALID_URL = r'(?xi)https?://[^/]+/Mediasite/Play/(?P<id>[0-9a-f]{32,34})(?P<query>\?[^#]+|)'
    _TESTS = [
        {
            'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271681e4f199af3c60d1f82869b1d',
            'info_dict': {
                'id': '2db6c271681e4f199af3c60d1f82869b1d',
                'ext': 'mp4',
                'title': 'Lecture: Tuesday, September 20, 2016 - Sir Andrew Wiles',
                'description': 'Sir Andrew Wiles: “Equations in arithmetic”\\n\\nI will describe some of the interactions between modern number theory and the problem of solving equations in rational numbers or integers\\u0027.',
                'timestamp': 1474268400.0,
                'upload_date': '20160919',
            },
        },
        {
            'url': 'http://mediasite.uib.no/Mediasite/Play/90bb363295d945d6b548c867d01181361d?catalog=a452b7df-9ae1-46b7-a3ba-aceeb285f3eb',
            'info_dict': {
                'id': '90bb363295d945d6b548c867d01181361d',
                'ext': 'mp4',
                'upload_date': '20150429',
                'title': '5) IT-forum 2015-Dag 1 - Dungbeetle - How and why Rain created a tiny bug tracker for Unity',
                'timestamp': 1430311380.0,
            },
        },
        {
            'url': 'https://collegerama.tudelft.nl/Mediasite/Play/585a43626e544bdd97aeb71a0ec907a01d',
            'md5': '481fda1c11f67588c0d9d8fbdced4e39',
            'info_dict': {
                'id': '585a43626e544bdd97aeb71a0ec907a01d',
                'ext': 'mp4',
                'title': 'Een nieuwe wereld: waarden, bewustzijn en techniek van de mensheid 2.0.',
                'description': '',
                'thumbnail': r're:^https?://.*\.jpg(?:\?.*)?$',
                'duration': 7713.088,
                'timestamp': 1413309600,
                'upload_date': '20141014',
            },
        },
        {
            'url': 'https://collegerama.tudelft.nl/Mediasite/Play/86a9ea9f53e149079fbdb4202b521ed21d?catalog=fd32fd35-6c99-466c-89d4-cd3c431bc8a4',
            'md5': 'ef1fdded95bdf19b12c5999949419c92',
            'info_dict': {
                'id': '86a9ea9f53e149079fbdb4202b521ed21d',
                'ext': 'wmv',
                'title': '64ste Vakantiecursus: Afvalwater',
                'description': 'md5:7fd774865cc69d972f542b157c328305',
                'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$',
                'duration': 10853,
                'timestamp': 1326446400,
                'upload_date': '20120113',
            },
        },
        {
            'url': 'http://digitalops.sandia.gov/Mediasite/Play/24aace4429fc450fb5b38cdbf424a66e1d',
            'md5': '9422edc9b9a60151727e4b6d8bef393d',
            'info_dict': {
                'id': '24aace4429fc450fb5b38cdbf424a66e1d',
                'ext': 'mp4',
                'title': 'Xyce Software Training - Section 1',
                'description': r're:(?s)SAND Number: SAND 2013-7800.{200,}',
                'upload_date': '20120409',
                'timestamp': 1333983600,
                'duration': 7794,
            }
        }
    ]

    # look in Mediasite.Core.js (Mediasite.ContentStreamType[*])
    _STREAM_TYPES = {
        0: 'video1',  # the main video
        2: 'slide',
        3: 'presentation',
        4: 'video2',  # screencast?
        5: 'video3',
    }

    @staticmethod
    def _extract_urls(webpage):
        """Return the HTML-unescaped ``src`` of every Mediasite player iframe in ``webpage``."""
        return [
            unescapeHTML(mobj.group('url'))
            for mobj in re.finditer(
                r'(?xi)<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:(?:https?:)?//[^/]+)?/Mediasite/Play/[0-9a-f]{32,34}(?:\?.*?)?)\1',
                webpage)]

    def _real_extract(self, url):
        """Fetch player options from the PlayerService and build the info dict.

        Raises ``ExtractorError`` (expected) with the service's status
        message when the response carries no presentation.
        """
        url, data = unsmuggle_url(url, {})
        mobj = re.match(self._VALID_URL, url)
        resource_id = mobj.group('id')
        query = mobj.group('query')

        webpage, urlh = self._download_webpage_handle(url, resource_id)  # XXX: add UrlReferrer?
        redirect_url = compat_str(urlh.geturl())

        # XXX: might have also extracted UrlReferrer and QueryString from the html
        service_path = compat_urlparse.urljoin(redirect_url, self._html_search_regex(
            r'<div[^>]+\bid=["\']ServicePath[^>]+>(.+?)</div>', webpage, resource_id,
            default='/Mediasite/PlayerService/PlayerService.svc/json'))

        player_options = self._download_json(
            '%s/GetPlayerOptions' % service_path, resource_id,
            headers={
                'Content-type': 'application/json; charset=utf-8',
                'X-Requested-With': 'XMLHttpRequest',
            },
            data=json.dumps({
                'getPlayerOptionsRequest': {
                    'ResourceId': resource_id,
                    'QueryString': query,
                    'UrlReferrer': data.get('UrlReferrer', ''),
                    'UseScreenReader': False,
                }
            }).encode('utf-8'))['d']

        presentation = player_options['Presentation']
        # The None check must come before any subscripting of `presentation`;
        # otherwise an unavailable presentation fails with a TypeError and the
        # service's own status message is never reported.
        if presentation is None:
            raise ExtractorError(
                'Mediasite says: %s' % player_options['PlayerPresentationStatusMessage'],
                expected=True)

        title = presentation['Title']

        thumbnails = []
        formats = []
        for snum, stream in enumerate(presentation['Streams']):
            stream_type = stream.get('StreamType')
            if stream_type is None:
                continue

            video_urls = stream.get('VideoUrls')
            if not isinstance(video_urls, list):
                video_urls = []

            # Unknown stream types get a generic 'type<N>' label.
            stream_id = self._STREAM_TYPES.get(
                stream_type, 'type%u' % stream_type)

            stream_formats = []
            for unum, video_url_entry in enumerate(video_urls):
                video_url = video_url_entry.get('Location')
                if not video_url or not isinstance(video_url, compat_str):
                    continue
                # XXX: if Stream.get('CanChangeScheme', False), switch scheme to HTTP/HTTPS

                format_id = '%s-%u.%u' % (stream_id, snum, unum)
                media_type = video_url_entry.get('MediaType')
                if media_type == 'SS':
                    stream_formats.extend(self._extract_ism_formats(
                        video_url, resource_id,
                        ism_id=format_id,
                        fatal=False))
                elif media_type == 'Dash':
                    stream_formats.extend(self._extract_mpd_formats(
                        video_url, resource_id,
                        mpd_id=format_id,
                        fatal=False))
                else:
                    stream_formats.append({
                        'format_id': format_id,
                        'url': video_url,
                        'ext': mimetype2ext(video_url_entry.get('MimeType')),
                    })

            # TODO: if Stream['HasSlideContent']:
            # synthesise an MJPEG video stream '%s-%u.slides' % (stream_type, snum)
            # from Stream['Slides']
            # this will require writing a custom downloader...

            # disprefer 'secondary' streams
            if stream_type != 0:
                for fmt in stream_formats:
                    fmt['preference'] = -1

            thumbnail_url = stream.get('ThumbnailUrl')
            if thumbnail_url:
                thumbnails.append({
                    'id': '%s-%u' % (stream_id, snum),
                    'url': urljoin(redirect_url, thumbnail_url),
                    'preference': -1 if stream_type != 0 else 0,
                })
            formats.extend(stream_formats)

        self._sort_formats(formats)

        # XXX: Presentation['Presenters']
        # XXX: Presentation['Transcript']

        return {
            'id': resource_id,
            'title': title,
            'description': presentation.get('Description'),
            # Both values are divided by 1000 before being reported.
            'duration': float_or_none(presentation.get('Duration'), 1000),
            'timestamp': float_or_none(presentation.get('UnixTime'), 1000),
            'formats': formats,
            'thumbnails': thumbnails,
        }
|
@@ -1,13 +1,13 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -42,31 +42,33 @@ class MiTeleBaseIE(InfoExtractor):
|
||||
duration = int_or_none(mmc.get('duration'))
|
||||
for location in mmc['locations']:
|
||||
gat = self._proto_relative_url(location.get('gat'), 'http:')
|
||||
bas = location.get('bas')
|
||||
loc = location.get('loc')
|
||||
gcp = location.get('gcp')
|
||||
ogn = location.get('ogn')
|
||||
if None in (gat, bas, loc, ogn):
|
||||
if None in (gat, gcp, ogn):
|
||||
continue
|
||||
token_data = {
|
||||
'bas': bas,
|
||||
'icd': loc,
|
||||
'gcp': gcp,
|
||||
'ogn': ogn,
|
||||
'sta': '0',
|
||||
'sta': 0,
|
||||
}
|
||||
media = self._download_json(
|
||||
'%s/?%s' % (gat, compat_urllib_parse_urlencode(token_data)),
|
||||
video_id, 'Downloading %s JSON' % location['loc'])
|
||||
file_ = media.get('file')
|
||||
if not file_:
|
||||
gat, video_id, data=json.dumps(token_data).encode('utf-8'),
|
||||
headers={
|
||||
'Content-Type': 'application/json;charset=utf-8',
|
||||
'Referer': url,
|
||||
})
|
||||
stream = media.get('stream') or media.get('file')
|
||||
if not stream:
|
||||
continue
|
||||
ext = determine_ext(file_)
|
||||
ext = determine_ext(stream)
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
file_ + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
|
||||
stream + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
file_, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
stream, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
@@ -1,12 +1,12 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import functools
|
||||
import itertools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_chr,
|
||||
compat_ord,
|
||||
compat_str,
|
||||
@@ -79,7 +79,7 @@ class MixcloudIE(InfoExtractor):
|
||||
|
||||
if encrypted_play_info is not None:
|
||||
# Decode
|
||||
encrypted_play_info = base64.b64decode(encrypted_play_info)
|
||||
encrypted_play_info = compat_b64decode(encrypted_play_info)
|
||||
else:
|
||||
# New path
|
||||
full_info_json = self._parse_json(self._html_search_regex(
|
||||
@@ -109,7 +109,7 @@ class MixcloudIE(InfoExtractor):
|
||||
kpa_target = encrypted_play_info
|
||||
else:
|
||||
kps = ['https://', 'http://']
|
||||
kpa_target = base64.b64decode(info_json['streamInfo']['url'])
|
||||
kpa_target = compat_b64decode(info_json['streamInfo']['url'])
|
||||
for kp in kps:
|
||||
partial_key = self._decrypt_xor_cipher(kpa_target, kp)
|
||||
for quote in ["'", '"']:
|
||||
@@ -165,7 +165,7 @@ class MixcloudIE(InfoExtractor):
|
||||
format_url = stream_info.get(url_key)
|
||||
if not format_url:
|
||||
continue
|
||||
decrypted = self._decrypt_xor_cipher(key, base64.b64decode(format_url))
|
||||
decrypted = self._decrypt_xor_cipher(key, compat_b64decode(format_url))
|
||||
if not decrypted:
|
||||
continue
|
||||
if url_key == 'hlsUrl':
|
||||
|
@@ -4,8 +4,11 @@ import datetime
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
InAdvancePagedList,
|
||||
orderedSet,
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
)
|
||||
@@ -114,3 +117,86 @@ class MotherlessIE(InfoExtractor):
|
||||
'age_limit': age_limit,
|
||||
'url': video_url,
|
||||
}
|
||||
|
||||
|
||||
class MotherlessGroupIE(InfoExtractor):
    """Playlist extractor for motherless.com group pages (``/g/`` and ``/gv/``)."""

    # Raw string: the pattern contains `\.`, which is an invalid escape
    # sequence in a normal string literal.
    _VALID_URL = r'https?://(?:www\.)?motherless\.com/gv?/(?P<id>[a-z0-9_]+)'
    _TESTS = [{
        'url': 'http://motherless.com/g/movie_scenes',
        'info_dict': {
            'id': 'movie_scenes',
            'title': 'Movie Scenes',
            'description': 'Hot and sexy scenes from "regular" movies... '
                           'Beautiful actresses fully nude... A looot of '
                           'skin! :)Enjoy!',
        },
        'playlist_mincount': 662,
    }, {
        'url': 'http://motherless.com/gv/sex_must_be_funny',
        'info_dict': {
            'id': 'sex_must_be_funny',
            'title': 'Sex must be funny',
            'description': 'Sex can be funny. Wide smiles,laugh, games, fun of '
                           'any kind!'
        },
        'playlist_mincount': 9,
    }]

    @classmethod
    def suitable(cls, url):
        """Defer to ``MotherlessIE`` for any URL that extractor accepts."""
        return (False if MotherlessIE.suitable(url)
                else super(MotherlessGroupIE, cls).suitable(url))

    def _extract_entries(self, webpage, base):
        """Return url_result entries for the video links found in ``webpage``.

        Links are resolved against ``base`` and kept only when
        ``MotherlessIE`` accepts them.  When none are found, entries are
        built from the ``data-codename`` attributes instead.
        """
        entries = []
        for mobj in re.finditer(
                r'href="(?P<href>/[^"]+)"[^>]*>(?:\s*<img[^>]+alt="[^-]+-\s(?P<title>[^"]+)")?',
                webpage):
            video_url = compat_urlparse.urljoin(base, mobj.group('href'))
            if not MotherlessIE.suitable(video_url):
                continue
            video_id = MotherlessIE._match_id(video_url)
            title = mobj.group('title')
            entries.append(self.url_result(
                video_url, ie=MotherlessIE.ie_key(), video_id=video_id,
                video_title=title))
        # Alternative fallback
        if not entries:
            entries = [
                self.url_result(
                    compat_urlparse.urljoin(base, '/' + video_id),
                    ie=MotherlessIE.ie_key(), video_id=video_id)
                for video_id in orderedSet(re.findall(
                    r'data-codename=["\']([A-Z0-9]+)', webpage))]
        return entries

    def _real_extract(self, url):
        """Build a paged playlist from the group's ``/gv/<id>`` listing."""
        group_id = self._match_id(url)
        # Always read the /gv/ listing, whichever URL form was given.
        page_url = compat_urlparse.urljoin(url, '/gv/%s' % group_id)
        webpage = self._download_webpage(page_url, group_id)
        title = self._search_regex(
            r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False)
        description = self._html_search_meta(
            'description', webpage, fatal=False)
        # The page count is the number shown just before the NEXT link.
        page_count = self._int(self._search_regex(
            r'(\d+)</(?:a|span)><(?:a|span)[^>]+>\s*NEXT',
            webpage, 'page_count'), 'page_count')
        PAGE_SIZE = 80

        def _get_page(idx):
            # `idx` is zero-based; the site's `page` parameter is one-based.
            webpage = self._download_webpage(
                page_url, group_id, query={'page': idx + 1},
                note='Downloading page %d/%d' % (idx + 1, page_count)
            )
            for entry in self._extract_entries(webpage, url):
                yield entry

        playlist = InAdvancePagedList(_get_page, page_count, PAGE_SIZE)

        return {
            '_type': 'playlist',
            'id': group_id,
            'title': title,
            'description': description,
            'entries': playlist
        }
|
||||
|
@@ -3,22 +3,31 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .vimple import SprutoBaseIE
|
||||
|
||||
|
||||
class MyviIE(SprutoBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
myvi\.(?:ru/player|tv)/
|
||||
(?:
|
||||
(?:
|
||||
https?://
|
||||
(?:www\.)?
|
||||
myvi\.
|
||||
(?:
|
||||
embed/html|
|
||||
flash|
|
||||
api/Video/Get
|
||||
)/|
|
||||
content/preloader\.swf\?.*\bid=
|
||||
)
|
||||
(?P<id>[\da-zA-Z_-]+)
|
||||
(?:ru/player|tv)/
|
||||
(?:
|
||||
(?:
|
||||
embed/html|
|
||||
flash|
|
||||
api/Video/Get
|
||||
)/|
|
||||
content/preloader\.swf\?.*\bid=
|
||||
)|
|
||||
ru/watch/
|
||||
)|
|
||||
myvi:
|
||||
)
|
||||
(?P<id>[\da-zA-Z_-]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://myvi.ru/player/embed/html/oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wObeRTZaCATzucDQIDph8hQU0',
|
||||
@@ -42,6 +51,12 @@ class MyviIE(SprutoBaseIE):
|
||||
}, {
|
||||
'url': 'http://myvi.ru/player/flash/ocp2qZrHI-eZnHKQBK4cZV60hslH8LALnk0uBfKsB-Q4WnY26SeGoYPi8HWHxu0O30',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.myvi.ru/watch/YwbqszQynUaHPn_s82sx0Q2',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'myvi:YwbqszQynUaHPn_s82sx0Q2',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
@@ -58,3 +73,39 @@ class MyviIE(SprutoBaseIE):
|
||||
'http://myvi.ru/player/api/Video/Get/%s?sig' % video_id, video_id)['sprutoData']
|
||||
|
||||
return self._extract_spruto(spruto, video_id)
|
||||
|
||||
|
||||
class MyviEmbedIE(InfoExtractor):
    """Resolve myvi.tv embed/watch URLs to a ``myvi:<id>`` URL for ``MyviIE``."""

    _VALID_URL = r'https?://(?:www\.)?myvi\.tv/(?:[^?]+\?.*?\bv=|embed/)(?P<id>[\da-z]+)'
    _TESTS = [{
        'url': 'https://www.myvi.tv/embed/ccdqic3wgkqwpb36x9sxg43t4r',
        'info_dict': {
            'id': 'b3ea0663-3234-469d-873e-7fecf36b31d1',
            'ext': 'mp4',
            'title': 'Твоя (original song).mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 277,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.myvi.tv/idmi6o?v=ccdqic3wgkqwpb36x9sxg43t4r#watch',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject any URL that ``MyviIE`` already accepts."""
        if MyviIE.suitable(url):
            return False
        return super(MyviEmbedIE, cls).suitable(url)

    def _real_extract(self, url):
        """Download the embed page and hand the player's ``v=`` id to ``MyviIE``."""
        video_id = self._match_id(url)

        embed_url = 'https://www.myvi.tv/embed/%s' % video_id
        embed_page = self._download_webpage(embed_url, video_id)

        myvi_id = self._search_regex(
            r'CreatePlayer\s*\(\s*["\'].*?\bv=([\da-zA-Z_]+)',
            embed_page, 'video id')

        return self.url_result('myvi:%s' % myvi_id, ie=MyviIE.ie_key())
|
||||
|
@@ -68,7 +68,7 @@ class NationalGeographicVideoIE(InfoExtractor):
|
||||
|
||||
class NationalGeographicIE(ThePlatformIE, AdobePassIE):
|
||||
IE_NAME = 'natgeo'
|
||||
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?[^/]+/(?:videos|episodes)/(?P<id>[^/?]+)'
|
||||
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:(?:wild/)?[^/]+/)?(?:videos|episodes)/(?P<id>[^/?]+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -102,6 +102,10 @@ class NationalGeographicIE(ThePlatformIE, AdobePassIE):
|
||||
{
|
||||
'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/episodes/the-power-of-miracles/',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://channel.nationalgeographic.com/videos/treasures-rediscovered/',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
|
@@ -1,6 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .theplatform import ThePlatformIE
|
||||
@@ -358,6 +359,7 @@ class NBCNewsIE(ThePlatformIE):
|
||||
|
||||
|
||||
class NBCOlympicsIE(InfoExtractor):
|
||||
IE_NAME = 'nbcolympics'
|
||||
_VALID_URL = r'https?://www\.nbcolympics\.com/video/(?P<id>[a-z-]+)'
|
||||
|
||||
_TEST = {
|
||||
@@ -395,3 +397,54 @@ class NBCOlympicsIE(InfoExtractor):
|
||||
'ie_key': ThePlatformIE.ie_key(),
|
||||
'display_id': display_id,
|
||||
}
|
||||
|
||||
|
||||
class NBCOlympicsStreamIE(AdobePassIE):
    """Extractor for live streams on stream.nbcolympics.com."""

    IE_NAME = 'nbcolympics:stream'
    _VALID_URL = r'https?://stream\.nbcolympics\.com/(?P<id>[0-9a-z-]+)'
    _TEST = {
        'url': 'http://stream.nbcolympics.com/2018-winter-olympics-nbcsn-evening-feb-8',
        'info_dict': {
            'id': '203493',
            'ext': 'mp4',
            'title': 're:Curling, Alpine, Luge [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }
    _DATA_URL_TEMPLATE = 'http://stream.nbcolympics.com/data/%s_%s.json'

    def _real_extract(self, url):
        """Resolve the page to a signed HLS manifest and return a live info dict.

        Steps, in order: read ``pid`` and ``resource`` from the page, fetch
        the event config and live sources JSON, obtain a media token through
        ``_extract_mvpd_auth``, then ask the Adobe sign service for the
        manifest URL.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        pid = self._search_regex(r'pid\s*=\s*(\d+);', webpage, 'pid')
        resource_template = self._search_regex(
            r"resource\s*=\s*'(.+)';", webpage, 'resource')
        # The page builds the resource by JS string concatenation with pid;
        # substitute the literal pid for that concatenation.
        resource = resource_template.replace("' + pid + '", pid)

        event_config_url = self._DATA_URL_TEMPLATE % ('event_config', pid)
        event_config = self._download_json(
            event_config_url, pid)['eventConfig']
        title = self._live_title(event_config['eventTitle'])

        live_sources_url = self._DATA_URL_TEMPLATE % ('live_sources', pid)
        live_sources = self._download_json(live_sources_url, pid)
        source_url = live_sources['videoSources'][0]['sourceUrl']

        requestor_id = event_config.get('requestorId', 'NBCOlympics')
        media_token = self._extract_mvpd_auth(
            url, pid, requestor_id, resource)

        sign_query = {
            'cdn': 'akamai',
            'mediaToken': base64.b64encode(media_token.encode()),
            'resource': base64.b64encode(resource.encode()),
            'url': source_url,
        }
        signed_manifest_url = self._download_webpage(
            'http://sp.auth.adobe.com/tvs/v1/sign', pid, query=sign_query)

        formats = self._extract_m3u8_formats(signed_manifest_url, pid, 'mp4')
        self._sort_formats(formats)

        return {
            'id': pid,
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'is_live': True,
        }
|
||||
|
@@ -190,10 +190,12 @@ class NDREmbedBaseIE(InfoExtractor):
|
||||
ext = determine_ext(src, None)
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
src + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id, f4m_id='hds'))
|
||||
src + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id,
|
||||
f4m_id='hds', fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
src, video_id, 'mp4', m3u8_id='hls', entry_protocol='m3u8_native'))
|
||||
src, video_id, 'mp4', m3u8_id='hls',
|
||||
entry_protocol='m3u8_native', fatal=False))
|
||||
else:
|
||||
quality = f.get('quality')
|
||||
ff = {
|
||||
|
@@ -87,19 +87,21 @@ class NewgroundsIE(InfoExtractor):
|
||||
self._check_formats(formats, media_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
uploader = self._search_regex(
|
||||
r'(?:Author|Writer)\s*<a[^>]+>([^<]+)', webpage, 'uploader',
|
||||
uploader = self._html_search_regex(
|
||||
(r'(?s)<h4[^>]*>(.+?)</h4>.*?<em>\s*Author\s*</em>',
|
||||
r'(?:Author|Writer)\s*<a[^>]+>([^<]+)'), webpage, 'uploader',
|
||||
fatal=False)
|
||||
|
||||
timestamp = unified_timestamp(self._search_regex(
|
||||
r'<dt>Uploaded</dt>\s*<dd>([^<]+)', webpage, 'timestamp',
|
||||
timestamp = unified_timestamp(self._html_search_regex(
|
||||
(r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+</dd>\s*<dd>[^<]+)',
|
||||
r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+)'), webpage, 'timestamp',
|
||||
default=None))
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'<dd>Song\s*</dd><dd>.+?</dd><dd>([^<]+)', webpage, 'duration',
|
||||
default=None))
|
||||
r'(?s)<dd>\s*Song\s*</dd>\s*<dd>.+?</dd>\s*<dd>([^<]+)', webpage,
|
||||
'duration', default=None))
|
||||
|
||||
filesize_approx = parse_filesize(self._html_search_regex(
|
||||
r'<dd>Song\s*</dd><dd>(.+?)</dd>', webpage, 'filesize',
|
||||
r'(?s)<dd>\s*Song\s*</dd>\s*<dd>(.+?)</dd>', webpage, 'filesize',
|
||||
default=None))
|
||||
if len(formats) == 1:
|
||||
formats[0]['filesize_approx'] = filesize_approx
|
||||
|
@@ -1,27 +1,23 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class NexxIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P<domain_id>\d+)/videos/byid/|
|
||||
nexx:(?P<domain_id_s>\d+):
|
||||
https?://api\.nexx(?:\.cloud|cdn\.com)/v3/\d+/videos/byid/|
|
||||
nexx:(?:\d+:)?|
|
||||
https?://arc\.nexx\.cloud/api/video/
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
@@ -67,6 +63,12 @@ class NexxIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'nexx:748:128907',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'nexx:128907',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://arc.nexx.cloud/api/video/128907.json',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -101,87 +103,12 @@ class NexxIE(InfoExtractor):
|
||||
def _extract_url(webpage):
|
||||
return NexxIE._extract_urls(webpage)[0]
|
||||
|
||||
def _handle_error(self, response):
|
||||
status = int_or_none(try_get(
|
||||
response, lambda x: x['metadata']['status']) or 200)
|
||||
if 200 <= status < 300:
|
||||
return
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, response['metadata']['errorhint']),
|
||||
expected=True)
|
||||
|
||||
def _call_api(self, domain_id, path, video_id, data=None, headers={}):
|
||||
headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8'
|
||||
result = self._download_json(
|
||||
'https://api.nexx.cloud/v3/%s/%s' % (domain_id, path), video_id,
|
||||
'Downloading %s JSON' % path, data=urlencode_postdata(data),
|
||||
headers=headers)
|
||||
self._handle_error(result)
|
||||
return result['result']
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
domain_id = mobj.group('domain_id') or mobj.group('domain_id_s')
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
# Reverse engineered from JS code (see getDeviceID function)
|
||||
device_id = '%d:%d:%d%d' % (
|
||||
random.randint(1, 4), int(time.time()),
|
||||
random.randint(1e4, 99999), random.randint(1, 9))
|
||||
|
||||
result = self._call_api(domain_id, 'session/init', video_id, data={
|
||||
'nxp_devh': device_id,
|
||||
'nxp_userh': '',
|
||||
'precid': '0',
|
||||
'playlicense': '0',
|
||||
'screenx': '1920',
|
||||
'screeny': '1080',
|
||||
'playerversion': '6.0.00',
|
||||
'gateway': 'html5',
|
||||
'adGateway': '',
|
||||
'explicitlanguage': 'en-US',
|
||||
'addTextTemplates': '1',
|
||||
'addDomainData': '1',
|
||||
'addAdModel': '1',
|
||||
}, headers={
|
||||
'X-Request-Enable-Auth-Fallback': '1',
|
||||
})
|
||||
|
||||
cid = result['general']['cid']
|
||||
|
||||
# As described in [1] X-Request-Token generation algorithm is
|
||||
# as follows:
|
||||
# md5( operation + domain_id + domain_secret )
|
||||
# where domain_secret is a static value that will be given by nexx.tv
|
||||
# as per [1]. Here is how this "secret" is generated (reversed
|
||||
# from _play.api.init function, search for clienttoken). So it's
|
||||
# actually not static and not that much of a secret.
|
||||
# 1. https://nexxtvstorage.blob.core.windows.net/files/201610/27.pdf
|
||||
secret = result['device']['clienttoken'][int(device_id[0]):]
|
||||
secret = secret[0:len(secret) - int(device_id[-1])]
|
||||
|
||||
op = 'byid'
|
||||
|
||||
# Reversed from JS code for _play.api.call function (search for
|
||||
# X-Request-Token)
|
||||
request_token = hashlib.md5(
|
||||
''.join((op, domain_id, secret)).encode('utf-8')).hexdigest()
|
||||
|
||||
video = self._call_api(
|
||||
domain_id, 'videos/%s/%s' % (op, video_id), video_id, data={
|
||||
'additionalfields': 'language,channel,actors,studio,licenseby,slug,subtitle,teaser,description',
|
||||
'addInteractionOptions': '1',
|
||||
'addStatusDetails': '1',
|
||||
'addStreamDetails': '1',
|
||||
'addCaptions': '1',
|
||||
'addScenes': '1',
|
||||
'addHotSpots': '1',
|
||||
'addBumpers': '1',
|
||||
'captionFormat': 'data',
|
||||
}, headers={
|
||||
'X-Request-CID': cid,
|
||||
'X-Request-Token': request_token,
|
||||
})
|
||||
video = self._download_json(
|
||||
'https://arc.nexx.cloud/api/video/%s.json' % video_id,
|
||||
video_id)['result']
|
||||
|
||||
general = video['general']
|
||||
title = general['title']
|
||||
|
@@ -13,7 +13,7 @@ class NineGagIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
'url': 'http://9gag.com/tv/p/Kk2X5/people-are-awesome-2013-is-absolutely-awesome',
|
||||
'info_dict': {
|
||||
'id': 'Kk2X5',
|
||||
'id': 'kXzwOKyGlSA',
|
||||
'ext': 'mp4',
|
||||
'description': 'This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)',
|
||||
'title': '\"People Are Awesome 2013\" Is Absolutely Awesome',
|
||||
|
@@ -11,6 +11,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
fix_xml_ampersands,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
parse_duration,
|
||||
qualities,
|
||||
@@ -38,7 +39,7 @@ class NPOIE(NPOBaseIE):
|
||||
npo\.nl/(?!(?:live|radio)/)(?:[^/]+/){2}|
|
||||
ntr\.nl/(?:[^/]+/){2,}|
|
||||
omroepwnl\.nl/video/fragment/[^/]+__|
|
||||
(?:zapp|npo3)\.nl/(?:[^/]+/){2}
|
||||
(?:zapp|npo3)\.nl/(?:[^/]+/){2,}
|
||||
)
|
||||
)
|
||||
(?P<id>[^/?#]+)
|
||||
@@ -156,6 +157,9 @@ class NPOIE(NPOBaseIE):
|
||||
}, {
|
||||
'url': 'http://www.npo.nl/radio-gaga/13-06-2017/BNN_101383373',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.zapp.nl/1803-skelterlab/instructie-video-s/740-instructievideo-s/POMS_AT_11736927',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -170,6 +174,10 @@ class NPOIE(NPOBaseIE):
|
||||
transform_source=strip_jsonp,
|
||||
)
|
||||
|
||||
error = metadata.get('error')
|
||||
if error:
|
||||
raise ExtractorError(error, expected=True)
|
||||
|
||||
# For some videos actual video id (prid) is different (e.g. for
|
||||
# http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698
|
||||
# video id is POMS_WNL_853698 but prid is POW_00996502)
|
||||
@@ -187,7 +195,11 @@ class NPOIE(NPOBaseIE):
|
||||
formats = []
|
||||
urls = set()
|
||||
|
||||
quality = qualities(['adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std'])
|
||||
QUALITY_LABELS = ('Laag', 'Normaal', 'Hoog')
|
||||
QUALITY_FORMATS = ('adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std')
|
||||
|
||||
quality_from_label = qualities(QUALITY_LABELS)
|
||||
quality_from_format_id = qualities(QUALITY_FORMATS)
|
||||
items = self._download_json(
|
||||
'http://ida.omroep.nl/app.php/%s' % video_id, video_id,
|
||||
'Downloading formats JSON', query={
|
||||
@@ -203,11 +215,27 @@ class NPOIE(NPOBaseIE):
|
||||
r'video/ida/([^/]+)', item_url, 'format id',
|
||||
default=None)
|
||||
|
||||
item_label = item.get('label')
|
||||
|
||||
def add_format_url(format_url):
|
||||
width = int_or_none(self._search_regex(
|
||||
r'(\d+)[xX]\d+', format_url, 'width', default=None))
|
||||
height = int_or_none(self._search_regex(
|
||||
r'\d+[xX](\d+)', format_url, 'height', default=None))
|
||||
if item_label in QUALITY_LABELS:
|
||||
quality = quality_from_label(item_label)
|
||||
f_id = item_label
|
||||
elif item_label in QUALITY_FORMATS:
|
||||
quality = quality_from_format_id(format_id)
|
||||
f_id = format_id
|
||||
else:
|
||||
quality, f_id = None
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': format_id,
|
||||
'quality': quality(format_id),
|
||||
'format_id': f_id,
|
||||
'width': width,
|
||||
'height': height,
|
||||
'quality': quality,
|
||||
})
|
||||
|
||||
# Example: http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706
|
||||
@@ -219,7 +247,7 @@ class NPOIE(NPOBaseIE):
|
||||
stream_info = self._download_json(
|
||||
item_url + '&type=json', video_id,
|
||||
'Downloading %s stream JSON'
|
||||
% item.get('label') or item.get('format') or format_id or num)
|
||||
% item_label or item.get('format') or format_id or num)
|
||||
except ExtractorError as ee:
|
||||
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
|
||||
error = (self._parse_json(
|
||||
|
@@ -19,11 +19,11 @@ from ..utils import (
|
||||
|
||||
|
||||
class OdnoklassnikiIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www|m|mobile)\.)?(?:odnoklassniki|ok)\.ru/(?:video(?:embed)?|web-api/video/moviePlayer)/(?P<id>[\d-]+)'
|
||||
_VALID_URL = r'https?://(?:(?:www|m|mobile)\.)?(?:odnoklassniki|ok)\.ru/(?:video(?:embed)?|web-api/video/moviePlayer|live)/(?P<id>[\d-]+)'
|
||||
_TESTS = [{
|
||||
# metadata in JSON
|
||||
'url': 'http://ok.ru/video/20079905452',
|
||||
'md5': '6ba728d85d60aa2e6dd37c9e70fdc6bc',
|
||||
'md5': '0b62089b479e06681abaaca9d204f152',
|
||||
'info_dict': {
|
||||
'id': '20079905452',
|
||||
'ext': 'mp4',
|
||||
@@ -35,7 +35,6 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
'like_count': int,
|
||||
'age_limit': 0,
|
||||
},
|
||||
'skip': 'Video has been blocked',
|
||||
}, {
|
||||
# metadataUrl
|
||||
'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
|
||||
@@ -99,6 +98,9 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://mobile.ok.ru/video/20079905452',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.ok.ru/live/484531969818',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -184,6 +186,10 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
})
|
||||
return info
|
||||
|
||||
assert title
|
||||
if provider == 'LIVE_TV_APP':
|
||||
info['title'] = self._live_title(title)
|
||||
|
||||
quality = qualities(('4', '0', '1', '2', '3', '5'))
|
||||
|
||||
formats = [{
|
||||
@@ -210,6 +216,20 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
if fmt_type:
|
||||
fmt['quality'] = quality(fmt_type)
|
||||
|
||||
# Live formats
|
||||
m3u8_url = metadata.get('hlsMasterPlaylistUrl')
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', entry_protocol='m3u8',
|
||||
m3u8_id='hls', fatal=False))
|
||||
rtmp_url = metadata.get('rtmpUrl')
|
||||
if rtmp_url:
|
||||
formats.append({
|
||||
'url': rtmp_url,
|
||||
'format_id': 'rtmp',
|
||||
'ext': 'flv',
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
info['formats'] = formats
|
||||
|
@@ -1,9 +1,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_str,
|
||||
compat_urllib_parse_urlencode,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
@@ -12,7 +16,6 @@ from ..utils import (
|
||||
try_get,
|
||||
unsmuggle_url,
|
||||
)
|
||||
from ..compat import compat_urllib_parse_urlencode
|
||||
|
||||
|
||||
class OoyalaBaseIE(InfoExtractor):
|
||||
@@ -44,7 +47,7 @@ class OoyalaBaseIE(InfoExtractor):
|
||||
url_data = try_get(stream, lambda x: x['url']['data'], compat_str)
|
||||
if not url_data:
|
||||
continue
|
||||
s_url = base64.b64decode(url_data.encode('ascii')).decode('utf-8')
|
||||
s_url = compat_b64decode(url_data).decode('utf-8')
|
||||
if not s_url or s_url in urls:
|
||||
continue
|
||||
urls.append(s_url)
|
||||
|
@@ -112,6 +112,8 @@ class PhantomJSwrapper(object):
|
||||
return get_exe_version('phantomjs', version_re=r'([0-9.]+)')
|
||||
|
||||
def __init__(self, extractor, required_version=None, timeout=10000):
|
||||
self._TMP_FILES = {}
|
||||
|
||||
self.exe = check_executable('phantomjs', ['-v'])
|
||||
if not self.exe:
|
||||
raise ExtractorError('PhantomJS executable not found in PATH, '
|
||||
@@ -130,7 +132,6 @@ class PhantomJSwrapper(object):
|
||||
self.options = {
|
||||
'timeout': timeout,
|
||||
}
|
||||
self._TMP_FILES = {}
|
||||
for name in self._TMP_FILE_NAMES:
|
||||
tmp = tempfile.NamedTemporaryFile(delete=False)
|
||||
tmp.close()
|
||||
@@ -140,7 +141,7 @@ class PhantomJSwrapper(object):
|
||||
for name in self._TMP_FILE_NAMES:
|
||||
try:
|
||||
os.remove(self._TMP_FILES[name].name)
|
||||
except (IOError, OSError):
|
||||
except (IOError, OSError, KeyError):
|
||||
pass
|
||||
|
||||
def _save_cookies(self, url):
|
||||
@@ -242,7 +243,7 @@ class PhantomJSwrapper(object):
|
||||
|
||||
|
||||
class OpenloadIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.tv)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://openload.co/f/kUEfGclsU9o',
|
||||
@@ -283,12 +284,20 @@ class OpenloadIE(InfoExtractor):
|
||||
# for title and ext
|
||||
'url': 'https://openload.co/embed/Sxz5sADo82g/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# unavailable via https://openload.co/embed/e-Ixz9ZR5L0/ but available
|
||||
# via https://openload.co/f/e-Ixz9ZR5L0/
|
||||
'url': 'https://openload.co/f/e-Ixz9ZR5L0/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://oload.tv/embed/KnG-kKZdcfY/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.openload.link/f/KnG-kKZdcfY',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://oload.stream/f/KnG-kKZdcfY',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
|
||||
@@ -301,20 +310,34 @@ class OpenloadIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
url = 'https://openload.co/embed/%s/' % video_id
|
||||
url_pattern = 'https://openload.co/%%s/%s/' % video_id
|
||||
headers = {
|
||||
'User-Agent': self._USER_AGENT,
|
||||
}
|
||||
|
||||
webpage = self._download_webpage(url, video_id, headers=headers)
|
||||
|
||||
if 'File not found' in webpage or 'deleted by the owner' in webpage:
|
||||
raise ExtractorError('File not found', expected=True, video_id=video_id)
|
||||
for path in ('embed', 'f'):
|
||||
page_url = url_pattern % path
|
||||
last = path == 'f'
|
||||
webpage = self._download_webpage(
|
||||
page_url, video_id, 'Downloading %s webpage' % path,
|
||||
headers=headers, fatal=last)
|
||||
if not webpage:
|
||||
continue
|
||||
if 'File not found' in webpage or 'deleted by the owner' in webpage:
|
||||
if not last:
|
||||
continue
|
||||
raise ExtractorError('File not found', expected=True, video_id=video_id)
|
||||
break
|
||||
|
||||
phantom = PhantomJSwrapper(self, required_version='2.0')
|
||||
webpage, _ = phantom.get(url, html=webpage, video_id=video_id, headers=headers)
|
||||
webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id, headers=headers)
|
||||
|
||||
decoded_id = get_element_by_id('streamurl', webpage)
|
||||
decoded_id = (get_element_by_id('streamurl', webpage) or
|
||||
get_element_by_id('streamuri', webpage) or
|
||||
get_element_by_id('streamurj', webpage))
|
||||
|
||||
if not decoded_id:
|
||||
raise ExtractorError('Can\'t find stream URL', video_id=video_id)
|
||||
|
||||
video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id
|
||||
|
||||
@@ -323,7 +346,7 @@ class OpenloadIE(InfoExtractor):
|
||||
'title', default=None) or self._html_search_meta(
|
||||
'description', webpage, 'title', fatal=True)
|
||||
|
||||
entries = self._parse_html5_media_entries(url, webpage, video_id)
|
||||
entries = self._parse_html5_media_entries(page_url, webpage, video_id)
|
||||
entry = entries[0] if entries else {}
|
||||
subtitles = entry.get('subtitles')
|
||||
|
||||
|
@@ -1,6 +1,8 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
@@ -18,7 +20,14 @@ from ..utils import (
|
||||
class PandoraTVIE(InfoExtractor):
|
||||
IE_NAME = 'pandora.tv'
|
||||
IE_DESC = '판도라TV'
|
||||
_VALID_URL = r'https?://(?:.+?\.)?channel\.pandora\.tv/channel/video\.ptv\?'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:www\.)?pandora\.tv/view/(?P<user_id>[^/]+)/(?P<id>\d+)| # new format
|
||||
(?:.+?\.)?channel\.pandora\.tv/channel/video\.ptv\?| # old format
|
||||
m\.pandora\.tv/?\? # mobile
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://jp.channel.pandora.tv/channel/video.ptv?c1=&prgid=53294230&ch_userid=mikakim&ref=main&lot=cate_01_2',
|
||||
'info_dict': {
|
||||
@@ -53,14 +62,25 @@ class PandoraTVIE(InfoExtractor):
|
||||
# Test metadata only
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.pandora.tv/view/mikakim/53294230#36797454_new',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://m.pandora.tv/?c=view&ch_userid=mikakim&prgid=54600346',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
video_id = qs.get('prgid', [None])[0]
|
||||
user_id = qs.get('ch_userid', [None])[0]
|
||||
if any(not f for f in (video_id, user_id,)):
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
user_id = mobj.group('user_id')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
if not user_id or not video_id:
|
||||
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
video_id = qs.get('prgid', [None])[0]
|
||||
user_id = qs.get('ch_userid', [None])[0]
|
||||
if any(not f for f in (video_id, user_id,)):
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
|
||||
data = self._download_json(
|
||||
'http://m.pandora.tv/?c=view&m=viewJsonApi&ch_userid=%s&prgid=%s'
|
||||
|
@@ -56,18 +56,16 @@ class PeriscopeIE(PeriscopeBaseIE):
|
||||
def _real_extract(self, url):
|
||||
token = self._match_id(url)
|
||||
|
||||
broadcast_data = self._call_api(
|
||||
'getBroadcastPublic', {'broadcast_id': token}, token)
|
||||
broadcast = broadcast_data['broadcast']
|
||||
status = broadcast['status']
|
||||
stream = self._call_api(
|
||||
'accessVideoPublic', {'broadcast_id': token}, token)
|
||||
|
||||
user = broadcast_data.get('user', {})
|
||||
broadcast = stream['broadcast']
|
||||
title = broadcast['status']
|
||||
|
||||
uploader = broadcast.get('user_display_name') or user.get('display_name')
|
||||
uploader_id = (broadcast.get('username') or user.get('username') or
|
||||
broadcast.get('user_id') or user.get('id'))
|
||||
uploader = broadcast.get('user_display_name') or broadcast.get('username')
|
||||
uploader_id = (broadcast.get('user_id') or broadcast.get('username'))
|
||||
|
||||
title = '%s - %s' % (uploader, status) if uploader else status
|
||||
title = '%s - %s' % (uploader, title) if uploader else title
|
||||
state = broadcast.get('state').lower()
|
||||
if state == 'running':
|
||||
title = self._live_title(title)
|
||||
@@ -77,9 +75,6 @@ class PeriscopeIE(PeriscopeBaseIE):
|
||||
'url': broadcast[image],
|
||||
} for image in ('image_url', 'image_url_small') if broadcast.get(image)]
|
||||
|
||||
stream = self._call_api(
|
||||
'getAccessPublic', {'broadcast_id': token}, token)
|
||||
|
||||
video_urls = set()
|
||||
formats = []
|
||||
for format_id in ('replay', 'rtmp', 'hls', 'https_hls', 'lhls', 'lhlsweb'):
|
||||
|
@@ -4,7 +4,9 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
xpath_text,
|
||||
@@ -26,17 +28,15 @@ class PladformIE(InfoExtractor):
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
# http://muz-tv.ru/kinozal/view/7400/
|
||||
'url': 'http://out.pladform.ru/player?pl=24822&videoid=100183293',
|
||||
'md5': '61f37b575dd27f1bb2e1854777fe31f4',
|
||||
'url': 'https://out.pladform.ru/player?pl=64471&videoid=3777899&vk_puid15=0&vk_puid34=0',
|
||||
'md5': '53362fac3a27352da20fa2803cc5cd6f',
|
||||
'info_dict': {
|
||||
'id': '100183293',
|
||||
'id': '3777899',
|
||||
'ext': 'mp4',
|
||||
'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
|
||||
'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
|
||||
'title': 'СТУДИЯ СОЮЗ • Шоу Студия Союз, 24 выпуск (01.02.2018) Нурлан Сабуров и Слава Комиссаренко',
|
||||
'description': 'md5:05140e8bf1b7e2d46e7ba140be57fd95',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 694,
|
||||
'age_limit': 0,
|
||||
'duration': 3190,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://static.pladform.ru/player.swf?pl=21469&videoid=100183293&vkcid=0',
|
||||
@@ -56,22 +56,48 @@ class PladformIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
pl = qs.get('pl', ['1'])[0]
|
||||
|
||||
video = self._download_xml(
|
||||
'http://out.pladform.ru/getVideo?pl=1&videoid=%s' % video_id,
|
||||
video_id)
|
||||
'http://out.pladform.ru/getVideo', video_id, query={
|
||||
'pl': pl,
|
||||
'videoid': video_id,
|
||||
})
|
||||
|
||||
def fail(text):
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, text),
|
||||
expected=True)
|
||||
|
||||
if video.tag == 'error':
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, video.text),
|
||||
expected=True)
|
||||
fail(video.text)
|
||||
|
||||
quality = qualities(('ld', 'sd', 'hd'))
|
||||
|
||||
formats = [{
|
||||
'url': src.text,
|
||||
'format_id': src.get('quality'),
|
||||
'quality': quality(src.get('quality')),
|
||||
} for src in video.findall('./src')]
|
||||
formats = []
|
||||
for src in video.findall('./src'):
|
||||
if src is None:
|
||||
continue
|
||||
format_url = src.text
|
||||
if not format_url:
|
||||
continue
|
||||
if src.get('type') == 'hls' or determine_ext(format_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': src.text,
|
||||
'format_id': src.get('quality'),
|
||||
'quality': quality(src.get('quality')),
|
||||
})
|
||||
|
||||
if not formats:
|
||||
error = xpath_text(video, './cap', 'error', default=None)
|
||||
if error:
|
||||
fail(error)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
|
@@ -24,7 +24,7 @@ class PlaytvakIE(InfoExtractor):
|
||||
'id': 'A150730_150323_hodinovy-manzel_kuko',
|
||||
'ext': 'mp4',
|
||||
'title': 'Vyžeňte vosy a sršně ze zahrady',
|
||||
'description': 'md5:f93d398691044d303bc4a3de62f3e976',
|
||||
'description': 'md5:4436e61b7df227a093778efb7e373571',
|
||||
'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$',
|
||||
'duration': 279,
|
||||
'timestamp': 1438732860,
|
||||
@@ -36,9 +36,19 @@ class PlaytvakIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'A150624_164934_planespotting_cat',
|
||||
'ext': 'flv',
|
||||
'title': 're:^Přímý přenos iDNES.cz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'title': 're:^Planespotting [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'description': 'Sledujte provoz na ranveji Letiště Václava Havla v Praze',
|
||||
'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires rtmpdump
|
||||
},
|
||||
}, { # another live stream, this one without Misc.videoFLV
|
||||
'url': 'https://slowtv.playtvak.cz/zive-sledujte-vlaky-v-primem-prenosu-dwi-/hlavni-nadrazi.aspx?c=A151218_145728_hlavni-nadrazi_plap',
|
||||
'info_dict': {
|
||||
'id': 'A151218_145728_hlavni-nadrazi_plap',
|
||||
'ext': 'flv',
|
||||
'title': 're:^Hlavní nádraží [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
@@ -95,7 +105,7 @@ class PlaytvakIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
info_url = self._html_search_regex(
|
||||
r'Misc\.videoFLV\(\s*{\s*data\s*:\s*"([^"]+)"', webpage, 'info url')
|
||||
r'Misc\.video(?:FLV)?\(\s*{\s*data\s*:\s*"([^"]+)"', webpage, 'info url')
|
||||
|
||||
parsed_url = compat_urlparse.urlparse(info_url)
|
||||
|
||||
@@ -160,7 +170,7 @@ class PlaytvakIE(InfoExtractor):
|
||||
if is_live:
|
||||
title = self._live_title(title)
|
||||
description = self._og_search_description(webpage, default=None) or self._html_search_meta(
|
||||
'description', webpage, 'description')
|
||||
'description', webpage, 'description', default=None)
|
||||
timestamp = None
|
||||
duration = None
|
||||
if not is_live:
|
||||
|
@@ -171,12 +171,12 @@ class PluralsightIE(PluralsightBaseIE):
|
||||
for num, current in enumerate(subs):
|
||||
current = subs[num]
|
||||
start, text = (
|
||||
float_or_none(dict_get(current, TIME_OFFSET_KEYS)),
|
||||
float_or_none(dict_get(current, TIME_OFFSET_KEYS, skip_false_values=False)),
|
||||
dict_get(current, TEXT_KEYS))
|
||||
if start is None or text is None:
|
||||
continue
|
||||
end = duration if num == len(subs) - 1 else float_or_none(
|
||||
dict_get(subs[num + 1], TIME_OFFSET_KEYS))
|
||||
dict_get(subs[num + 1], TIME_OFFSET_KEYS, skip_false_values=False))
|
||||
if end is None:
|
||||
continue
|
||||
srt += os.linesep.join(
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user