Compare commits

..

95 Commits

Author SHA1 Message Date
Sergey M․
d24bd4ffdb release 2017.04.15 2017-04-15 01:39:33 +07:00
Sergey M․
b9859e2735 [ChangeLog] Actualize 2017-04-15 01:37:26 +07:00
Sergey M․
34c3bbe1bd [youku] Keep old fileid extraction code as fallback (#12741) 2017-04-15 01:28:02 +07:00
xuhaomin
e5106ba402 [youku] Fix fileid extraction 2017-04-14 13:23:02 -05:00
Sergey M․
38d2f8325f release 2017.04.14 2017-04-14 00:31:49 +07:00
Sergey M․
6f4dd6667b [ChangeLog] Actualize 2017-04-14 00:29:36 +07:00
Sergey M․
95728fda70 [aenetworks] Add support for specials (closes #12723) 2017-04-13 23:39:55 +07:00
Remita Amine
3f7409f745 [hbo] extract m3u8 formats 2017-04-13 12:28:22 +01:00
Remita Amine
b2fff30817 [go90] Add new extractor(closes #10127) 2017-04-13 12:28:22 +01:00
Remita Amine
f59746372a [downloader/hls] add basic support for EXT-X-BYTERANGE tag(#10955) 2017-04-13 12:28:22 +01:00
Yen Chi Hsuan
34d98cc411 [tv2hu] Fix invalid escape sequence on Py3.6 2017-04-13 16:52:52 +08:00
Remita Amine
40fcba5edb improve coding style 2017-04-12 20:38:43 +01:00
Remita Amine
e4d74e2778 [tv2hu] improve extraction 2017-04-12 19:44:32 +01:00
Vukkk
3ef1d0c733 [tv2hu] Add new extractor 2017-04-12 19:36:02 +01:00
Remita Amine
3962260b7d Credit @SkiTheSlicer for Verizon adobe pass support (#12721) 2017-04-12 16:54:53 +01:00
Remita Amine
0ee79a378a [generic] exclude urls with xml ext from valid video urls(closes #10768)(closes #11654) 2017-04-12 16:22:08 +01:00
Remita Amine
79a51069e5 [adobepass] improve comcast and verison login code(closes #10803) 2017-04-12 15:19:58 +01:00
Daniel Twardowski
a9a346535b [adobepass] Add Verizon support (#10803) 2017-04-12 01:18:57 -04:00
Remita Amine
89beedd31f [youtube] improve m3u8 format extraction 2017-04-11 21:55:59 +01:00
Sergey M․
e109f1ff43 [afreecatv] Fix extraction for videos with different key layout (closes #12718) 2017-04-12 02:17:34 +07:00
Sergey M․
d23028a8fb [youtube] Remove explicit preference for audio-only and video-only formats
In order not to break sorting when new formats appear
2017-04-11 22:41:48 +07:00
Sergey M․
6214611a4a [canalplus] Bypass geo restriction 2017-04-11 22:30:15 +07:00
Sergey M․
1730878167 release 2017.04.11 2017-04-11 02:17:53 +07:00
Sergey M․
689cd458a6 [ChangeLog] Actualize 2017-04-11 02:16:20 +07:00
Sergey M․
6b9466de2f [afreecatv] Fix extraction (closes #12706) 2017-04-11 02:05:53 +07:00
John Hawkinson
61568e50cf [generic] Add support for <object> youtube embeds (closes #12637) 2017-04-11 00:04:32 +07:00
Sergey M․
364a69e8c6 [test_download] Fix testing playlists with single video and add comments 2017-04-11 00:01:02 +07:00
Sergey M․
6240925b40 [bbccouk] Treat bitrate as audio+video bitrate in media selector 2017-04-10 22:56:22 +07:00
Sergey M․
964744af95 [bbccouk] Skip unrecognized formats in media selector (#12701) 2017-04-10 22:53:51 +07:00
Sergey M․
1af959ef9f [bbccouk] Add support for https protocol in media selector (closes #12701) 2017-04-10 22:53:06 +07:00
Remita Amine
a206ef62df [curiositystream] fix extraction(closes #12638) 2017-04-10 13:50:08 +01:00
Remita Amine
3f2ce6896a [adn] update subtitle decryption key 2017-04-09 12:33:29 +01:00
Sergey M․
a6f7263cf4 [chaturbate] Fix extraction (closes #12665) 2017-04-09 01:39:40 +07:00
Sergey M․
4372436504 release 2017.04.09 2017-04-09 00:01:28 +07:00
Sergey M․
eb8cc8ea3b [ChangeLog] Actualize 2017-04-08 23:59:17 +07:00
Sergey M․
41b263ac8a [canvas] Add IE_DESC (closes #12478) 2017-04-08 22:45:45 +07:00
Sergey M․
ca8fca9d9d [vrt] Add IE_DESC (closes #12477) 2017-04-08 22:44:31 +07:00
midas02
e129fa0846 [vier] Add IE_DESC 2017-04-08 22:43:29 +07:00
Sergey M․
2bd875edfe [medici] Add extractor (closes #3406) 2017-04-08 22:38:37 +07:00
Steven Maude
95152630db [rbmaradio] Add support for redbullradio.com URLs 2017-04-08 21:39:07 +07:00
Sergey M․
04e431cf97 [npo:live] Improve (closes #12555) 2017-04-08 21:31:22 +07:00
Aldo Gunsing
1591ba258a [npo:live] Add support for default url 2017-04-08 21:30:38 +07:00
Sergey M․
29c6726646 [mixcloud] Fix view count extraction and modernize 2017-04-08 21:11:08 +07:00
Sergey M․
a66e25859a [mixcloud:playlist] Relax title extraction and fix description extraction (closes #12582) 2017-04-08 21:04:09 +07:00
Kfir Breger
c93c0fc2fd [mixcloud:playlist] Fix title extraction 2017-04-08 20:47:07 +07:00
Sergey M․
90e3f18fc1 [thesun] Extract playlists (closes #11298, closes #12674) 2017-04-08 20:08:31 +07:00
Entropy
5f3e0b69ef [TheSun] Add new extractor 2017-04-08 19:54:04 +07:00
Sergey M․
28b674ca23 [ceskateleveize:porady] Add extractor (closes #7411, closes #12645) 2017-04-08 19:46:42 +07:00
Sergey M․
e18f1da97a [ceskateleveize] Improve extraction and remove URL replacement hacks 2017-04-08 19:41:14 +07:00
Sergey M․
78280352ca [kaltura] Cleanup regexes from redundant escaping 2017-04-08 16:48:27 +07:00
John Hawkinson
a01825a541 [kaltura] Add support for iframe embeds 2017-04-08 16:40:57 +07:00
Sergey M․
f8f2da25ab [wshh] Extract html5 entries and delegate to generic extractor (closes #12676) 2017-04-08 16:01:56 +07:00
Yen Chi Hsuan
4c03973296 [airmozilla] Fix extraction (closes #12670) 2017-04-08 15:39:58 +08:00
Sergey M․
60e5016199 [test_download] Remove unused import 2017-04-08 14:19:01 +07:00
Sergey M․
c4d6fc6d65 [test_subtitles] Fix raiplay test 2017-04-08 14:11:20 +07:00
Sergey M․
1b3feca0a7 [raiplay] Extract subtitles 2017-04-08 14:11:03 +07:00
Sergey M․
80b2fdf9ac [test_download] Match info dicts against tests before matching info file 2017-04-08 14:10:12 +07:00
John Hawkinson
3bef10a50c [test_download] typo in comment 2017-04-08 13:07:25 +07:00
John Hawkinson
a84da06f49 [test_download] Improve diagnostic on wrong 'id' 2017-04-08 13:07:25 +07:00
Sergey M․
3461f5db06 [xfileshare] Add support for vidlo.us (closes #12660) 2017-04-06 23:39:09 +07:00
Sergey M․
0378b8b917 [xfileshare] Add support for vidbom.com (closes #12661) 2017-04-06 23:38:16 +07:00
Sergey M․
7f04386b89 [aenetworks] Add more video URL regexes (closes #12657) 2017-04-06 02:36:48 +07:00
Remita Amine
fac39cccd4 [odnoklassniki] fix format sorting for 1080p quality 2017-04-03 23:39:56 +01:00
Remita Amine
b68e00b08a [rtl2] add support for you.rtl2.de(closes #10257) 2017-04-03 21:36:35 +01:00
Sergey M․
2ab0bfcd81 [vshare] Add extractor (closes #12278) 2017-04-04 03:05:18 +07:00
Sergey M․
b022f4f600 release 2017.04.03 2017-04-03 03:53:55 +07:00
Sergey M․
e2435ba5f3 [ChangeLog] Actualize 2017-04-03 03:52:44 +07:00
Remita Amine
a9bb61a425 [discoveryvr] Add new extractor(closes #12578) 2017-04-02 09:22:09 +01:00
Remita Amine
dbf70c489f [tv5mondeplus] clean description and use stable id 2017-04-02 00:26:48 +01:00
Remita Amine
61e2331ad8 [tv5mondeplus] Add new extractor(closes #11386) 2017-04-01 23:49:40 +01:00
Sergey M․
fd47550885 [extractor/common] Add coding cookie 2017-04-02 04:42:10 +07:00
Sergey M․
4457823dda [extractor/common] Move censorship checks to a separate method and add check for just another ISP 2017-04-02 03:57:44 +07:00
Sergey M․
b3633fa0ce [pericope] Add support for pscp.tv URLs 2017-04-02 03:20:28 +07:00
Sergey M․
b56e41a701 release 2017.04.02 2017-04-02 02:39:15 +07:00
Sergey M․
a76c25146a [ChangeLog] Actualize 2017-04-02 02:37:18 +07:00
Sergey M․
361f293ab8 [rai] Skip not found content item id 2017-04-02 02:24:13 +07:00
Sergey M․
b8d8cced9b [rai] Improve extraction (closes #11790)
* Fix georestriction detection
* Detect live streams
+ Extract relinker metadata
* Improve ContentItem detection
+ Extract series metadata
* Fix tests
2017-04-02 02:14:42 +07:00
Timendum
51342717cd [rai] Fix extraction 2017-04-02 02:10:53 +07:00
Remita Amine
48ab554feb [vrv] add support for series pages 2017-04-01 18:09:36 +01:00
Remita Amine
a6f3a162f3 [limelight] improve extraction for audio only formats 2017-04-01 15:35:39 +01:00
Remita Amine
91399b2fcc [funimation] fix extraction(closes #10696)(#11773) 2017-04-01 13:33:04 +01:00
Sergey M․
eecea00d36 [xfileshare] Add support for vidabc.com (closes #12589) 2017-04-01 18:56:35 +07:00
Sergey M․
2cd668ee59 [xfileshare] Improve extraction and extract hls formats 2017-04-01 18:55:48 +07:00
Remita Amine
ca77b92f94 [crunchyroll] pass geo verifcation proxy 2017-04-01 09:33:23 +01:00
Remita Amine
e97fc8d6b8 [cwtv] extract ISM formats 2017-04-01 07:50:24 +01:00
Remita Amine
be61efdf17 [tvplay] Bypass geo restriction 2017-04-01 07:26:40 +01:00
Remita Amine
77c8ebe631 [vrv] Add new extractor 2017-03-31 23:29:23 +01:00
Sergey M․
7453999580 [packtpub] Add extractor (closes #12610) 2017-04-01 00:25:27 +07:00
Sergey M․
1640eb0961 [YoutubeDL] Return early when extraction of url_transparent fails 2017-03-31 23:57:35 +07:00
Remita Amine
3e943cfe09 [generic] pass base_url to _parse_jwplayer_data 2017-03-31 14:54:06 +01:00
Remita Amine
82be732b17 [adn] Add new extractor 2017-03-31 12:24:23 +01:00
Sergey M․
639e5b2a84 [allocine] Extract more metadata 2017-03-29 04:43:12 +07:00
plroman
128244657b [allocine] Fix extraction 2017-03-29 05:23:20 +08:00
Sergey M․
12ee65ea0d [options] Mention ISM for --fragment-retries and --skip-unavailable-fragments 2017-03-28 23:35:48 +07:00
Tithen-Firion
aea1dccbd0 [openload] fix extractor 2017-03-29 00:00:09 +08:00
63 changed files with 2322 additions and 736 deletions

View File

@@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.26*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.26**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.04.15*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.04.15**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2017.03.26
[debug] youtube-dl version 2017.04.15
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@@ -210,3 +210,4 @@ Lars Vierbergen
Juanjo Benages
Xiao Di Guan
Thomas Winant
Daniel Twardowski

View File

@@ -1,3 +1,100 @@
version 2017.04.15
Extractors
* [youku] Fix fileid extraction (#12741, #12743)
version 2017.04.14
Core
+ [downloader/hls] Add basic support for EXT-X-BYTERANGE tag (#10955)
+ [adobepass] Improve Comcast and Verison login code (#10803)
+ [adobepass] Add support for Verizon (#10803)
Extractors
+ [aenetworks] Add support for specials (#12723)
+ [hbo] Extract HLS formats
+ [go90] Add support for go90.com (#10127)
+ [tv2hu] Add support for tv2.hu (#10509)
+ [generic] Exclude URLs with xml ext from valid video URLs (#10768, #11654)
* [youtube] Improve HLS formats extraction
* [afreecatv] Fix extraction for videos with different key layout (#12718)
- [youtube] Remove explicit preference for audio-only and video-only formats in
order not to break sorting when new formats appear
* [canalplus] Bypass geo restriction
version 2017.04.11
Extractors
* [afreecatv] Fix extraction (#12706)
+ [generic] Add support for <object> YouTube embeds (#12637)
* [bbccouk] Treat bitrate as audio+video bitrate in media selector
+ [bbccouk] Skip unrecognized formats in media selector (#12701)
+ [bbccouk] Add support for https protocol in media selector (#12701)
* [curiositystream] Fix extraction (#12638)
* [adn] Update subtitle decryption key
* [chaturbate] Fix extraction (#12665, #12688, #12690)
version 2017.04.09
Extractors
+ [medici] Add support for medici.tv (#3406)
+ [rbmaradio] Add support for redbullradio.com URLs (#12687)
+ [npo:live] Add support for default URL (#12555)
* [mixcloud:playlist] Fix title, description and view count extraction (#12582)
+ [thesun] Add suport for thesun.co.uk (#11298, #12674)
+ [ceskateleveize:porady] Add support for porady (#7411, #12645)
* [ceskateleveize] Improve extraction and remove URL replacement hacks
+ [kaltura] Add support for iframe embeds (#12679)
* [airmozilla] Fix extraction (#12670)
* [wshh] Extract html5 entries and delegate to generic extractor (12676)
+ [raiplay] Extract subtitles
+ [xfileshare] Add support for vidlo.us (#12660)
+ [xfileshare] Add support for vidbom.com (#12661)
+ [aenetworks] Add more video URL regular expressions (#12657)
+ [odnoklassniki] Fix format sorting for 1080p quality
+ [rtl2] Add support for you.rtl2.de (#10257)
+ [vshare] Add support for vshare.io (#12278)
version 2017.04.03
Core
+ [extractor/common] Add censorship check for TransTelekom ISP
* [extractor/common] Move censorship checks to a separate method
Extractors
+ [discoveryvr] Add support for discoveryvr.com (#12578)
+ [tv5mondeplus] Add support for tv5mondeplus.com (#11386)
+ [periscope] Add support for pscp.tv URLs (#12618, #12625)
version 2017.04.02
Core
* [YoutubeDL] Return early when extraction of url_transparent fails
Extractors
* [rai] Fix and improve extraction (#11790)
+ [vrv] Add support for series pages
* [limelight] Improve extraction for audio only formats
* [funimation] Fix extraction (#10696, #11773)
+ [xfileshare] Add support for vidabc.com (#12589)
+ [xfileshare] Improve extraction and extract hls formats
+ [crunchyroll] Pass geo verifcation proxy
+ [cwtv] Extract ISM formats
+ [tvplay] Bypass geo restriction
+ [vrv] Add support for vrv.co
+ [packtpub] Add support for packtpub.com (#12610)
+ [generic] Pass base_url to _parse_jwplayer_data
+ [adn] Add support for animedigitalnetwork.fr (#4866)
+ [allocine] Extract more metadata
* [allocine] Fix extraction (#12592)
* [openload] Fix extraction
version 2017.03.26
Core

View File

@@ -181,10 +181,10 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
-R, --retries RETRIES Number of retries (default is 10), or
"infinite".
--fragment-retries RETRIES Number of retries for a fragment (default
is 10), or "infinite" (DASH and hlsnative
only)
--skip-unavailable-fragments Skip unavailable fragments (DASH and
hlsnative only)
is 10), or "infinite" (DASH, hlsnative and
ISM)
--skip-unavailable-fragments Skip unavailable fragments (DASH, hlsnative
and ISM)
--abort-on-unavailable-fragment Abort downloading when some fragment is not
available
--buffer-size SIZE Size of download buffer (e.g. 1024 or 16K)

View File

@@ -28,6 +28,7 @@
- **acast**
- **acast:channel**
- **AddAnime**
- **ADN**: Anime Digital Network
- **AdobeTV**
- **AdobeTVChannel**
- **AdobeTVShow**
@@ -126,7 +127,7 @@
- **CamWithHer**
- **canalc2.tv**
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
- **Canvas**
- **Canvas**: canvas.be and een.be
- **CarambaTV**
- **CarambaTVPage**
- **CartoonNetwork**
@@ -144,6 +145,7 @@
- **CCTV**: 央视网
- **CDA**
- **CeskaTelevize**
- **CeskaTelevizePorady**
- **channel9**: Channel 9
- **CharlieRose**
- **Chaturbate**
@@ -212,6 +214,7 @@
- **DiscoveryGo**
- **DiscoveryGoPlaylist**
- **DiscoveryNetworksDe**
- **DiscoveryVR**
- **Disney**
- **Dotsub**
- **DouyuTV**: 斗鱼
@@ -305,6 +308,7 @@
- **Globo**
- **GloboArticle**
- **Go**
- **Go90**
- **GodTube**
- **GodTV**
- **Golem**
@@ -429,6 +433,7 @@
- **MDR**: MDR.DE and KiKA
- **media.ccc.de**
- **Medialaan**
- **Medici**
- **Meipai**: 美拍
- **MelonVOD**
- **META**
@@ -572,6 +577,8 @@
- **orf:iptv**: iptv.ORF.at
- **orf:oe1**: Radio Österreich 1
- **orf:tvthek**: ORF TVthek
- **PacktPub**
- **PacktPubCourse**
- **PandaTV**: 熊猫TV
- **pandora.tv**: 판도라TV
- **parliamentlive.tv**: UK parliament videos
@@ -629,7 +636,7 @@
- **radiofrance**
- **RadioJavan**
- **Rai**
- **RaiTV**
- **RaiPlay**
- **RBMARadio**
- **RDS**: RDS.ca
- **RedBullTV**
@@ -654,7 +661,9 @@
- **rte**: Raidió Teilifís Éireann TV
- **rte:radio**: Raidió Teilifís Éireann radio
- **rtl.nl**: rtl.nl and rtlxl.nl
- **RTL2**
- **rtl2**
- **rtl2:you**
- **rtl2:you:series**
- **RTP**
- **RTS**: RTS.ch
- **rtve.es:alacarta**: RTVE a la carta
@@ -776,6 +785,7 @@
- **TheScene**
- **TheSixtyOne**
- **TheStar**
- **TheSun**
- **TheWeatherChannel**
- **ThisAmericanLife**
- **ThisAV**
@@ -809,9 +819,11 @@
- **Tutv**
- **tv.dfb.de**
- **TV2**
- **tv2.hu**
- **TV2Article**
- **TV3**
- **TV4**: tv4.se and tv4play.se
- **TV5MondePlus**: TV5MONDE+
- **TVA**
- **TVANouvelles**
- **TVANouvellesArticle**
@@ -888,7 +900,7 @@
- **vidme:user**
- **vidme:user:likes**
- **Vidzi**
- **vier**
- **vier**: vier.be and vijf.be
- **vier:videos**
- **ViewLift**
- **ViewLiftEmbed**
@@ -925,7 +937,10 @@
- **Vporn**
- **vpro**: npo.nl and ntr.nl
- **Vrak**
- **VRT**
- **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
- **vrv**
- **vrv:series**
- **VShare**
- **vube**: Vube.com
- **VuClip**
- **VVVVID**
@@ -953,7 +968,7 @@
- **WSJ**: Wall Street Journal
- **XBef**
- **XboxClips**
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo
- **XHamster**
- **XHamsterEmbed**
- **xiami:album**: 虾米音乐 - 专辑

View File

@@ -151,7 +151,7 @@ def generator(test_case, tname):
try_num = 1
while True:
try:
# We're not using .download here sine that is just a shim
# We're not using .download here since that is just a shim
# for outside error handling, and returns the exit code
# instead of the result dict.
res_dict = ydl.extract_info(
@@ -199,7 +199,16 @@ def generator(test_case, tname):
self.assertEqual(
test_case['playlist_duration_sum'], got_duration)
for tc in test_cases:
# Generalize both playlists and single videos to unified format for
# simplicity
if 'entries' not in res_dict:
res_dict['entries'] = [res_dict]
for tc_num, tc in enumerate(test_cases):
tc_res_dict = res_dict['entries'][tc_num]
# First, check test cases' data against extracted data alone
expect_info_dict(self, tc_res_dict, tc.get('info_dict', {}))
# Now, check downloaded file consistency
tc_filename = get_tc_filename(tc)
if not test_case.get('params', {}).get('skip_download', False):
self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
@@ -217,13 +226,14 @@ def generator(test_case, tname):
if 'md5' in tc:
md5_for_file = _file_md5(tc_filename)
self.assertEqual(md5_for_file, tc['md5'])
# Finally, check test cases' data again but this time against
# extracted data from info JSON file written during processing
info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
self.assertTrue(
os.path.exists(info_json_fn),
'Missing info file %s' % info_json_fn)
with io.open(info_json_fn, encoding='utf-8') as infof:
info_dict = json.load(infof)
expect_info_dict(self, info_dict, tc.get('info_dict', {}))
finally:
try_rm_tcs_files()

View File

@@ -21,7 +21,7 @@ from youtube_dl.extractor import (
NPOIE,
ComedyCentralIE,
NRKTVIE,
RaiTVIE,
RaiPlayIE,
VikiIE,
ThePlatformIE,
ThePlatformFeedIE,
@@ -258,9 +258,9 @@ class TestNRKSubtitles(BaseTestSubtitles):
self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2')
class TestRaiSubtitles(BaseTestSubtitles):
url = 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
IE = RaiTVIE
class TestRaiPlaySubtitles(BaseTestSubtitles):
url = 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
IE = RaiPlayIE
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True

View File

@@ -837,6 +837,12 @@ class YoutubeDL(object):
ie_result['url'], ie_key=ie_result.get('ie_key'),
extra_info=extra_info, download=False, process=False)
# extract_info may return None when ignoreerrors is enabled and
# extraction failed with an error, don't crash and return early
# in this case
if not info:
return info
force_properties = dict(
(k, v) for k, v in ie_result.items() if v is not None)
for f in ('_type', 'url', 'ie_key'):
@@ -849,7 +855,7 @@ class YoutubeDL(object):
return self.process_ie_result(
new_result, download=download, extra_info=extra_info)
elif result_type == 'playlist' or result_type == 'multi_video':
elif result_type in ('playlist', 'multi_video'):
# We process each entry in the playlist
playlist = ie_result.get('title') or ie_result.get('id')
self.to_screen('[download] Downloading playlist: %s' % playlist)

View File

@@ -2692,7 +2692,7 @@ else:
userhome = pwent.pw_dir
userhome = userhome.rstrip('/')
return (userhome + path[i:]) or '/'
elif compat_os_name == 'nt' or compat_os_name == 'ce':
elif compat_os_name in ('nt', 'ce'):
def compat_expanduser(path):
"""Expand ~ and ~user constructs.

View File

@@ -34,7 +34,7 @@ class HlsFD(FragmentFD):
def can_download(manifest, info_dict):
UNSUPPORTED_FEATURES = (
r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1]
r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
# r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
# Live streams heuristic does not always work (e.g. geo restricted to Germany
# http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
@@ -52,7 +52,9 @@ class HlsFD(FragmentFD):
# 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
)
check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
check_results.append(can_decrypt_frag or '#EXT-X-KEY:METHOD=AES-128' not in manifest)
is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
check_results.append(can_decrypt_frag or not is_aes128_enc)
check_results.append(not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest))
check_results.append(not info_dict.get('is_live'))
return all(check_results)
@@ -100,6 +102,7 @@ class HlsFD(FragmentFD):
i = 0
media_sequence = 0
decrypt_info = {'METHOD': 'NONE'}
byte_range = {}
frags_filenames = []
for line in s.splitlines():
line = line.strip()
@@ -114,11 +117,14 @@ class HlsFD(FragmentFD):
if extra_query:
frag_url = update_url_query(frag_url, extra_query)
count = 0
headers = info_dict.get('http_headers', {})
if byte_range:
headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'])
while count <= fragment_retries:
try:
success = ctx['dl'].download(frag_filename, {
'url': frag_url,
'http_headers': info_dict.get('http_headers'),
'http_headers': headers,
})
if not success:
return False
@@ -167,6 +173,13 @@ class HlsFD(FragmentFD):
decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read()
elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
media_sequence = int(line[22:])
elif line.startswith('#EXT-X-BYTERANGE'):
splitted_byte_range = line[17:].split('@')
sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
byte_range = {
'start': sub_range_start,
'end': sub_range_start + int(splitted_byte_range[0]),
}
self._finish_frag_download(ctx)

View File

@@ -169,7 +169,7 @@ class RtmpFD(FileDownloader):
self.report_error('[rtmpdump] Could not connect to RTMP server.')
return False
while (retval == RD_INCOMPLETE or retval == RD_FAILED) and not test and not live:
while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
prevsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen('[rtmpdump] %s bytes' % prevsize)
time.sleep(5.0) # This seems to be needed

136
youtube_dl/extractor/adn.py Normal file
View File

@@ -0,0 +1,136 @@
# coding: utf-8
from __future__ import unicode_literals
import base64
import json
import os
from .common import InfoExtractor
from ..aes import aes_cbc_decrypt
from ..compat import compat_ord
from ..utils import (
bytes_to_intlist,
ExtractorError,
float_or_none,
intlist_to_bytes,
srt_subtitles_timecode,
strip_or_none,
)
class ADNIE(InfoExtractor):
IE_DESC = 'Anime Digital Network'
_VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
_TEST = {
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
'md5': 'e497370d847fd79d9d4c74be55575c7a',
'info_dict': {
'id': '7778',
'ext': 'mp4',
'title': 'Blue Exorcist - Kyôto Saga - Épisode 1',
'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
}
}
def _get_subtitles(self, sub_path, video_id):
if not sub_path:
return None
enc_subtitles = self._download_webpage(
'http://animedigitalnetwork.fr/' + sub_path,
video_id, fatal=False)
if not enc_subtitles:
return None
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
bytes_to_intlist(base64.b64decode(enc_subtitles[24:])),
bytes_to_intlist(b'\nd\xaf\xd2J\xd0\xfc\xe1\xfc\xdf\xb61\xe8\xe1\xf0\xcc'),
bytes_to_intlist(base64.b64decode(enc_subtitles[:24]))
))
subtitles_json = self._parse_json(
dec_subtitles[:-compat_ord(dec_subtitles[-1])],
None, fatal=False)
if not subtitles_json:
return None
subtitles = {}
for sub_lang, sub in subtitles_json.items():
srt = ''
for num, current in enumerate(sub):
start, end, text = (
float_or_none(current.get('startTime')),
float_or_none(current.get('endTime')),
current.get('text'))
if start is None or end is None or text is None:
continue
srt += os.linesep.join(
(
'%d' % num,
'%s --> %s' % (
srt_subtitles_timecode(start),
srt_subtitles_timecode(end)),
text,
os.linesep,
))
if sub_lang == 'vostf':
sub_lang = 'fr'
subtitles.setdefault(sub_lang, []).extend([{
'ext': 'json',
'data': json.dumps(sub),
}, {
'ext': 'srt',
'data': srt,
}])
return subtitles
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
player_config = self._parse_json(self._search_regex(
r'playerConfig\s*=\s*({.+});', webpage, 'player config'), video_id)
video_info = {}
video_info_str = self._search_regex(
r'videoInfo\s*=\s*({.+});', webpage,
'video info', fatal=False)
if video_info_str:
video_info = self._parse_json(
video_info_str, video_id, fatal=False) or {}
options = player_config.get('options') or {}
metas = options.get('metas') or {}
title = metas.get('title') or video_info['title']
links = player_config.get('links') or {}
formats = []
for format_id, qualities in links.items():
for load_balancer_url in qualities.values():
load_balancer_data = self._download_json(
load_balancer_url, video_id, fatal=False) or {}
m3u8_url = load_balancer_data.get('location')
if not m3u8_url:
continue
m3u8_formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id=format_id, fatal=False)
if format_id == 'vf':
for f in m3u8_formats:
f['language'] = 'fr'
formats.extend(m3u8_formats)
error = options.get('error')
if not formats and error:
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'description': strip_or_none(metas.get('summary') or video_info.get('resume')),
'thumbnail': video_info.get('image'),
'formats': formats,
'subtitles': self.extract_subtitles(player_config.get('subtitles'), video_id),
'episode': metas.get('subtitle') or video_info.get('videoTitle'),
'series': video_info.get('playlistTitle'),
}

View File

@@ -41,6 +41,11 @@ MSO_INFO = {
'username_field': 'IDToken1',
'password_field': 'IDToken2',
},
'Verizon': {
'name': 'Verizon FiOS',
'username_field': 'IDToken1',
'password_field': 'IDToken2',
},
'thr030': {
'name': '3 Rivers Communications'
},
@@ -1384,40 +1389,72 @@ class AdobePassIE(InfoExtractor):
# Comcast page flow varies by video site and whether you
# are on Comcast's network.
provider_redirect_page, urlh = provider_redirect_page_res
# Check for Comcast auto login
if 'automatically signing you in' in provider_redirect_page:
oauth_redirect_url = self._html_search_regex(
r'window\.location\s*=\s*[\'"]([^\'"]+)',
provider_redirect_page, 'oauth redirect')
# Just need to process the request. No useful data comes back
self._download_webpage(
oauth_redirect_url, video_id, 'Confirming auto login')
else:
if '<form name="signin"' in provider_redirect_page:
# already have the form, just fill it
provider_login_page_res = provider_redirect_page_res
elif 'http-equiv="refresh"' in provider_redirect_page:
# redirects to the login page
oauth_redirect_url = self._html_search_regex(
r'content="0;\s*url=([^\'"]+)',
provider_redirect_page, 'meta refresh redirect')
provider_login_page_res = self._download_webpage_handle(
oauth_redirect_url,
video_id, 'Downloading Provider Login Page')
oauth_redirect_url, video_id,
'Downloading Provider Login Page')
else:
provider_login_page_res = post_form(
provider_redirect_page_res, 'Downloading Provider Login Page')
provider_redirect_page_res,
'Downloading Provider Login Page')
mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', {
mso_info.get('username_field', 'username'): username,
mso_info.get('password_field', 'password'): password,
})
mvpd_confirm_page_res = post_form(
provider_login_page_res, 'Logging in', {
mso_info['username_field']: username,
mso_info['password_field']: password,
})
mvpd_confirm_page, urlh = mvpd_confirm_page_res
if '<button class="submit" value="Resume">Resume</button>' in mvpd_confirm_page:
post_form(mvpd_confirm_page_res, 'Confirming Login')
elif mso_id == 'Verizon':
# In general, if you're connecting from a Verizon-assigned IP,
# you will not actually pass your credentials.
provider_redirect_page, urlh = provider_redirect_page_res
if 'Please wait ...' in provider_redirect_page:
saml_redirect_url = self._html_search_regex(
r'self\.parent\.location=(["\'])(?P<url>.+?)\1',
provider_redirect_page,
'SAML Redirect URL', group='url')
saml_login_page = self._download_webpage(
saml_redirect_url, video_id,
'Downloading SAML Login Page')
else:
saml_login_page_res = post_form(
provider_redirect_page_res, 'Logging in', {
mso_info['username_field']: username,
mso_info['password_field']: password,
})
saml_login_page, urlh = saml_login_page_res
if 'Please try again.' in saml_login_page:
raise ExtractorError(
'We\'re sorry, but either the User ID or Password entered is not correct.')
saml_login_url = self._search_regex(
r'xmlHttp\.open\("POST"\s*,\s*(["\'])(?P<url>.+?)\1',
saml_login_page, 'SAML Login URL', group='url')
saml_response_json = self._download_json(
saml_login_url, video_id, 'Downloading SAML Response',
headers={'Content-Type': 'text/xml'})
self._download_webpage(
saml_response_json['targetValue'], video_id,
'Confirming Login', data=urlencode_postdata({
'SAMLResponse': saml_response_json['SAMLResponse'],
'RelayState': saml_response_json['RelayState']
}), headers={
'Content-Type': 'application/x-www-form-urlencoded'
})
else:
# Normal, non-Comcast flow
provider_login_page_res = post_form(
provider_redirect_page_res, 'Downloading Provider Login Page')
mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', {

View File

@@ -23,7 +23,19 @@ class AENetworksBaseIE(ThePlatformIE):
class AENetworksIE(AENetworksBaseIE):
IE_NAME = 'aenetworks'
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|aetv|mylifetime|lifetimemovieclub)\.com|fyi\.tv)/(?:shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?)'
_VALID_URL = r'''(?x)
https?://
(?:www\.)?
(?P<domain>
(?:history|aetv|mylifetime|lifetimemovieclub)\.com|
fyi\.tv
)/
(?:
shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|
movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?|
specials/(?P<special_display_id>[^/]+)/full-special
)
'''
_TESTS = [{
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
'md5': 'a97a65f7e823ae10e9244bc5433d5fe6',
@@ -65,6 +77,9 @@ class AENetworksIE(AENetworksBaseIE):
}, {
'url': 'https://www.lifetimemovieclub.com/movies/a-killer-among-us',
'only_matching': True
}, {
'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
'only_matching': True
}]
_DOMAIN_TO_REQUESTOR_ID = {
'history.com': 'HISTORY',
@@ -75,8 +90,8 @@ class AENetworksIE(AENetworksBaseIE):
}
def _real_extract(self, url):
domain, show_path, movie_display_id = re.match(self._VALID_URL, url).groups()
display_id = show_path or movie_display_id
domain, show_path, movie_display_id, special_display_id = re.match(self._VALID_URL, url).groups()
display_id = show_path or movie_display_id or special_display_id
webpage = self._download_webpage(url, display_id)
if show_path:
url_parts = show_path.split('/')
@@ -107,7 +122,10 @@ class AENetworksIE(AENetworksBaseIE):
}
video_id = self._html_search_meta('aetn:VideoID', webpage)
media_url = self._search_regex(
r"media_url\s*=\s*'([^']+)'", webpage, 'video url')
[r"media_url\s*=\s*'(?P<url>[^']+)'",
r'data-media-url=(?P<url>(?:https?:)?//[^\s>]+)',
r'data-media-url=(["\'])(?P<url>(?:(?!\1).)+?)\1'],
webpage, 'video url', group='url')
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
info = self._parse_theplatform_metadata(theplatform_metadata)

View File

@@ -6,6 +6,7 @@ import re
from .common import InfoExtractor
from ..compat import compat_xpath
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
xpath_text,
@@ -72,13 +73,70 @@ class AfreecaTVIE(InfoExtractor):
'url': 'http://vod.afreecatv.com/PLAYER/STATION/18650793',
'info_dict': {
'id': '18650793',
'ext': 'flv',
'ext': 'mp4',
'title': '오늘은 다르다! 쏘님의 우월한 위아래~ 댄스리액션!',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': '윈아디',
'uploader_id': 'badkids',
'title': '오늘은 다르다! 쏘님의 우월한 위아래~ 댄스리액션!',
'duration': 107,
},
'params': {
'skip_download': True, # requires rtmpdump
'skip_download': True,
},
}, {
'url': 'http://vod.afreecatv.com/PLAYER/STATION/10481652',
'info_dict': {
'id': '10481652',
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': 'dailyapril',
'uploader_id': 'dailyapril',
'duration': 6492,
},
'playlist_count': 2,
'playlist': [{
'md5': 'd8b7c174568da61d774ef0203159bf97',
'info_dict': {
'id': '20160502_c4c62b9d_174361386_1',
'ext': 'mp4',
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 1)",
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': 'dailyapril',
'uploader_id': 'dailyapril',
'upload_date': '20160502',
'duration': 3601,
},
}, {
'md5': '58f2ce7f6044e34439ab2d50612ab02b',
'info_dict': {
'id': '20160502_39e739bb_174361386_2',
'ext': 'mp4',
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 2)",
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': 'dailyapril',
'uploader_id': 'dailyapril',
'upload_date': '20160502',
'duration': 2891,
},
}],
'params': {
'skip_download': True,
},
}, {
# non standard key
'url': 'http://vod.afreecatv.com/PLAYER/STATION/20515605',
'info_dict': {
'id': '20170411_BE689A0E_190960999_1_2_h',
'ext': 'mp4',
'title': '혼자사는여자집',
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': '♥이슬이',
'uploader_id': 'dasl8121',
'upload_date': '20170411',
'duration': 213,
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
@@ -94,7 +152,7 @@ class AfreecaTVIE(InfoExtractor):
m = re.match(r'^(?P<upload_date>\d{8})_\w+_(?P<part>\d+)$', key)
if m:
video_key['upload_date'] = m.group('upload_date')
video_key['part'] = m.group('part')
video_key['part'] = int(m.group('part'))
return video_key
def _real_extract(self, url):
@@ -109,23 +167,65 @@ class AfreecaTVIE(InfoExtractor):
raise ExtractorError('Specified AfreecaTV video does not exist',
expected=True)
video_url_raw = video_element.text
app, playpath = video_url_raw.split('mp4:')
video_url = video_element.text.strip()
title = xpath_text(video_xml, './track/title', 'title', fatal=True)
uploader = xpath_text(video_xml, './track/nickname', 'uploader')
uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id')
duration = int_or_none(xpath_text(video_xml, './track/duration',
'duration'))
duration = int_or_none(xpath_text(
video_xml, './track/duration', 'duration'))
thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail')
return {
common_entry = {
'uploader': uploader,
'uploader_id': uploader_id,
'thumbnail': thumbnail,
}
info = common_entry.copy()
info.update({
'id': video_id,
'title': title,
'duration': duration,
})
if not video_url:
entries = []
file_elements = video_element.findall(compat_xpath('./file'))
one = len(file_elements) == 1
for file_num, file_element in enumerate(file_elements, start=1):
file_url = file_element.text
if not file_url:
continue
key = file_element.get('key', '')
upload_date = self._search_regex(
r'^(\d{8})_', key, 'upload date', default=None)
file_duration = int_or_none(file_element.get('duration'))
format_id = key if key else '%s_%s' % (video_id, file_num)
formats = self._extract_m3u8_formats(
file_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls',
note='Downloading part %d m3u8 information' % file_num)
title = title if one else '%s (part %d)' % (title, file_num)
file_info = common_entry.copy()
file_info.update({
'id': format_id,
'title': title,
'upload_date': upload_date,
'duration': file_duration,
'formats': formats,
})
entries.append(file_info)
entries_info = info.copy()
entries_info.update({
'_type': 'multi_video',
'entries': entries,
})
return entries_info
info = {
'id': video_id,
'url': app,
'ext': 'flv',
'play_path': 'mp4:' + playpath,
'rtmp_live': True, # downloading won't end without this
'title': title,
'uploader': uploader,
'uploader_id': uploader_id,
@@ -133,6 +233,21 @@ class AfreecaTVIE(InfoExtractor):
'thumbnail': thumbnail,
}
if determine_ext(video_url) == 'm3u8':
info['formats'] = self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
else:
app, playpath = video_url.split('mp4:')
info.update({
'url': app,
'ext': 'flv',
'play_path': 'mp4:' + playpath,
'rtmp_live': True, # downloading won't end without this
})
return info
class AfreecaTVGlobalIE(AfreecaTVIE):
IE_NAME = 'afreecatv:global'

View File

@@ -15,12 +15,12 @@ class AirMozillaIE(InfoExtractor):
_VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
_TEST = {
'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/',
'md5': '2e3e7486ba5d180e829d453875b9b8bf',
'md5': '8d02f53ee39cf006009180e21df1f3ba',
'info_dict': {
'id': '6x4q2w',
'ext': 'mp4',
'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
'thumbnail': r're:https?://vid\.ly/(?P<id>[0-9a-z-]+)/poster',
'thumbnail': r're:https?://.*/poster\.jpg',
'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
'timestamp': 1422487800,
'upload_date': '20150128',
@@ -34,21 +34,13 @@ class AirMozillaIE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_id = self._html_search_regex(r'//vid.ly/(.*?)/embed', webpage, 'id')
video_id = self._html_search_regex(r'//vid\.ly/(.*?)/embed', webpage, 'id')
embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id)
jwconfig = self._search_regex(r'\svar jwconfig = (\{.*?\});\s', embed_script, 'metadata')
metadata = self._parse_json(jwconfig, video_id)
formats = [{
'url': source['file'],
'ext': source['type'],
'format_id': self._search_regex(r'&format=(.*)$', source['file'], 'video format'),
'format': source['label'],
'height': int(source['label'].rstrip('p')),
} for source in metadata['playlist'][0]['sources']]
self._sort_formats(formats)
jwconfig = self._parse_json(self._search_regex(
r'initCallback\((.*)\);', embed_script, 'metadata'), video_id)['config']
info_dict = self._parse_jwplayer_data(jwconfig, video_id)
view_count = int_or_none(self._html_search_regex(
r'Views since archived: ([0-9]+)',
webpage, 'view count', fatal=False))
@@ -58,17 +50,17 @@ class AirMozillaIE(InfoExtractor):
r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)',
webpage, 'duration', fatal=False))
return {
info_dict.update({
'id': video_id,
'title': self._og_search_title(webpage),
'formats': formats,
'url': self._og_search_url(webpage),
'display_id': display_id,
'thumbnail': metadata['playlist'][0].get('image'),
'description': self._og_search_description(webpage),
'timestamp': timestamp,
'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None),
'duration': duration,
'view_count': view_count,
'categories': re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage),
}
})
return info_dict

View File

@@ -2,9 +2,13 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
remove_end,
int_or_none,
qualities,
remove_end,
try_get,
unified_timestamp,
url_basename,
)
@@ -22,6 +26,10 @@ class AllocineIE(InfoExtractor):
'title': 'Astérix - Le Domaine des Dieux Teaser VF',
'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
'thumbnail': r're:http://.*\.jpg',
'duration': 39,
'timestamp': 1404273600,
'upload_date': '20140702',
'view_count': int,
},
}, {
'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html',
@@ -33,6 +41,10 @@ class AllocineIE(InfoExtractor):
'title': 'Planes 2 Bande-annonce VF',
'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway',
'thumbnail': r're:http://.*\.jpg',
'duration': 69,
'timestamp': 1385659800,
'upload_date': '20131128',
'view_count': int,
},
}, {
'url': 'http://www.allocine.fr/video/player_gen_cmedia=19544709&cfilm=181290.html',
@@ -44,6 +56,10 @@ class AllocineIE(InfoExtractor):
'title': 'Dragons 2 - Bande annonce finale VF',
'description': 'md5:6cdd2d7c2687d4c6aafe80a35e17267a',
'thumbnail': r're:http://.*\.jpg',
'duration': 144,
'timestamp': 1397589900,
'upload_date': '20140415',
'view_count': int,
},
}, {
'url': 'http://www.allocine.fr/video/video-19550147/',
@@ -69,34 +85,37 @@ class AllocineIE(InfoExtractor):
r'data-model="([^"]+)"', webpage, 'data model', default=None)
if model:
model_data = self._parse_json(model, display_id)
for video_url in model_data['sources'].values():
video = model_data['videos'][0]
title = video['title']
for video_url in video['sources'].values():
video_id, format_id = url_basename(video_url).split('_')[:2]
formats.append({
'format_id': format_id,
'quality': quality(format_id),
'url': video_url,
})
title = model_data['title']
duration = int_or_none(video.get('duration'))
view_count = int_or_none(video.get('view_count'))
timestamp = unified_timestamp(try_get(
video, lambda x: x['added_at']['date'], compat_str))
else:
video_id = display_id
media_data = self._download_json(
'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
title = remove_end(
self._html_search_regex(
r'(?s)<title>(.+?)</title>', webpage, 'title').strip(),
' - AlloCiné')
for key, value in media_data['video'].items():
if not key.endswith('Path'):
continue
format_id = key[:-len('Path')]
formats.append({
'format_id': format_id,
'quality': quality(format_id),
'url': value,
})
title = remove_end(self._html_search_regex(
r'(?s)<title>(.+?)</title>', webpage, 'title'
).strip(), ' - AlloCiné')
duration, view_count, timestamp = [None] * 3
self._sort_formats(formats)
@@ -104,7 +123,10 @@ class AllocineIE(InfoExtractor):
'id': video_id,
'display_id': display_id,
'title': title,
'thumbnail': self._og_search_thumbnail(webpage),
'formats': formats,
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'duration': duration,
'timestamp': timestamp,
'view_count': view_count,
'formats': formats,
}

View File

@@ -361,7 +361,7 @@ class BBCCoUkIE(InfoExtractor):
fmt.update({
'width': width,
'height': height,
'vbr': bitrate,
'tbr': bitrate,
'vcodec': encoding,
})
else:
@@ -370,7 +370,7 @@ class BBCCoUkIE(InfoExtractor):
'acodec': encoding,
'vcodec': 'none',
})
if protocol == 'http':
if protocol in ('http', 'https'):
# Direct link
fmt.update({
'url': href,
@@ -389,6 +389,8 @@ class BBCCoUkIE(InfoExtractor):
'rtmp_live': False,
'ext': 'flv',
})
else:
continue
formats.append(fmt)
elif kind == 'captions':
subtitles = self.extract_subtitles(media, programme_id)
@@ -407,7 +409,7 @@ class BBCCoUkIE(InfoExtractor):
description = smp_config['summary']
for item in smp_config['items']:
kind = item['kind']
if kind != 'programme' and kind != 'radioProgramme':
if kind not in ('programme', 'radioProgramme'):
continue
programme_id = item.get('vpid')
duration = int_or_none(item.get('duration'))
@@ -448,7 +450,7 @@ class BBCCoUkIE(InfoExtractor):
for item in self._extract_items(playlist):
kind = item.get('kind')
if kind != 'programme' and kind != 'radioProgramme':
if kind not in ('programme', 'radioProgramme'):
continue
title = playlist.find('./{%s}title' % self._EMP_PLAYLIST_NS).text
description_el = playlist.find('./{%s}summary' % self._EMP_PLAYLIST_NS)

View File

@@ -45,6 +45,9 @@ class CanalplusIE(InfoExtractor):
'itele': 'itele',
}
# Only works for direct mp4 URLs
_GEO_COUNTRIES = ['FR']
_TESTS = [{
'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1192814',
'info_dict': {
@@ -56,6 +59,7 @@ class CanalplusIE(InfoExtractor):
'upload_date': '20160702',
},
}, {
# geo restricted, bypassed
'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190',
'info_dict': {
'id': '1108190',
@@ -65,19 +69,20 @@ class CanalplusIE(InfoExtractor):
'description': 'md5:4cea7a37153be42c1ba2c1d3064376ff',
'upload_date': '20140724',
},
'skip': 'Only works from France',
'expected_warnings': ['HTTP Error 403: Forbidden'],
}, {
'url': 'http://www.c8.fr/c8-divertissement/ms-touche-pas-a-mon-poste/pid6318-videos-integrales.html',
'md5': '4b47b12b4ee43002626b97fad8fb1de5',
# geo restricted, bypassed
'url': 'http://www.c8.fr/c8-divertissement/ms-touche-pas-a-mon-poste/pid6318-videos-integrales.html?vid=1443684',
'md5': 'bb6f9f343296ab7ebd88c97b660ecf8d',
'info_dict': {
'id': '1420213',
'id': '1443684',
'display_id': 'pid6318-videos-integrales',
'ext': 'mp4',
'title': 'TPMP ! Même le matin - Les 35H de Baba - 14/10/2016',
'description': 'md5:f96736c1b0ffaa96fd5b9e60ad871799',
'upload_date': '20161014',
'title': 'Guess my iep ! - TPMP - 07/04/2017',
'description': 'md5:6f005933f6e06760a9236d9b3b5f17fa',
'upload_date': '20170407',
},
'skip': 'Only works from France',
'expected_warnings': ['HTTP Error 403: Forbidden'],
}, {
'url': 'http://www.itele.fr/chroniques/invite-michael-darmon/rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510',
'info_dict': {
@@ -134,15 +139,15 @@ class CanalplusIE(InfoExtractor):
preference = qualities(['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD'])
fmt_url = next(iter(media.get('VIDEOS')))
if '/geo' in fmt_url.lower():
response = self._request_webpage(
HEADRequest(fmt_url), video_id,
'Checking if the video is georestricted')
if '/blocage' in response.geturl():
raise ExtractorError(
'The video is not available in your country',
expected=True)
# _, fmt_url = next(iter(media['VIDEOS'].items()))
# if '/geo' in fmt_url.lower():
# response = self._request_webpage(
# HEADRequest(fmt_url), video_id,
# 'Checking if the video is georestricted')
# if '/blocage' in response.geturl():
# raise ExtractorError(
# 'The video is not available in your country',
# expected=True)
formats = []
for format_id, format_url in media['VIDEOS'].items():

View File

@@ -7,6 +7,7 @@ from ..utils import float_or_none
class CanvasIE(InfoExtractor):
IE_DESC = 'canvas.be and een.be'
_VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',

View File

@@ -12,13 +12,14 @@ from ..utils import (
ExtractorError,
float_or_none,
sanitized_Request,
unescapeHTML,
urlencode_postdata,
USER_AGENTS,
)
class CeskaTelevizeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P<id>[^/#?]+)/*(?:[#?].*)?$'
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/ivysilani/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
_TESTS = [{
'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
'info_dict': {
@@ -62,40 +63,12 @@ class CeskaTelevizeIE(InfoExtractor):
},
'skip': 'Georestricted to Czech Republic',
}, {
# video with 18+ caution trailer
'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
'info_dict': {
'id': '215562210900007-bogotart',
'title': 'Queer: Bogotart',
'description': 'Alternativní průvodce současným queer světem',
},
'playlist': [{
'info_dict': {
'id': '61924494876844842',
'ext': 'mp4',
'title': 'Queer: Bogotart (Varování 18+)',
'duration': 10.2,
},
}, {
'info_dict': {
'id': '61924494877068022',
'ext': 'mp4',
'title': 'Queer: Bogotart (Queer)',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 1558.3,
},
}],
'params': {
# m3u8 download
'skip_download': True,
},
'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
'only_matching': True,
}]
def _real_extract(self, url):
url = url.replace('/porady/', '/ivysilani/').replace('/video/', '')
mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('id')
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
@@ -103,13 +76,28 @@ class CeskaTelevizeIE(InfoExtractor):
if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
typ = self._html_search_regex(
r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage, 'type')
episode_id = self._html_search_regex(
r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage, 'episode_id')
type_ = None
episode_id = None
playlist = self._parse_json(
self._search_regex(
r'getPlaylistUrl\(\[({.+?})\]', webpage, 'playlist',
default='{}'), playlist_id)
if playlist:
type_ = playlist.get('type')
episode_id = playlist.get('id')
if not type_:
type_ = self._html_search_regex(
r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],',
webpage, 'type')
if not episode_id:
episode_id = self._html_search_regex(
r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],',
webpage, 'episode_id')
data = {
'playlist[0][type]': typ,
'playlist[0][type]': type_,
'playlist[0][id]': episode_id,
'requestUrl': compat_urllib_parse_urlparse(url).path,
'requestSource': 'iVysilani',
@@ -245,3 +233,47 @@ class CeskaTelevizeIE(InfoExtractor):
yield line
return '\r\n'.join(_fix_subtitle(subtitles))
class CeskaTelevizePoradyIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/porady/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
_TESTS = [{
# video with 18+ caution trailer
'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
'info_dict': {
'id': '215562210900007-bogotart',
'title': 'Queer: Bogotart',
'description': 'Alternativní průvodce současným queer světem',
},
'playlist': [{
'info_dict': {
'id': '61924494876844842',
'ext': 'mp4',
'title': 'Queer: Bogotart (Varování 18+)',
'duration': 10.2,
},
}, {
'info_dict': {
'id': '61924494877068022',
'ext': 'mp4',
'title': 'Queer: Bogotart (Queer)',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 1558.3,
},
}],
'params': {
# m3u8 download
'skip_download': True,
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
data_url = unescapeHTML(self._search_regex(
r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
webpage, 'iframe player url', group='url'))
return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key())

View File

@@ -33,10 +33,17 @@ class ChaturbateIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
m3u8_formats = [(m.group('id').lower(), m.group('url')) for m in re.finditer(
r'hlsSource(?P<id>.+?)\s*=\s*(?P<q>["\'])(?P<url>http.+?)(?P=q)', webpage)]
m3u8_urls = []
if not m3u8_formats:
for m in re.finditer(
r'(["\'])(?P<url>http.+?\.m3u8.*?)\1', webpage):
m3u8_fast_url, m3u8_no_fast_url = m.group('url'), m.group(
'url').replace('_fast', '')
for m3u8_url in (m3u8_fast_url, m3u8_no_fast_url):
if m3u8_url not in m3u8_urls:
m3u8_urls.append(m3u8_url)
if not m3u8_urls:
error = self._search_regex(
[r'<span[^>]+class=(["\'])desc_span\1[^>]*>(?P<error>[^<]+)</span>',
r'<div[^>]+id=(["\'])defchat\1[^>]*>\s*<p><strong>(?P<error>[^<]+)<'],
@@ -50,7 +57,8 @@ class ChaturbateIE(InfoExtractor):
raise ExtractorError('Unable to find stream URL')
formats = []
for m3u8_id, m3u8_url in m3u8_formats:
for m3u8_url in m3u8_urls:
m3u8_id = 'fast' if '_fast' in m3u8_url else 'slow'
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, ext='mp4',
# ffmpeg skips segments for fast m3u8

View File

@@ -1,3 +1,4 @@
# coding: utf-8
from __future__ import unicode_literals
import base64
@@ -547,6 +548,34 @@ class InfoExtractor(object):
return encoding
def __check_blocked(self, content):
first_block = content[:512]
if ('<title>Access to this site is blocked</title>' in content and
'Websense' in first_block):
msg = 'Access to this webpage has been blocked by Websense filtering software in your network.'
blocked_iframe = self._html_search_regex(
r'<iframe src="([^"]+)"', content,
'Websense information URL', default=None)
if blocked_iframe:
msg += ' Visit %s for more details' % blocked_iframe
raise ExtractorError(msg, expected=True)
if '<title>The URL you requested has been blocked</title>' in first_block:
msg = (
'Access to this webpage has been blocked by Indian censorship. '
'Use a VPN or proxy server (with --proxy) to route around it.')
block_msg = self._html_search_regex(
r'</h1><p>(.*?)</p>',
content, 'block message', default=None)
if block_msg:
msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
raise ExtractorError(msg, expected=True)
if ('<title>TTK :: Доступ к ресурсу ограничен</title>' in content and
'blocklist.rkn.gov.ru' in content):
raise ExtractorError(
'Access to this webpage has been blocked by decision of the Russian government. '
'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
expected=True)
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
content_type = urlh.headers.get('Content-Type', '')
webpage_bytes = urlh.read()
@@ -588,25 +617,7 @@ class InfoExtractor(object):
except LookupError:
content = webpage_bytes.decode('utf-8', 'replace')
if ('<title>Access to this site is blocked</title>' in content and
'Websense' in content[:512]):
msg = 'Access to this webpage has been blocked by Websense filtering software in your network.'
blocked_iframe = self._html_search_regex(
r'<iframe src="([^"]+)"', content,
'Websense information URL', default=None)
if blocked_iframe:
msg += ' Visit %s for more details' % blocked_iframe
raise ExtractorError(msg, expected=True)
if '<title>The URL you requested has been blocked</title>' in content[:512]:
msg = (
'Access to this webpage has been blocked by Indian censorship. '
'Use a VPN or proxy server (with --proxy) to route around it.')
block_msg = self._html_search_regex(
r'</h1><p>(.*?)</p>',
content, 'block message', default=None)
if block_msg:
msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
raise ExtractorError(msg, expected=True)
self.__check_blocked(content)
return content
@@ -1768,7 +1779,7 @@ class InfoExtractor(object):
if content_type == 'text':
# TODO implement WebVTT downloading
pass
elif content_type == 'video' or content_type == 'audio':
elif content_type in ('video', 'audio'):
base_url = ''
for element in (representation, adaptation_set, period, mpd_doc):
base_url_e = element.find(_add_ns('BaseURL'))

View File

@@ -390,7 +390,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
else:
webpage_url = 'http://www.' + mobj.group('url')
webpage = self._download_webpage(self._add_skip_wall(webpage_url), video_id, 'Downloading webpage')
webpage = self._download_webpage(
self._add_skip_wall(webpage_url), video_id,
headers=self.geo_verification_headers())
note_m = self._html_search_regex(
r'<div class="showmedia-trailer-notice">(.+?)</div>',
webpage, 'trailer-notice', default='')
@@ -565,7 +567,9 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
def _real_extract(self, url):
show_id = self._match_id(url)
webpage = self._download_webpage(self._add_skip_wall(url), show_id)
webpage = self._download_webpage(
self._add_skip_wall(url), show_id,
headers=self.geo_verification_headers())
title = self._html_search_regex(
r'(?s)<h1[^>]*>\s*<span itemprop="name">(.*?)</span>',
webpage, 'title')

View File

@@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -49,6 +51,48 @@ class CuriosityStreamBaseIE(InfoExtractor):
limelight_media_id = media['limelight_media_id']
title = media['title']
formats = []
for encoding in media.get('encodings', []):
m3u8_url = encoding.get('master_playlist_url')
if m3u8_url:
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
encoding_url = encoding.get('url')
file_url = encoding.get('file_url')
if not encoding_url and not file_url:
continue
f = {
'width': int_or_none(encoding.get('width')),
'height': int_or_none(encoding.get('height')),
'vbr': int_or_none(encoding.get('video_bitrate')),
'abr': int_or_none(encoding.get('audio_bitrate')),
'filesize': int_or_none(encoding.get('size_in_bytes')),
'vcodec': encoding.get('video_codec'),
'acodec': encoding.get('audio_codec'),
'container': encoding.get('container_type'),
}
for f_url in (encoding_url, file_url):
if not f_url:
continue
fmt = f.copy()
rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url)
if rtmp:
fmt.update({
'url': rtmp.group('url'),
'play_path': rtmp.group('playpath'),
'app': rtmp.group('app'),
'ext': 'flv',
'format_id': 'rtmp',
})
else:
fmt.update({
'url': f_url,
'format_id': 'http',
})
formats.append(fmt)
self._sort_formats(formats)
subtitles = {}
for closed_caption in media.get('closed_captions', []):
sub_url = closed_caption.get('file')
@@ -60,16 +104,14 @@ class CuriosityStreamBaseIE(InfoExtractor):
})
return {
'_type': 'url_transparent',
'id': video_id,
'url': 'limelight:media:' + limelight_media_id,
'formats': formats,
'title': title,
'description': media.get('description'),
'thumbnail': media.get('image_large') or media.get('image_medium') or media.get('image_small'),
'duration': int_or_none(media.get('duration')),
'tags': media.get('tags'),
'subtitles': subtitles,
'ie_key': 'LimelightMedia',
}
@@ -78,14 +120,12 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
_VALID_URL = r'https?://app\.curiositystream\.com/video/(?P<id>\d+)'
_TEST = {
'url': 'https://app.curiositystream.com/video/2',
'md5': 'a0074c190e6cddaf86900b28d3e9ee7a',
'md5': '262bb2f257ff301115f1973540de8983',
'info_dict': {
'id': '2',
'ext': 'mp4',
'title': 'How Did You Develop The Internet?',
'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
'timestamp': 1448388615,
'upload_date': '20151124',
}
}
@@ -105,7 +145,7 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
'title': 'Curious Minds: The Internet',
'description': 'How is the internet shaping our lives in the 21st Century?',
},
'playlist_mincount': 17,
'playlist_mincount': 12,
}
def _real_extract(self, url):

View File

@@ -82,6 +82,11 @@ class CWTVIE(InfoExtractor):
'url': quality_url,
'tbr': tbr,
})
video_metadata = video_data['assetFields']
ism_url = video_metadata.get('smoothStreamingUrl')
if ism_url:
formats.extend(self._extract_ism_formats(
ism_url, video_id, ism_id='mss', fatal=False))
self._sort_formats(formats)
thumbnails = [{
@@ -90,8 +95,6 @@ class CWTVIE(InfoExtractor):
'height': image.get('height'),
} for image_id, image in video_data['images'].items() if image.get('uri')] if video_data.get('images') else None
video_metadata = video_data['assetFields']
subtitles = {
'en': [{
'url': video_metadata['UnicornCcUrl'],

View File

@@ -0,0 +1,59 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import parse_duration
class DiscoveryVRIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?discoveryvr\.com/watch/(?P<id>[^/?#]+)'
_TEST = {
'url': 'http://www.discoveryvr.com/watch/discovery-vr-an-introduction',
'md5': '32b1929798c464a54356378b7912eca4',
'info_dict': {
'id': 'discovery-vr-an-introduction',
'ext': 'mp4',
'title': 'Discovery VR - An Introduction',
'description': 'md5:80d418a10efb8899d9403e61d8790f06',
}
}
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
bootstrap_data = self._search_regex(
r'root\.DVR\.bootstrapData\s+=\s+"({.+?})";',
webpage, 'bootstrap data')
bootstrap_data = self._parse_json(
bootstrap_data.encode('utf-8').decode('unicode_escape'),
display_id)
videos = self._parse_json(bootstrap_data['videos'], display_id)['allVideos']
video_data = next(video for video in videos if video.get('slug') == display_id)
series = video_data.get('showTitle')
title = episode = video_data.get('title') or series
if series and series != title:
title = '%s - %s' % (series, title)
formats = []
for f, format_id in (('cdnUriM3U8', 'mobi'), ('webVideoUrlSd', 'sd'), ('webVideoUrlHd', 'hd')):
f_url = video_data.get(f)
if not f_url:
continue
formats.append({
'format_id': format_id,
'url': f_url,
})
return {
'id': display_id,
'display_id': display_id,
'title': title,
'description': video_data.get('description'),
'thumbnail': video_data.get('thumbnail'),
'duration': parse_duration(video_data.get('runTime')),
'formats': formats,
'episode': episode,
'series': series,
}

View File

@@ -19,6 +19,7 @@ from .acast import (
ACastChannelIE,
)
from .addanime import AddAnimeIE
from .adn import ADNIE
from .adobetv import (
AdobeTVIE,
AdobeTVShowIE,
@@ -164,7 +165,10 @@ from .ccc import CCCIE
from .ccma import CCMAIE
from .cctv import CCTVIE
from .cda import CDAIE
from .ceskatelevize import CeskaTelevizeIE
from .ceskatelevize import (
CeskaTelevizeIE,
CeskaTelevizePoradyIE,
)
from .channel9 import Channel9IE
from .charlierose import CharlieRoseIE
from .chaturbate import ChaturbateIE
@@ -272,6 +276,7 @@ from .discoverygo import (
DiscoveryGoPlaylistIE,
)
from .discoverynetworks import DiscoveryNetworksDeIE
from .discoveryvr import DiscoveryVRIE
from .disney import DisneyIE
from .dispeak import DigitallySpeakingIE
from .dropbox import DropboxIE
@@ -379,6 +384,7 @@ from .globo import (
GloboArticleIE,
)
from .go import GoIE
from .go90 import Go90IE
from .godtube import GodTubeIE
from .godtv import GodTVIE
from .golem import GolemIE
@@ -536,6 +542,7 @@ from .mangomolo import (
)
from .matchtv import MatchTVIE
from .mdr import MDRIE
from .medici import MediciIE
from .meipai import MeipaiIE
from .melonvod import MelonVODIE
from .meta import METAIE
@@ -728,6 +735,10 @@ from .orf import (
ORFFM4IE,
ORFIPTVIE,
)
from .packtpub import (
PacktPubIE,
PacktPubCourseIE,
)
from .pandatv import PandaTVIE
from .pandoratv import PandoraTVIE
from .parliamentliveuk import ParliamentLiveUKIE
@@ -797,7 +808,7 @@ from .radiojavan import RadioJavanIE
from .radiobremen import RadioBremenIE
from .radiofrance import RadioFranceIE
from .rai import (
RaiTVIE,
RaiPlayIE,
RaiIE,
)
from .rbmaradio import RBMARadioIE
@@ -828,7 +839,11 @@ from .rozhlas import RozhlasIE
from .rtbf import RTBFIE
from .rte import RteIE, RteRadioIE
from .rtlnl import RtlNlIE
from .rtl2 import RTL2IE
from .rtl2 import (
RTL2IE,
RTL2YouIE,
RTL2YouSeriesIE,
)
from .rtp import RTPIE
from .rts import RTSIE
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE
@@ -970,6 +985,7 @@ from .theplatform import (
from .thescene import TheSceneIE
from .thesixtyone import TheSixtyOneIE
from .thestar import TheStarIE
from .thesun import TheSunIE
from .theweatherchannel import TheWeatherChannelIE
from .thisamericanlife import ThisAmericanLifeIE
from .thisav import ThisAVIE
@@ -1016,8 +1032,10 @@ from .tv2 import (
TV2IE,
TV2ArticleIE,
)
from .tv2hu import TV2HuIE
from .tv3 import TV3IE
from .tv4 import TV4IE
from .tv5mondeplus import TV5MondePlusIE
from .tva import TVAIE
from .tvanouvelles import (
TVANouvellesIE,
@@ -1177,6 +1195,11 @@ from .voxmedia import VoxMediaIE
from .vporn import VpornIE
from .vrt import VRTIE
from .vrak import VrakIE
from .vrv import (
VRVIE,
VRVSeriesIE,
)
from .vshare import VShareIE
from .medialaan import MedialaanIE
from .vube import VubeIE
from .vuclip import VuClipIE

View File

@@ -7,9 +7,9 @@ from ..compat import (
compat_urllib_parse_unquote_plus,
)
from ..utils import (
clean_html,
determine_ext,
int_or_none,
js_to_json,
sanitized_Request,
ExtractorError,
urlencode_postdata
@@ -17,34 +17,26 @@ from ..utils import (
class FunimationIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?funimation\.com/shows/[^/]+/videos/(?:official|promotional)/(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/shows/[^/]+/(?P<id>[^/?#&]+)'
_NETRC_MACHINE = 'funimation'
_TESTS = [{
'url': 'http://www.funimation.com/shows/air/videos/official/breeze',
'url': 'https://www.funimation.com/shows/hacksign/role-play/',
'info_dict': {
'id': '658',
'display_id': 'breeze',
'ext': 'mp4',
'title': 'Air - 1 - Breeze',
'description': 'md5:1769f43cd5fc130ace8fd87232207892',
'thumbnail': r're:https?://.*\.jpg',
},
'skip': 'Access without user interaction is forbidden by CloudFlare, and video removed',
}, {
'url': 'http://www.funimation.com/shows/hacksign/videos/official/role-play',
'info_dict': {
'id': '31128',
'id': '91144',
'display_id': 'role-play',
'ext': 'mp4',
'title': '.hack//SIGN - 1 - Role Play',
'title': '.hack//SIGN - Role Play',
'description': 'md5:b602bdc15eef4c9bbb201bb6e6a4a2dd',
'thumbnail': r're:https?://.*\.jpg',
},
'skip': 'Access without user interaction is forbidden by CloudFlare',
'params': {
# m3u8 download
'skip_download': True,
},
}, {
'url': 'http://www.funimation.com/shows/attack-on-titan-junior-high/videos/promotional/broadcast-dub-preview',
'url': 'https://www.funimation.com/shows/attack-on-titan-junior-high/broadcast-dub-preview/',
'info_dict': {
'id': '9635',
'display_id': 'broadcast-dub-preview',
@@ -54,25 +46,13 @@ class FunimationIE(InfoExtractor):
'thumbnail': r're:https?://.*\.(?:jpg|png)',
},
'skip': 'Access without user interaction is forbidden by CloudFlare',
}, {
'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/',
'only_matching': True,
}]
_LOGIN_URL = 'http://www.funimation.com/login'
def _download_webpage(self, *args, **kwargs):
try:
return super(FunimationIE, self)._download_webpage(*args, **kwargs)
except ExtractorError as ee:
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
response = ee.cause.read()
if b'>Please complete the security check to access<' in response:
raise ExtractorError(
'Access to funimation.com is blocked by CloudFlare. '
'Please browse to http://www.funimation.com/, solve '
'the reCAPTCHA, export browser cookies to a text file,'
' and then try again with --cookies YOUR_COOKIE_FILE.',
expected=True)
raise
def _extract_cloudflare_session_ua(self, url):
ci_session_cookie = self._get_cookies(url).get('ci_session')
if ci_session_cookie:
@@ -114,119 +94,74 @@ class FunimationIE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
def _search_kane(name):
return self._search_regex(
r"KANE_customdimensions\.%s\s*=\s*'([^']+)';" % name,
webpage, name, default=None)
title_data = self._parse_json(self._search_regex(
r'TITLE_DATA\s*=\s*({[^}]+})',
webpage, 'title data', default=''),
display_id, js_to_json, fatal=False) or {}
video_id = title_data.get('id') or self._search_regex([
r"KANE_customdimensions.videoID\s*=\s*'(\d+)';",
r'<iframe[^>]+src="/player/(\d+)"',
], webpage, 'video_id', default=None)
if not video_id:
player_url = self._html_search_meta([
'al:web:url',
'og:video:url',
'og:video:secure_url',
], webpage, fatal=True)
video_id = self._search_regex(r'/player/(\d+)', player_url, 'video id')
title = episode = title_data.get('title') or _search_kane('videoTitle') or self._og_search_title(webpage)
series = _search_kane('showName')
if series:
title = '%s - %s' % (series, title)
description = self._html_search_meta(['description', 'og:description'], webpage, fatal=True)
try:
sources = self._download_json(
'https://prod-api-funimationnow.dadcdigital.com/api/source/catalog/video/%s/signed/' % video_id,
video_id)['items']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
error = self._parse_json(e.cause.read(), video_id)['errors'][0]
raise ExtractorError('%s said: %s' % (
self.IE_NAME, error.get('detail') or error.get('title')), expected=True)
raise
errors = []
formats = []
ERRORS_MAP = {
'ERROR_MATURE_CONTENT_LOGGED_IN': 'matureContentLoggedIn',
'ERROR_MATURE_CONTENT_LOGGED_OUT': 'matureContentLoggedOut',
'ERROR_SUBSCRIPTION_LOGGED_OUT': 'subscriptionLoggedOut',
'ERROR_VIDEO_EXPIRED': 'videoExpired',
'ERROR_TERRITORY_UNAVAILABLE': 'territoryUnavailable',
'SVODBASIC_SUBSCRIPTION_IN_PLAYER': 'basicSubscription',
'SVODNON_SUBSCRIPTION_IN_PLAYER': 'nonSubscription',
'ERROR_PLAYER_NOT_RESPONDING': 'playerNotResponding',
'ERROR_UNABLE_TO_CONNECT_TO_CDN': 'unableToConnectToCDN',
'ERROR_STREAM_NOT_FOUND': 'streamNotFound',
}
USER_AGENTS = (
# PC UA is served with m3u8 that provides some bonus lower quality formats
('pc', 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0'),
# Mobile UA allows to extract direct links and also does not fail when
# PC UA fails with hulu error (e.g.
# http://www.funimation.com/shows/hacksign/videos/official/role-play)
('mobile', 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'),
)
user_agent = self._extract_cloudflare_session_ua(url)
if user_agent:
USER_AGENTS = ((None, user_agent),)
for kind, user_agent in USER_AGENTS:
request = sanitized_Request(url)
request.add_header('User-Agent', user_agent)
webpage = self._download_webpage(
request, display_id,
'Downloading %s webpage' % kind if kind else 'Downloading webpage')
playlist = self._parse_json(
self._search_regex(
r'var\s+playersData\s*=\s*(\[.+?\]);\n',
webpage, 'players data'),
display_id)[0]['playlist']
items = next(item['items'] for item in playlist if item.get('items'))
item = next(item for item in items if item.get('itemAK') == display_id)
error_messages = {}
video_error_messages = self._search_regex(
r'var\s+videoErrorMessages\s*=\s*({.+?});\n',
webpage, 'error messages', default=None)
if video_error_messages:
error_messages_json = self._parse_json(video_error_messages, display_id, fatal=False)
if error_messages_json:
for _, error in error_messages_json.items():
type_ = error.get('type')
description = error.get('description')
content = error.get('content')
if type_ == 'text' and description and content:
error_message = ERRORS_MAP.get(description)
if error_message:
error_messages[error_message] = content
for video in item.get('videoSet', []):
auth_token = video.get('authToken')
if not auth_token:
continue
funimation_id = video.get('FUNImationID') or video.get('videoId')
preference = 1 if video.get('languageMode') == 'dub' else 0
if not auth_token.startswith('?'):
auth_token = '?%s' % auth_token
for quality, height in (('sd', 480), ('hd', 720), ('hd1080', 1080)):
format_url = video.get('%sUrl' % quality)
if not format_url:
continue
if not format_url.startswith(('http', '//')):
errors.append(format_url)
continue
if determine_ext(format_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats(
format_url + auth_token, display_id, 'mp4', entry_protocol='m3u8_native',
preference=preference, m3u8_id='%s-hls' % funimation_id, fatal=False))
else:
tbr = int_or_none(self._search_regex(
r'-(\d+)[Kk]', format_url, 'tbr', default=None))
formats.append({
'url': format_url + auth_token,
'format_id': '%s-http-%dp' % (funimation_id, height),
'height': height,
'tbr': tbr,
'preference': preference,
})
if not formats and errors:
raise ExtractorError(
'%s returned error: %s'
% (self.IE_NAME, clean_html(error_messages.get(errors[0], errors[0]))),
expected=True)
for source in sources:
source_url = source.get('src')
if not source_url:
continue
source_type = source.get('videoType') or determine_ext(source_url)
if source_type == 'm3u8':
formats.extend(self._extract_m3u8_formats(
source_url, video_id, 'mp4',
m3u8_id='hls', fatal=False))
else:
formats.append({
'format_id': source_type,
'url': source_url,
})
self._sort_formats(formats)
title = item['title']
artist = item.get('artist')
if artist:
title = '%s - %s' % (artist, title)
description = self._og_search_description(webpage) or item.get('description')
thumbnail = self._og_search_thumbnail(webpage) or item.get('posterUrl')
video_id = item.get('itemId') or display_id
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'thumbnail': self._og_search_thumbnail(webpage),
'series': series,
'season_number': int_or_none(title_data.get('seasonNum') or _search_kane('season')),
'episode_number': int_or_none(title_data.get('episodeNum')),
'episode': episode,
'season_id': title_data.get('seriesId'),
'formats': formats,
}

View File

@@ -730,6 +730,21 @@ class GenericIE(InfoExtractor):
'skip_download': True,
}
},
# YouTube <object> embed
{
'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
'md5': '516718101ec834f74318df76259fb3cc',
'info_dict': {
'id': 'msN87y-iEx0',
'ext': 'webm',
'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
'upload_date': '20080526',
'description': 'md5:0ffc78ea3f01b2e2c247d5f8d1d3c18d',
'uploader': 'Christopher Sykes',
'uploader_id': 'ChristopherJSykes',
},
'add_ie': ['Youtube'],
},
# Camtasia studio
{
'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
@@ -1080,6 +1095,21 @@ class GenericIE(InfoExtractor):
},
'add_ie': ['Kaltura'],
},
{
# Kaltura iframe embed
'url': 'http://www.gsd.harvard.edu/event/i-m-pei-a-centennial-celebration/',
'md5': 'ae5ace8eb09dc1a35d03b579a9c2cc44',
'info_dict': {
'id': '0_f2cfbpwy',
'ext': 'mp4',
'title': 'I. M. Pei: A Centennial Celebration',
'description': 'md5:1db8f40c69edc46ca180ba30c567f37c',
'upload_date': '20170403',
'uploader_id': 'batchUser',
'timestamp': 1491232186,
},
'add_ie': ['Kaltura'],
},
# Eagle.Platform embed (generic URL)
{
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
@@ -1923,6 +1953,7 @@ class GenericIE(InfoExtractor):
data-video-url=|
<embed[^>]+?src=|
embedSWF\(?:\s*|
<object[^>]+data=|
new\s+SWFObject\(
)
(["\'])
@@ -2568,7 +2599,7 @@ class GenericIE(InfoExtractor):
webpage, video_id, transform_source=js_to_json)
if jwplayer_data:
info = self._parse_jwplayer_data(
jwplayer_data, video_id, require_title=False)
jwplayer_data, video_id, require_title=False, base_url=url)
if not info.get('title'):
info['title'] = video_title
return info
@@ -2580,7 +2611,7 @@ class GenericIE(InfoExtractor):
return True
vpath = compat_urlparse.urlparse(vurl).path
vext = determine_ext(vpath)
return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js')
return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js', 'xml')
def filter_video(urls):
return list(filter(check_video, urls))

View File

@@ -0,0 +1,92 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_iso8601,
)
class Go90IE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?go90\.com/videos/(?P<id>[0-9a-zA-Z]+)'
_TEST = {
'url': 'https://www.go90.com/videos/84BUqjLpf9D',
'md5': 'efa7670dbbbf21a7b07b360652b24a32',
'info_dict': {
'id': '84BUqjLpf9D',
'ext': 'mp4',
'title': 'Inside The Utah Coalition Against Pornography Convention',
'description': 'VICE\'s Karley Sciortino meets with activists who discuss the state\'s strong anti-porn stance. Then, VICE Sports explains NFL contracts.',
'timestamp': 1491868800,
'upload_date': '20170411',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(
'https://www.go90.com/api/view/items/' + video_id,
video_id, headers={
'Content-Type': 'application/json; charset=utf-8',
}, data=b'{"client":"web","device_type":"pc"}')
title = video_data['title']
main_video_asset = video_data['main_video_asset']
thumbnails = []
formats = []
for asset in video_data.get('assets'):
if asset.get('id') == main_video_asset:
for source in asset.get('sources', []):
source_location = source.get('location')
if not source_location:
continue
source_type = source.get('type')
if source_type == 'hls':
m3u8_formats = self._extract_m3u8_formats(
source_location, video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False)
for f in m3u8_formats:
mobj = re.search(r'/hls-(\d+)-(\d+)K', f['url'])
if mobj:
height, tbr = mobj.groups()
height = int_or_none(height)
f.update({
'height': f.get('height') or height,
'width': f.get('width') or int_or_none(height / 9.0 * 16.0 if height else None),
'tbr': f.get('tbr') or int_or_none(tbr),
})
formats.extend(m3u8_formats)
elif source_type == 'dash':
formats.extend(self._extract_mpd_formats(
source_location, video_id, mpd_id='dash', fatal=False))
else:
formats.append({
'format_id': source.get('name'),
'url': source_location,
'width': int_or_none(source.get('width')),
'height': int_or_none(source.get('height')),
'tbr': int_or_none(source.get('bitrate')),
})
elif asset.get('type') == 'image':
asset_location = asset.get('location')
if not asset_location:
continue
thumbnails.append({
'url': asset_location,
'width': int_or_none(asset.get('width')),
'height': int_or_none(asset.get('height')),
})
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'formats': formats,
'thumbnails': thumbnails,
'description': video_data.get('short_description'),
'like_count': int_or_none(video_data.get('like_count')),
'timestamp': parse_iso8601(video_data.get('released_at')),
}

View File

@@ -92,12 +92,14 @@ class HBOBaseIE(InfoExtractor):
video_url.replace('.tar', '/base_index_w8.m3u8'),
video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
elif source.tag == 'hls':
# #EXT-X-BYTERANGE is not supported by native hls downloader
# and ffmpeg (#10955)
# formats.extend(self._extract_m3u8_formats(
# video_url.replace('.tar', '/base_index.m3u8'),
# video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
continue
m3u8_formats = self._extract_m3u8_formats(
video_url.replace('.tar', '/base_index.m3u8'),
video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
for f in m3u8_formats:
if f.get('vcodec') == 'none' and not f.get('tbr'):
f['tbr'] = int_or_none(self._search_regex(
r'-(\d+)k/', f['url'], 'tbr', default=None))
formats.extend(m3u8_formats)
elif source.tag == 'dash':
formats.extend(self._extract_mpd_formats(
video_url.replace('.tar', '/manifest.mpd'),
@@ -110,7 +112,7 @@ class HBOBaseIE(InfoExtractor):
'width': format_info.get('width'),
'height': format_info.get('height'),
})
self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
self._sort_formats(formats)
thumbnails = []
card_sizes = xpath_element(video_data, 'titleCardSizes')

View File

@@ -91,6 +91,7 @@ class KalturaIE(InfoExtractor):
}],
},
},
'skip': 'Gone. Maybe https://www.safaribooksonline.com/library/tutorials/introduction-to-python-anon/3469/',
'params': {
'skip_download': True,
},
@@ -107,27 +108,37 @@ class KalturaIE(InfoExtractor):
@staticmethod
def _extract_url(webpage):
# Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
mobj = (
re.search(
r"""(?xs)
kWidget\.(?:thumb)?[Ee]mbed\(
\{.*?
(?P<q1>['\"])wid(?P=q1)\s*:\s*
(?P<q2>['\"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*
(?P<q4>['\"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
(?P<q1>['"])wid(?P=q1)\s*:\s*
(?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
(?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
(?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
""", webpage) or
re.search(
r'''(?xs)
(?P<q1>["\'])
(?P<q1>["'])
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
(?P=q1).*?
(?:
entry_?[Ii]d|
(?P<q2>["\'])entry_?[Ii]d(?P=q2)
(?P<q2>["'])entry_?[Ii]d(?P=q2)
)\s*:\s*
(?P<q3>["\'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
''', webpage))
(?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
''', webpage) or
re.search(
r'''(?xs)
<iframe[^>]+src=(?P<q1>["'])
(?:https?:)?//(?:www\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
(?:(?!(?P=q1)).)*
[?&]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
(?P=q1)
''', webpage)
)
if mobj:
embed_info = mobj.groupdict()
url = 'kaltura:%(partner_id)s:%(id)s' % embed_info

View File

@@ -62,13 +62,21 @@ class LimelightBaseIE(InfoExtractor):
fmt = {
'url': stream_url,
'abr': float_or_none(stream.get('audioBitRate')),
'vbr': float_or_none(stream.get('videoBitRate')),
'fps': float_or_none(stream.get('videoFrameRate')),
'width': int_or_none(stream.get('videoWidthInPixels')),
'height': int_or_none(stream.get('videoHeightInPixels')),
'ext': ext,
}
rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp4:.+)$', stream_url)
width = int_or_none(stream.get('videoWidthInPixels'))
height = int_or_none(stream.get('videoHeightInPixels'))
vbr = float_or_none(stream.get('videoBitRate'))
if width or height or vbr:
fmt.update({
'width': width,
'height': height,
'vbr': vbr,
})
else:
fmt['vcodec'] = 'none'
rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', stream_url)
if rtmp:
format_id = 'rtmp'
if stream.get('videoBitRate'):

View File

@@ -0,0 +1,70 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
unified_strdate,
update_url_query,
urlencode_postdata,
)
class MediciIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?medici\.tv/#!/(?P<id>[^?#&]+)'
_TEST = {
'url': 'http://www.medici.tv/#!/daniel-harding-frans-helmerson-verbier-festival-music-camp',
'md5': '004c21bb0a57248085b6ff3fec72719d',
'info_dict': {
'id': '3059',
'ext': 'flv',
'title': 'Daniel Harding conducts the Verbier Festival Music Camp \u2013 With Frans Helmerson',
'description': 'md5:322a1e952bafb725174fd8c1a8212f58',
'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20170408',
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
# Sets csrftoken cookie
self._download_webpage(url, video_id)
MEDICI_URL = 'http://www.medici.tv/'
data = self._download_json(
MEDICI_URL, video_id,
data=urlencode_postdata({
'json': 'true',
'page': '/%s' % video_id,
'timezone_offset': -420,
}), headers={
'X-CSRFToken': self._get_cookies(url)['csrftoken'].value,
'X-Requested-With': 'XMLHttpRequest',
'Referer': MEDICI_URL,
'Content-Type': 'application/x-www-form-urlencoded',
})
video = data['video']['videos']['video1']
title = video.get('nom') or data['title']
video_id = video.get('id') or video_id
formats = self._extract_f4m_formats(
update_url_query(video['url_akamai'], {
'hdcore': '3.1.0',
'plugin=aasp': '3.1.0.43.124',
}), video_id, f4m_id='hds')
description = data.get('meta_description')
thumbnail = video.get('url_thumbnail') or data.get('main_image')
upload_date = unified_strdate(data['video'].get('date'))
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'upload_date': upload_date,
'formats': formats,
}

View File

@@ -97,7 +97,7 @@ class MixcloudIE(InfoExtractor):
view_count = str_to_int(self._search_regex(
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
r'/listeners/?">([0-9,.]+)</a>',
r'm-tooltip=["\']([\d,.]+) plays'],
r'(?:m|data)-tooltip=["\']([\d,.]+) plays'],
webpage, 'play count', default=None))
return {
@@ -138,12 +138,12 @@ class MixcloudPlaylistBaseIE(InfoExtractor):
def _get_user_description(self, page_content):
return self._html_search_regex(
r'<div[^>]+class="description-text"[^>]*>(.+?)</div>',
r'<div[^>]+class="profile-bio"[^>]*>(.+?)</div>',
page_content, 'user description', fatal=False)
class MixcloudUserIE(MixcloudPlaylistBaseIE):
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/(?P<type>uploads|favorites|listens)?/?$'
_VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/(?P<type>uploads|favorites|listens)?/?$'
IE_NAME = 'mixcloud:user'
_TESTS = [{
@@ -151,7 +151,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'info_dict': {
'id': 'dholbach_uploads',
'title': 'Daniel Holbach (uploads)',
'description': 'md5:327af72d1efeb404a8216c27240d1370',
'description': 'md5:def36060ac8747b3aabca54924897e47',
},
'playlist_mincount': 11,
}, {
@@ -159,7 +159,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'info_dict': {
'id': 'dholbach_uploads',
'title': 'Daniel Holbach (uploads)',
'description': 'md5:327af72d1efeb404a8216c27240d1370',
'description': 'md5:def36060ac8747b3aabca54924897e47',
},
'playlist_mincount': 11,
}, {
@@ -167,7 +167,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'info_dict': {
'id': 'dholbach_favorites',
'title': 'Daniel Holbach (favorites)',
'description': 'md5:327af72d1efeb404a8216c27240d1370',
'description': 'md5:def36060ac8747b3aabca54924897e47',
},
'params': {
'playlist_items': '1-100',
@@ -178,7 +178,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'info_dict': {
'id': 'dholbach_listens',
'title': 'Daniel Holbach (listens)',
'description': 'md5:327af72d1efeb404a8216c27240d1370',
'description': 'md5:def36060ac8747b3aabca54924897e47',
},
'params': {
'playlist_items': '1-100',
@@ -216,7 +216,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/playlists/(?P<playlist>[^/]+)/?$'
_VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/playlists/(?P<playlist>[^/]+)/?$'
IE_NAME = 'mixcloud:playlist'
_TESTS = [{
@@ -229,12 +229,7 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
'playlist_mincount': 16,
}, {
'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/',
'info_dict': {
'id': 'maxvibes_jazzcat-on-ness-radio',
'title': 'Jazzcat on Ness Radio',
'description': 'md5:7bbbf0d6359a0b8cda85224be0f8f263',
},
'playlist_mincount': 23
'only_matching': True,
}]
def _real_extract(self, url):
@@ -243,15 +238,16 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
playlist_id = mobj.group('playlist')
video_id = '%s_%s' % (user_id, playlist_id)
profile = self._download_webpage(
webpage = self._download_webpage(
url, user_id,
note='Downloading playlist page',
errnote='Unable to download playlist page')
description = self._get_user_description(profile)
playlist_title = self._html_search_regex(
r'<span[^>]+class="[^"]*list-playlist-title[^"]*"[^>]*>(.*?)</span>',
profile, 'playlist title')
title = self._html_search_regex(
r'<a[^>]+class="parent active"[^>]*><b>\d+</b><span[^>]*>([^<]+)',
webpage, 'playlist title',
default=None) or self._og_search_title(webpage, fatal=False)
description = self._get_user_description(webpage)
entries = OnDemandPagedList(
functools.partial(
@@ -259,11 +255,11 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
'%s/playlists/%s' % (user_id, playlist_id), video_id, 'tracklist'),
self._PAGE_SIZE)
return self.playlist_result(entries, video_id, playlist_title, description)
return self.playlist_result(entries, video_id, title, description)
class MixcloudStreamIE(MixcloudPlaylistBaseIE):
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/stream/?$'
_VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/stream/?$'
IE_NAME = 'mixcloud:stream'
_TEST = {

View File

@@ -313,9 +313,9 @@ class NPOIE(NPOBaseIE):
class NPOLiveIE(NPOBaseIE):
IE_NAME = 'npo.nl:live'
_VALID_URL = r'https?://(?:www\.)?npo\.nl/live/(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:www\.)?npo\.nl/live(?:/(?P<id>[^/?#&]+))?'
_TEST = {
_TESTS = [{
'url': 'http://www.npo.nl/live/npo-1',
'info_dict': {
'id': 'LI_NL1_4188102',
@@ -327,10 +327,13 @@ class NPOLiveIE(NPOBaseIE):
'params': {
'skip_download': True,
}
}
}, {
'url': 'http://www.npo.nl/live',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
display_id = self._match_id(url) or 'npo-1'
webpage = self._download_webpage(url, display_id)

View File

@@ -176,7 +176,7 @@ class OdnoklassnikiIE(InfoExtractor):
})
return info
quality = qualities(('mobile', 'lowest', 'low', 'sd', 'hd'))
quality = qualities(('mobile', 'lowest', 'low', 'sd', 'hd', 'full'))
formats = [{
'url': f['url'],

View File

@@ -75,51 +75,38 @@ class OpenloadIE(InfoExtractor):
'<span[^>]+id="[^"]+"[^>]*>([0-9A-Za-z]+)</span>',
webpage, 'openload ID')
video_url_chars = []
first_char = ord(ol_id[0])
key = first_char - 55
maxKey = max(2, key)
key = min(maxKey, len(ol_id) - 38)
t = ol_id[key:key + 36]
hashMap = {}
v = ol_id.replace(t, '')
h = 0
while h < len(t):
f = t[h:h + 3]
i = int(f, 8)
hashMap[h / 3] = i
h += 3
h = 0
H = 0
while h < len(v):
B = ''
C = ''
if len(v) >= h + 2:
B = v[h:h + 2]
if len(v) >= h + 3:
C = v[h:h + 3]
i = int(B, 16)
h += 2
if H % 3 == 0:
i = int(C, 8)
h += 1
elif H % 2 == 0 and H != 0 and ord(v[H - 1]) < 60:
i = int(C, 10)
h += 1
index = H % 7
A = hashMap[index]
i ^= 213
i ^= A
video_url_chars.append(compat_chr(i))
H += 1
decoded = ''
a = ol_id[0:24]
b = []
for i in range(0, len(a), 8):
b.append(int(a[i:i + 8] or '0', 16))
ol_id = ol_id[24:]
j = 0
k = 0
while j < len(ol_id):
c = 128
d = 0
e = 0
f = 0
_more = True
while _more:
if j + 1 >= len(ol_id):
c = 143
f = int(ol_id[j:j + 2] or '0', 16)
j += 2
d += (f & 127) << e
e += 7
_more = f >= c
g = d ^ b[k % 3]
for i in range(4):
char_dec = (g >> 8 * i) & (c + 127)
char = compat_chr(char_dec)
if char != '#':
decoded += char
k += 1
video_url = 'https://openload.co/stream/%s?mime=true'
video_url = video_url % (''.join(video_url_chars))
video_url = video_url % decoded
title = self._og_search_title(webpage, default=None) or self._search_regex(
r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,

View File

@@ -0,0 +1,138 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
clean_html,
ExtractorError,
remove_end,
strip_or_none,
unified_timestamp,
urljoin,
)
class PacktPubBaseIE(InfoExtractor):
_PACKT_BASE = 'https://www.packtpub.com'
_MAPT_REST = '%s/mapt-rest' % _PACKT_BASE
class PacktPubIE(PacktPubBaseIE):
_VALID_URL = r'https?://(?:www\.)?packtpub\.com/mapt/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>\d+)/(?P<id>\d+)'
_TEST = {
'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215/20528/20530/Project+Intro',
'md5': '1e74bd6cfd45d7d07666f4684ef58f70',
'info_dict': {
'id': '20530',
'ext': 'mp4',
'title': 'Project Intro',
'thumbnail': r're:(?i)^https?://.*\.jpg',
'timestamp': 1490918400,
'upload_date': '20170331',
},
}
def _handle_error(self, response):
if response.get('status') != 'success':
raise ExtractorError(
'% said: %s' % (self.IE_NAME, response['message']),
expected=True)
def _download_json(self, *args, **kwargs):
response = super(PacktPubIE, self)._download_json(*args, **kwargs)
self._handle_error(response)
return response
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
course_id, chapter_id, video_id = mobj.group(
'course_id', 'chapter_id', 'id')
video = self._download_json(
'%s/users/me/products/%s/chapters/%s/sections/%s'
% (self._MAPT_REST, course_id, chapter_id, video_id), video_id,
'Downloading JSON video')['data']
content = video.get('content')
if not content:
raise ExtractorError('This video is locked', expected=True)
video_url = content['file']
metadata = self._download_json(
'%s/products/%s/chapters/%s/sections/%s/metadata'
% (self._MAPT_REST, course_id, chapter_id, video_id),
video_id)['data']
title = metadata['pageTitle']
course_title = metadata.get('title')
if course_title:
title = remove_end(title, ' - %s' % course_title)
timestamp = unified_timestamp(metadata.get('publicationDate'))
thumbnail = urljoin(self._PACKT_BASE, metadata.get('filepath'))
return {
'id': video_id,
'url': video_url,
'title': title,
'thumbnail': thumbnail,
'timestamp': timestamp,
}
class PacktPubCourseIE(PacktPubBaseIE):
_VALID_URL = r'(?P<url>https?://(?:www\.)?packtpub\.com/mapt/video/[^/]+/(?P<id>\d+))'
_TEST = {
'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215',
'info_dict': {
'id': '9781787122215',
'title': 'Learn Nodejs by building 12 projects [Video]',
},
'playlist_count': 90,
}
@classmethod
def suitable(cls, url):
return False if PacktPubIE.suitable(url) else super(
PacktPubCourseIE, cls).suitable(url)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
url, course_id = mobj.group('url', 'id')
course = self._download_json(
'%s/products/%s/metadata' % (self._MAPT_REST, course_id),
course_id)['data']
entries = []
for chapter_num, chapter in enumerate(course['tableOfContents'], 1):
if chapter.get('type') != 'chapter':
continue
children = chapter.get('children')
if not isinstance(children, list):
continue
chapter_info = {
'chapter': chapter.get('title'),
'chapter_number': chapter_num,
'chapter_id': chapter.get('id'),
}
for section in children:
if section.get('type') != 'section':
continue
section_url = section.get('seoUrl')
if not isinstance(section_url, compat_str):
continue
entry = {
'_type': 'url_transparent',
'url': urljoin(url + '/', section_url),
'title': strip_or_none(section.get('title')),
'description': clean_html(section.get('summary')),
'ie_key': PacktPubIE.ie_key(),
}
entry.update(chapter_info)
entries.append(entry)
return self.playlist_result(entries, course_id, course.get('title'))

View File

@@ -20,7 +20,7 @@ class PeriscopeBaseIE(InfoExtractor):
class PeriscopeIE(PeriscopeBaseIE):
IE_DESC = 'Periscope'
IE_NAME = 'periscope'
_VALID_URL = r'https?://(?:www\.)?periscope\.tv/[^/]+/(?P<id>[^/?#]+)'
_VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/[^/]+/(?P<id>[^/?#]+)'
# Alive example URLs can be found here http://onperiscope.com/
_TESTS = [{
'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==',
@@ -41,6 +41,9 @@ class PeriscopeIE(PeriscopeBaseIE):
}, {
'url': 'https://www.periscope.tv/bastaakanoggano/1OdKrlkZZjOJX',
'only_matching': True,
}, {
'url': 'https://www.periscope.tv/w/1ZkKzPbMVggJv',
'only_matching': True,
}]
@staticmethod
@@ -103,7 +106,7 @@ class PeriscopeIE(PeriscopeBaseIE):
class PeriscopeUserIE(PeriscopeBaseIE):
_VALID_URL = r'https?://(?:www\.)?periscope\.tv/(?P<id>[^/]+)/?$'
_VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/(?P<id>[^/]+)/?$'
IE_DESC = 'Periscope user videos'
IE_NAME = 'periscope:user'

View File

@@ -1,23 +1,40 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..compat import (
compat_urlparse,
compat_str,
)
from ..utils import (
determine_ext,
ExtractorError,
determine_ext,
find_xpath_attr,
fix_xml_ampersands,
GeoRestrictedError,
int_or_none,
parse_duration,
strip_or_none,
try_get,
unified_strdate,
unified_timestamp,
update_url_query,
urljoin,
xpath_text,
)
class RaiBaseIE(InfoExtractor):
def _extract_relinker_formats(self, relinker_url, video_id):
_UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
_GEO_COUNTRIES = ['IT']
_GEO_BYPASS = False
def _extract_relinker_info(self, relinker_url, video_id):
formats = []
geoprotection = None
is_live = None
duration = None
for platform in ('mon', 'flash', 'native'):
relinker = self._download_xml(
@@ -27,9 +44,27 @@ class RaiBaseIE(InfoExtractor):
query={'output': 45, 'pl': platform},
headers=self.geo_verification_headers())
media_url = find_xpath_attr(relinker, './url', 'type', 'content').text
if not geoprotection:
geoprotection = xpath_text(
relinker, './geoprotection', default=None) == 'Y'
if not is_live:
is_live = xpath_text(
relinker, './is_live', default=None) == 'Y'
if not duration:
duration = parse_duration(xpath_text(
relinker, './duration', default=None))
url_elem = find_xpath_attr(relinker, './url', 'type', 'content')
if url_elem is None:
continue
media_url = url_elem.text
# This does not imply geo restriction (e.g.
# http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html)
if media_url == 'http://download.rai.it/video_no_available.mp4':
self.raise_geo_restricted()
continue
ext = determine_ext(media_url)
if (ext == 'm3u8' and platform != 'mon') or (ext == 'f4m' and platform != 'flash'):
@@ -53,215 +88,333 @@ class RaiBaseIE(InfoExtractor):
'format_id': 'http-%d' % bitrate if bitrate > 0 else 'http',
})
return formats
if not formats and geoprotection is True:
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
def _extract_from_content_id(self, content_id, base_url):
return dict((k, v) for k, v in {
'is_live': is_live,
'duration': duration,
'formats': formats,
}.items() if v is not None)
@staticmethod
def _extract_subtitles(url, subtitle_url):
subtitles = {}
if subtitle_url and isinstance(subtitle_url, compat_str):
subtitle_url = urljoin(url, subtitle_url)
STL_EXT = '.stl'
SRT_EXT = '.srt'
subtitles['it'] = [{
'ext': 'stl',
'url': subtitle_url,
}]
if subtitle_url.endswith(STL_EXT):
srt_url = subtitle_url[:-len(STL_EXT)] + SRT_EXT
subtitles['it'].append({
'ext': 'srt',
'url': srt_url,
})
return subtitles
class RaiPlayIE(RaiBaseIE):
_VALID_URL = r'(?P<url>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s)\.html)' % RaiBaseIE._UUID_RE
_TESTS = [{
'url': 'http://www.raiplay.it/video/2016/10/La-Casa-Bianca-e06118bb-59a9-4636-b914-498e4cfd2c66.html?source=twitter',
'md5': '340aa3b7afb54bfd14a8c11786450d76',
'info_dict': {
'id': 'e06118bb-59a9-4636-b914-498e4cfd2c66',
'ext': 'mp4',
'title': 'La Casa Bianca',
'alt_title': 'S2016 - Puntata del 23/10/2016',
'description': 'md5:a09d45890850458077d1f68bb036e0a5',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Rai 3',
'creator': 'Rai 3',
'duration': 3278,
'timestamp': 1477764300,
'upload_date': '20161029',
'series': 'La Casa Bianca',
'season': '2016',
},
}, {
'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
'md5': '8970abf8caf8aef4696e7b1f2adfc696',
'info_dict': {
'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
'ext': 'mp4',
'title': 'Report del 07/04/2014',
'alt_title': 'S2013/14 - Puntata del 07/04/2014',
'description': 'md5:f27c544694cacb46a078db84ec35d2d9',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Rai 5',
'creator': 'Rai 5',
'duration': 6160,
'series': 'Report',
'season_number': 5,
'season': '2013/14',
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
url, video_id = mobj.group('url', 'id')
media = self._download_json(
'%s?json' % url, video_id, 'Downloading video JSON')
title = media['name']
video = media['video']
relinker_info = self._extract_relinker_info(video['contentUrl'], video_id)
self._sort_formats(relinker_info['formats'])
thumbnails = []
if 'images' in media:
for _, value in media.get('images').items():
if value:
thumbnails.append({
'url': value.replace('[RESOLUTION]', '600x400')
})
timestamp = unified_timestamp(try_get(
media, lambda x: x['availabilities'][0]['start'], compat_str))
subtitles = self._extract_subtitles(url, video.get('subtitles'))
info = {
'id': video_id,
'title': title,
'alt_title': media.get('subtitle'),
'description': media.get('description'),
'uploader': media.get('channel'),
'creator': media.get('editor'),
'duration': parse_duration(video.get('duration')),
'timestamp': timestamp,
'thumbnails': thumbnails,
'series': try_get(
media, lambda x: x['isPartOf']['name'], compat_str),
'season_number': int_or_none(try_get(
media, lambda x: x['isPartOf']['numeroStagioni'])),
'season': media.get('stagione') or None,
'subtitles': subtitles,
}
info.update(relinker_info)
return info
class RaiIE(RaiBaseIE):
_VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/dl/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE
_TESTS = [{
# var uniquename = "ContentItem-..."
# data-id="ContentItem-..."
'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html',
'info_dict': {
'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9',
'ext': 'mp4',
'title': 'TG PRIMO TEMPO',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 1758,
'upload_date': '20140612',
}
}, {
# with ContentItem in many metas
'url': 'http://www.rainews.it/dl/rainews/media/Weekend-al-cinema-da-Hollywood-arriva-il-thriller-di-Tate-Taylor-La-ragazza-del-treno-1632c009-c843-4836-bb65-80c33084a64b.html',
'info_dict': {
'id': '1632c009-c843-4836-bb65-80c33084a64b',
'ext': 'mp4',
'title': 'Weekend al cinema, da Hollywood arriva il thriller di Tate Taylor "La ragazza del treno"',
'description': 'I film in uscita questa settimana.',
'thumbnail': r're:^https?://.*\.png$',
'duration': 833,
'upload_date': '20161103',
}
}, {
# with ContentItem in og:url
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html',
'md5': '11959b4e44fa74de47011b5799490adf',
'info_dict': {
'id': 'efb17665-691c-45d5-a60c-5301333cbb0c',
'ext': 'mp4',
'title': 'TG1 ore 20:00 del 03/11/2016',
'description': 'TG1 edizione integrale ore 20:00 del giorno 03/11/2016',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 2214,
'upload_date': '20161103',
}
}, {
# drawMediaRaiTV(...)
'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html',
'md5': '2dd727e61114e1ee9c47f0da6914e178',
'info_dict': {
'id': '59d69d28-6bb6-409d-a4b5-ed44096560af',
'ext': 'mp4',
'title': 'Il pacco',
'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a',
'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20141221',
},
}, {
# initEdizione('ContentItem-...'
'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined',
'info_dict': {
'id': 'c2187016-8484-4e3a-8ac8-35e475b07303',
'ext': 'mp4',
'title': r're:TG1 ore \d{2}:\d{2} del \d{2}/\d{2}/\d{4}',
'duration': 2274,
'upload_date': '20170401',
},
'skip': 'Changes daily',
}, {
# HDS live stream with only relinker URL
'url': 'http://www.rai.tv/dl/RaiTV/dirette/PublishingBlock-1912dbbf-3f96-44c3-b4cf-523681fbacbc.html?channel=EuroNews',
'info_dict': {
'id': '1912dbbf-3f96-44c3-b4cf-523681fbacbc',
'ext': 'flv',
'title': 'EuroNews',
},
'params': {
'skip_download': True,
},
}, {
# HLS live stream with ContentItem in og:url
'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html',
'info_dict': {
'id': '3156f2f2-dc70-4953-8e2f-70d7489d4ce9',
'ext': 'mp4',
'title': 'La diretta di Rainews24',
},
'params': {
'skip_download': True,
},
}]
def _extract_from_content_id(self, content_id, url):
media = self._download_json(
'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-%s.html?json' % content_id,
content_id, 'Downloading video JSON')
title = media['name'].strip()
media_type = media['type']
if 'Audio' in media_type:
relinker_info = {
'formats': {
'format_id': media.get('formatoAudio'),
'url': media['audioUrl'],
'ext': media.get('formatoAudio'),
}
}
elif 'Video' in media_type:
relinker_info = self._extract_relinker_info(media['mediaUri'], content_id)
else:
raise ExtractorError('not a media file')
self._sort_formats(relinker_info['formats'])
thumbnails = []
for image_type in ('image', 'image_medium', 'image_300'):
thumbnail_url = media.get(image_type)
if thumbnail_url:
thumbnails.append({
'url': compat_urlparse.urljoin(base_url, thumbnail_url),
'url': compat_urlparse.urljoin(url, thumbnail_url),
})
formats = []
media_type = media['type']
if 'Audio' in media_type:
formats.append({
'format_id': media.get('formatoAudio'),
'url': media['audioUrl'],
'ext': media.get('formatoAudio'),
})
elif 'Video' in media_type:
formats.extend(self._extract_relinker_formats(media['mediaUri'], content_id))
self._sort_formats(formats)
else:
raise ExtractorError('not a media file')
subtitles = self._extract_subtitles(url, media.get('subtitlesUrl'))
subtitles = {}
captions = media.get('subtitlesUrl')
if captions:
STL_EXT = '.stl'
SRT_EXT = '.srt'
if captions.endswith(STL_EXT):
captions = captions[:-len(STL_EXT)] + SRT_EXT
subtitles['it'] = [{
'ext': 'srt',
'url': captions,
}]
return {
info = {
'id': content_id,
'title': media['name'],
'description': media.get('desc'),
'title': title,
'description': strip_or_none(media.get('desc')),
'thumbnails': thumbnails,
'uploader': media.get('author'),
'upload_date': unified_strdate(media.get('date')),
'duration': parse_duration(media.get('length')),
'formats': formats,
'subtitles': subtitles,
}
info.update(relinker_info)
class RaiTVIE(RaiBaseIE):
_VALID_URL = r'https?://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/(?:[^/]+/)+(?:media|ondemand)/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html'
_TESTS = [
{
'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
'md5': '8970abf8caf8aef4696e7b1f2adfc696',
'info_dict': {
'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
'ext': 'mp4',
'title': 'Report del 07/04/2014',
'description': 'md5:f27c544694cacb46a078db84ec35d2d9',
'upload_date': '20140407',
'duration': 6160,
'thumbnail': r're:^https?://.*\.jpg$',
}
},
{
# no m3u8 stream
'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html',
# HDS download, MD5 is unstable
'info_dict': {
'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9',
'ext': 'flv',
'title': 'TG PRIMO TEMPO',
'upload_date': '20140612',
'duration': 1758,
'thumbnail': r're:^https?://.*\.jpg$',
},
'skip': 'Geo-restricted to Italy',
},
{
'url': 'http://www.rainews.it/dl/rainews/media/state-of-the-net-Antonella-La-Carpia-regole-virali-7aafdea9-0e5d-49d5-88a6-7e65da67ae13.html',
'md5': '35cf7c229f22eeef43e48b5cf923bef0',
'info_dict': {
'id': '7aafdea9-0e5d-49d5-88a6-7e65da67ae13',
'ext': 'mp4',
'title': 'State of the Net, Antonella La Carpia: regole virali',
'description': 'md5:b0ba04a324126903e3da7763272ae63c',
'upload_date': '20140613',
},
'skip': 'Error 404',
},
{
'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-b4a49761-e0cc-4b14-8736-2729f6f73132-tg2.html',
'info_dict': {
'id': 'b4a49761-e0cc-4b14-8736-2729f6f73132',
'ext': 'mp4',
'title': 'Alluvione in Sardegna e dissesto idrogeologico',
'description': 'Edizione delle ore 20:30 ',
},
'skip': 'invalid urls',
},
{
'url': 'http://www.ilcandidato.rai.it/dl/ray/media/Il-Candidato---Primo-episodio-Le-Primarie-28e5525a-b495-45e8-a7c3-bc48ba45d2b6.html',
'md5': 'e57493e1cb8bc7c564663f363b171847',
'info_dict': {
'id': '28e5525a-b495-45e8-a7c3-bc48ba45d2b6',
'ext': 'mp4',
'title': 'Il Candidato - Primo episodio: "Le Primarie"',
'description': 'md5:364b604f7db50594678f483353164fb8',
'upload_date': '20140923',
'duration': 386,
'thumbnail': r're:^https?://.*\.jpg$',
}
},
]
return info
def _real_extract(self, url):
video_id = self._match_id(url)
return self._extract_from_content_id(video_id, url)
class RaiIE(RaiBaseIE):
_VALID_URL = r'https?://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html'
_TESTS = [
{
'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html',
'md5': '2dd727e61114e1ee9c47f0da6914e178',
'info_dict': {
'id': '59d69d28-6bb6-409d-a4b5-ed44096560af',
'ext': 'mp4',
'title': 'Il pacco',
'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a',
'upload_date': '20141221',
},
},
{
# Direct relinker URL
'url': 'http://www.rai.tv/dl/RaiTV/dirette/PublishingBlock-1912dbbf-3f96-44c3-b4cf-523681fbacbc.html?channel=EuroNews',
# HDS live stream, MD5 is unstable
'info_dict': {
'id': '1912dbbf-3f96-44c3-b4cf-523681fbacbc',
'ext': 'flv',
'title': 'EuroNews',
},
'skip': 'Geo-restricted to Italy',
},
{
# Embedded content item ID
'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined',
'md5': '84c1135ce960e8822ae63cec34441d63',
'info_dict': {
'id': '0960e765-62c8-474a-ac4b-7eb3e2be39c8',
'ext': 'mp4',
'title': 'TG1 ore 20:00 del 02/07/2016',
'upload_date': '20160702',
},
},
{
'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html',
# HDS live stream, MD5 is unstable
'info_dict': {
'id': '3156f2f2-dc70-4953-8e2f-70d7489d4ce9',
'ext': 'flv',
'title': 'La diretta di Rainews24',
},
},
]
@classmethod
def suitable(cls, url):
return False if RaiTVIE.suitable(url) else super(RaiIE, cls).suitable(url)
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
iframe_url = self._search_regex(
[r'<iframe[^>]+src="([^"]*/dl/[^"]+\?iframe\b[^"]*)"',
r'drawMediaRaiTV\(["\'](.+?)["\']'],
webpage, 'iframe', default=None)
if iframe_url:
if not iframe_url.startswith('http'):
iframe_url = compat_urlparse.urljoin(url, iframe_url)
return self.url_result(iframe_url)
content_item_id = None
content_item_id = self._search_regex(
r'initEdizione\((?P<q1>[\'"])ContentItem-(?P<content_id>[^\'"]+)(?P=q1)',
webpage, 'content item ID', group='content_id', default=None)
content_item_url = self._html_search_meta(
('og:url', 'og:video', 'og:video:secure_url', 'twitter:url',
'twitter:player', 'jsonlink'), webpage, default=None)
if content_item_url:
content_item_id = self._search_regex(
r'ContentItem-(%s)' % self._UUID_RE, content_item_url,
'content item id', default=None)
if not content_item_id:
content_item_id = self._search_regex(
r'''(?x)
(?:
(?:initEdizione|drawMediaRaiTV)\(|
<(?:[^>]+\bdata-id|var\s+uniquename)=
)
(["\'])
(?:(?!\1).)*\bContentItem-(?P<id>%s)
''' % self._UUID_RE,
webpage, 'content item id', default=None, group='id')
content_item_ids = set()
if content_item_id:
return self._extract_from_content_id(content_item_id, url)
content_item_ids.add(content_item_id)
if video_id not in content_item_ids:
content_item_ids.add(video_id)
relinker_url = compat_urlparse.urljoin(url, self._search_regex(
r'(?:var\s+videoURL|mediaInfo\.mediaUri)\s*=\s*(?P<q1>[\'"])(?P<url>(https?:)?//mediapolis\.rai\.it/relinker/relinkerServlet\.htm\?cont=\d+)(?P=q1)',
webpage, 'relinker URL', group='url'))
formats = self._extract_relinker_formats(relinker_url, video_id)
self._sort_formats(formats)
for content_item_id in content_item_ids:
try:
return self._extract_from_content_id(content_item_id, url)
except GeoRestrictedError:
raise
except ExtractorError:
pass
relinker_url = self._search_regex(
r'''(?x)
(?:
var\s+videoURL|
mediaInfo\.mediaUri
)\s*=\s*
([\'"])
(?P<url>
(?:https?:)?
//mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\?
(?:(?!\1).)*\bcont=(?:(?!\1).)+)\1
''',
webpage, 'relinker URL', group='url')
relinker_info = self._extract_relinker_info(
urljoin(url, relinker_url), video_id)
self._sort_formats(relinker_info['formats'])
title = self._search_regex(
r'var\s+videoTitolo\s*=\s*([\'"])(?P<title>[^\'"]+)\1',
webpage, 'title', group='title', default=None) or self._og_search_title(webpage)
webpage, 'title', group='title',
default=None) or self._og_search_title(webpage)
return {
info = {
'id': video_id,
'title': title,
'formats': formats,
}
info.update(relinker_info)
return info

View File

@@ -13,15 +13,15 @@ from ..utils import (
class RBMARadioIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<show_id>[^/]+)/episodes/(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:www\.)?(?:rbmaradio|redbullradio)\.com/shows/(?P<show_id>[^/]+)/episodes/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'https://www.rbmaradio.com/shows/main-stage/episodes/ford-lopatin-live-at-primavera-sound-2011',
'md5': '6bc6f9bcb18994b4c983bc3bf4384d95',
'info_dict': {
'id': 'ford-lopatin-live-at-primavera-sound-2011',
'ext': 'mp3',
'title': 'Main Stage - Ford & Lopatin',
'description': 'md5:4f340fb48426423530af5a9d87bd7b91',
'title': 'Main Stage - Ford & Lopatin at Primavera Sound',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 2452,
'timestamp': 1307103164,

View File

@@ -1,13 +1,26 @@
# coding: utf-8
from __future__ import unicode_literals
import base64
import re
from .common import InfoExtractor
from ..utils import int_or_none
from ..aes import aes_cbc_decrypt
from ..compat import (
compat_ord,
compat_str,
)
from ..utils import (
bytes_to_intlist,
ExtractorError,
intlist_to_bytes,
int_or_none,
strip_or_none,
)
class RTL2IE(InfoExtractor):
IE_NAME = 'rtl2'
_VALID_URL = r'http?://(?:www\.)?rtl2\.de/[^?#]*?/(?P<id>[^?#/]*?)(?:$|/(?:$|[?#]))'
_TESTS = [{
'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0',
@@ -98,3 +111,98 @@ class RTL2IE(InfoExtractor):
'duration': int_or_none(video_info.get('duration')),
'formats': formats,
}
class RTL2YouBaseIE(InfoExtractor):
_BACKWERK_BASE_URL = 'https://p-you-backwerk.rtl2apps.de/'
class RTL2YouIE(RTL2YouBaseIE):
IE_NAME = 'rtl2:you'
_VALID_URL = r'http?://you\.rtl2\.de/(?:video/\d+/|youplayer/index\.html\?.*?\bvid=)(?P<id>\d+)'
_TESTS = [{
'url': 'http://you.rtl2.de/video/3002/15740/MJUNIK%20%E2%80%93%20Home%20of%20YOU/307-hirn-wo-bist-du',
'info_dict': {
'id': '15740',
'ext': 'mp4',
'title': 'MJUNIK Home of YOU - #307 Hirn, wo bist du?!',
'description': 'md5:ddaa95c61b372b12b66e115b2772fe01',
'age_limit': 12,
},
}, {
'url': 'http://you.rtl2.de/youplayer/index.html?vid=15712',
'only_matching': True,
}]
_AES_KEY = b'\xe9W\xe4.<*\xb8\x1a\xd2\xb6\x92\xf3C\xd3\xefL\x1b\x03*\xbbbH\xc0\x03\xffo\xc2\xf2(\xaa\xaa!'
_GEO_COUNTRIES = ['DE']
def _real_extract(self, url):
video_id = self._match_id(url)
stream_data = self._download_json(
self._BACKWERK_BASE_URL + 'stream/video/' + video_id, video_id)
data, iv = base64.b64decode(stream_data['streamUrl']).decode().split(':')
stream_url = intlist_to_bytes(aes_cbc_decrypt(
bytes_to_intlist(base64.b64decode(data)),
bytes_to_intlist(self._AES_KEY),
bytes_to_intlist(base64.b64decode(iv))
))
if b'rtl2_you_video_not_found' in stream_url:
raise ExtractorError('video not found', expected=True)
formats = self._extract_m3u8_formats(
stream_url[:-compat_ord(stream_url[-1])].decode(),
video_id, 'mp4', 'm3u8_native')
self._sort_formats(formats)
video_data = self._download_json(
self._BACKWERK_BASE_URL + 'video/' + video_id, video_id)
series = video_data.get('formatTitle')
title = episode = video_data.get('title') or series
if series and series != title:
title = '%s - %s' % (series, title)
return {
'id': video_id,
'title': title,
'formats': formats,
'description': strip_or_none(video_data.get('description')),
'thumbnail': video_data.get('image'),
'duration': int_or_none(stream_data.get('duration') or video_data.get('duration'), 1000),
'series': series,
'episode': episode,
'age_limit': int_or_none(video_data.get('minimumAge')),
}
class RTL2YouSeriesIE(RTL2YouBaseIE):
IE_NAME = 'rtl2:you:series'
_VALID_URL = r'http?://you\.rtl2\.de/videos/(?P<id>\d+)'
_TEST = {
'url': 'http://you.rtl2.de/videos/115/dragon-ball',
'info_dict': {
'id': '115',
},
'playlist_mincount': 5,
}
def _real_extract(self, url):
series_id = self._match_id(url)
stream_data = self._download_json(
self._BACKWERK_BASE_URL + 'videos',
series_id, query={
'formatId': series_id,
'limit': 1000000000,
})
entries = []
for video in stream_data.get('videos', []):
video_id = compat_str(video['videoId'])
if not video_id:
continue
entries.append(self.url_result(
'http://you.rtl2.de/video/%s/%s' % (series_id, video_id),
'RTL2You', video_id))
return self.playlist_result(entries, series_id)

View File

@@ -26,7 +26,7 @@ class RudoIE(InfoExtractor):
}
@classmethod
def _extract_url(self, webpage):
def _extract_url(cls, webpage):
mobj = re.search(
r'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)',
webpage)

View File

@@ -0,0 +1,32 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .ooyala import OoyalaIE
class TheSunIE(InfoExtractor):
_VALID_URL = r'https://(?:www\.)?thesun\.co\.uk/[^/]+/(?P<id>\d+)'
_TEST = {
'url': 'https://www.thesun.co.uk/tvandshowbiz/2261604/orlando-bloom-and-katy-perry-post-adorable-instagram-video-together-celebrating-thanksgiving-after-split-rumours/',
'info_dict': {
'id': '2261604',
'title': 'md5:cba22f48bad9218b64d5bbe0e16afddf',
},
'playlist_count': 2,
}
def _real_extract(self, url):
article_id = self._match_id(url)
webpage = self._download_webpage(url, article_id)
entries = []
for ooyala_id in re.findall(
r'<[^>]+\b(?:id\s*=\s*"thesun-ooyala-player-|data-content-id\s*=\s*")([^"]+)',
webpage):
entries.append(OoyalaIE._build_url_result(ooyala_id))
return self.playlist_result(
entries, article_id, self._og_search_title(webpage, fatal=False))

View File

@@ -0,0 +1,62 @@
# encoding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import int_or_none
class TV2HuIE(InfoExtractor):
IE_NAME = 'tv2.hu'
_VALID_URL = r'https?://(?:www\.)?tv2\.hu/(?:[^/]+/)+(?P<id>\d+)_[^/?#]+?\.html'
_TESTS = [{
'url': 'http://tv2.hu/ezek_megorultek/217679_ezek-megorultek---1.-adas-1.-resz.html',
'md5': '585e58e2e090f34603804bb2c48e98d8',
'info_dict': {
'id': '217679',
'ext': 'mp4',
'title': 'Ezek megőrültek! - 1. adás 1. rész',
'upload_date': '20160826',
'thumbnail': r're:^https?://.*\.jpg$'
}
}, {
'url': 'http://tv2.hu/ezek_megorultek/teljes_adasok/217677_ezek-megorultek---1.-adas-2.-resz.html',
'only_matching': True
}, {
'url': 'http://tv2.hu/musoraink/aktiv/aktiv_teljes_adas/217963_aktiv-teljes-adas---2016.08.30..html',
'only_matching': True
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
json_url = self._search_regex(
r'jsonUrl\s*=\s*"([^"]+)"', webpage, 'json url')
json_data = self._download_json(json_url, video_id)
formats = []
for b in ('bitrates', 'backupBitrates'):
bitrates = json_data.get(b, {})
m3u8_url = bitrates.get('hls')
if m3u8_url:
formats.extend(self._extract_wowza_formats(
m3u8_url, video_id, skip_protocols=['rtmp', 'rtsp']))
for mp4_url in bitrates.get('mp4', []):
height = int_or_none(self._search_regex(
r'\.(\d+)p\.mp4', mp4_url, 'height', default=None))
formats.append({
'format_id': 'http' + ('-%d' % height if height else ''),
'url': mp4_url,
'height': height,
'width': int_or_none(height / 9.0 * 16.0 if height else None),
})
self._sort_formats(formats)
return {
'id': video_id,
'title': self._og_search_title(webpage).strip(),
'thumbnail': self._og_search_thumbnail(webpage),
'upload_date': self._search_regex(
r'/vod/(\d{8})/', json_url, 'upload_date', default=None),
'formats': formats,
}

View File

@@ -0,0 +1,79 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
clean_html,
determine_ext,
extract_attributes,
get_element_by_class,
int_or_none,
parse_duration,
parse_iso8601,
)
class TV5MondePlusIE(InfoExtractor):
IE_DESC = 'TV5MONDE+'
_VALID_URL = r'https?://(?:www\.)?tv5mondeplus\.com/toutes-les-videos/[^/]+/(?P<id>[^/?#]+)'
_TEST = {
'url': 'http://www.tv5mondeplus.com/toutes-les-videos/documentaire/tdah-mon-amour-tele-quebec-tdah-mon-amour-ep001-enfants',
'md5': '12130fc199f020673138a83466542ec6',
'info_dict': {
'id': 'tdah-mon-amour-tele-quebec-tdah-mon-amour-ep001-enfants',
'ext': 'mp4',
'title': 'Tdah, mon amour - Enfants',
'description': 'md5:230e3aca23115afcf8006d1bece6df74',
'upload_date': '20170401',
'timestamp': 1491022860,
}
}
_GEO_BYPASS = False
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
if ">Ce programme n'est malheureusement pas disponible pour votre zone géographique.<" in webpage:
self.raise_geo_restricted(countries=['FR'])
series = get_element_by_class('video-detail__title', webpage)
title = episode = get_element_by_class(
'video-detail__subtitle', webpage) or series
if series and series != title:
title = '%s - %s' % (series, title)
vpl_data = extract_attributes(self._search_regex(
r'(<[^>]+class="video_player_loader"[^>]+>)',
webpage, 'video player loader'))
video_files = self._parse_json(
vpl_data['data-broadcast'], display_id).get('files', [])
formats = []
for video_file in video_files:
v_url = video_file.get('url')
if not v_url:
continue
video_format = video_file.get('format') or determine_ext(v_url)
if video_format == 'm3u8':
formats.extend(self._extract_m3u8_formats(
v_url, display_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
else:
formats.append({
'url': v_url,
'format_id': video_format,
})
self._sort_formats(formats)
return {
'id': display_id,
'display_id': display_id,
'title': title,
'description': clean_html(get_element_by_class('video-detail__description', webpage)),
'thumbnail': vpl_data.get('data-image'),
'duration': int_or_none(vpl_data.get('data-duration')) or parse_duration(self._html_search_meta('duration', webpage)),
'timestamp': parse_iso8601(self._html_search_meta('uploadDate', webpage)),
'formats': formats,
'episode': episode,
'series': series,
}

View File

@@ -225,7 +225,11 @@ class TVPlayIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
geo_country = self._search_regex(
r'https?://[^/]+\.([a-z]{2})', url,
'geo country', default=None)
if geo_country:
self._initialize_geo_bypass([geo_country.upper()])
video = self._download_json(
'http://playapi.mtgx.tv/v3/videos/%s' % video_id, video_id, 'Downloading video JSON')

View File

@@ -9,6 +9,7 @@ from .common import InfoExtractor
class VierIE(InfoExtractor):
IE_NAME = 'vier'
IE_DESC = 'vier.be and vijf.be'
_VALID_URL = r'https?://(?:www\.)?(?P<site>vier|vijf)\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))'
_TESTS = [{
'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129',

View File

@@ -68,7 +68,7 @@ class ViewLiftEmbedIE(ViewLiftBaseIE):
type_ = source.get('type')
ext = determine_ext(file_)
format_id = source.get('label') or ext
if all(v == 'm3u8' or v == 'hls' for v in (type_, ext)):
if all(v in ('m3u8', 'hls') for v in (type_, ext)):
formats.extend(self._extract_m3u8_formats(
file_, video_id, 'mp4', m3u8_id='hls'))
else:

View File

@@ -70,9 +70,9 @@ class VLiveIE(InfoExtractor):
status, long_video_id, key = params[2], params[5], params[6]
status = remove_start(status, 'PRODUCT_')
if status == 'LIVE_ON_AIR' or status == 'BIG_EVENT_ON_AIR':
if status in ('LIVE_ON_AIR', 'BIG_EVENT_ON_AIR'):
return self._live(video_id, webpage)
elif status == 'VOD_ON_AIR' or status == 'BIG_EVENT_INTRO':
elif status in ('VOD_ON_AIR', 'BIG_EVENT_INTRO'):
if long_video_id and key:
return self._replay(video_id, webpage, long_video_id, key)
else:

View File

@@ -10,6 +10,7 @@ from ..utils import (
class VRTIE(InfoExtractor):
IE_DESC = 'deredactie.be, sporza.be, cobra.be and cobra.canvas.be'
_VALID_URL = r'https?://(?:deredactie|sporza|cobra(?:\.canvas)?)\.be/cm/(?:[^/]+/)+(?P<id>[^/]+)/*'
_TESTS = [
# deredactie.be

191
youtube_dl/extractor/vrv.py Normal file
View File

@@ -0,0 +1,191 @@
# coding: utf-8
from __future__ import unicode_literals
import base64
import json
import hashlib
import hmac
import random
import string
import time
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_urlencode,
compat_urlparse,
)
from ..utils import (
float_or_none,
int_or_none,
)
class VRVBaseIE(InfoExtractor):
_API_DOMAIN = None
_API_PARAMS = {}
_CMS_SIGNING = {}
def _call_api(self, path, video_id, note, data=None):
base_url = self._API_DOMAIN + '/core/' + path
encoded_query = compat_urllib_parse_urlencode({
'oauth_consumer_key': self._API_PARAMS['oAuthKey'],
'oauth_nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
'oauth_signature_method': 'HMAC-SHA1',
'oauth_timestamp': int(time.time()),
'oauth_version': '1.0',
})
headers = self.geo_verification_headers()
if data:
data = json.dumps(data).encode()
headers['Content-Type'] = 'application/json'
method = 'POST' if data else 'GET'
base_string = '&'.join([method, compat_urlparse.quote(base_url, ''), compat_urlparse.quote(encoded_query, '')])
oauth_signature = base64.b64encode(hmac.new(
(self._API_PARAMS['oAuthSecret'] + '&').encode('ascii'),
base_string.encode(), hashlib.sha1).digest()).decode()
encoded_query += '&oauth_signature=' + compat_urlparse.quote(oauth_signature, '')
return self._download_json(
'?'.join([base_url, encoded_query]), video_id,
note='Downloading %s JSON metadata' % note, headers=headers, data=data)
def _call_cms(self, path, video_id, note):
if not self._CMS_SIGNING:
self._CMS_SIGNING = self._call_api('index', video_id, 'CMS Signing')['cms_signing']
return self._download_json(
self._API_DOMAIN + path, video_id, query=self._CMS_SIGNING,
note='Downloading %s JSON metadata' % note, headers=self.geo_verification_headers())
def _set_api_params(self, webpage, video_id):
if not self._API_PARAMS:
self._API_PARAMS = self._parse_json(self._search_regex(
r'window\.__APP_CONFIG__\s*=\s*({.+?})</script>',
webpage, 'api config'), video_id)['cxApiParams']
self._API_DOMAIN = self._API_PARAMS.get('apiDomain', 'https://api.vrv.co')
def _get_cms_resource(self, resource_key, video_id):
return self._call_api(
'cms_resource', video_id, 'resource path', data={
'resource_key': resource_key,
})['__links__']['cms_resource']['href']
class VRVIE(VRVBaseIE):
IE_NAME = 'vrv'
_VALID_URL = r'https?://(?:www\.)?vrv\.co/watch/(?P<id>[A-Z0-9]+)'
_TEST = {
'url': 'https://vrv.co/watch/GR9PNZ396/Hidden-America-with-Jonah-Ray:BOSTON-WHERE-THE-PAST-IS-THE-PRESENT',
'info_dict': {
'id': 'GR9PNZ396',
'ext': 'mp4',
'title': 'BOSTON: WHERE THE PAST IS THE PRESENT',
'description': 'md5:4ec8844ac262ca2df9e67c0983c6b83f',
'uploader_id': 'seeso',
},
'params': {
# m3u8 download
'skip_download': True,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
url, video_id,
headers=self.geo_verification_headers())
media_resource = self._parse_json(self._search_regex(
r'window\.__INITIAL_STATE__\s*=\s*({.+?})</script>',
webpage, 'inital state'), video_id).get('watch', {}).get('mediaResource') or {}
video_data = media_resource.get('json')
if not video_data:
self._set_api_params(webpage, video_id)
episode_path = self._get_cms_resource(
'cms:/episodes/' + video_id, video_id)
video_data = self._call_cms(episode_path, video_id, 'video')
title = video_data['title']
streams_json = media_resource.get('streams', {}).get('json', {})
if not streams_json:
self._set_api_params(webpage, video_id)
streams_path = video_data['__links__']['streams']['href']
streams_json = self._call_cms(streams_path, video_id, 'streams')
audio_locale = streams_json.get('audio_locale')
formats = []
for stream_id, stream in streams_json.get('streams', {}).get('adaptive_hls', {}).items():
stream_url = stream.get('url')
if not stream_url:
continue
stream_id = stream_id or audio_locale
m3u8_formats = self._extract_m3u8_formats(
stream_url, video_id, 'mp4', m3u8_id=stream_id,
note='Downloading %s m3u8 information' % stream_id,
fatal=False)
if audio_locale:
for f in m3u8_formats:
f['language'] = audio_locale
formats.extend(m3u8_formats)
self._sort_formats(formats)
thumbnails = []
for thumbnail in video_data.get('images', {}).get('thumbnails', []):
thumbnail_url = thumbnail.get('source')
if not thumbnail_url:
continue
thumbnails.append({
'url': thumbnail_url,
'width': int_or_none(thumbnail.get('width')),
'height': int_or_none(thumbnail.get('height')),
})
return {
'id': video_id,
'title': title,
'formats': formats,
'thumbnails': thumbnails,
'description': video_data.get('description'),
'duration': float_or_none(video_data.get('duration_ms'), 1000),
'uploader_id': video_data.get('channel_id'),
'series': video_data.get('series_title'),
'season': video_data.get('season_title'),
'season_number': int_or_none(video_data.get('season_number')),
'season_id': video_data.get('season_id'),
'episode': title,
'episode_number': int_or_none(video_data.get('episode_number')),
'episode_id': video_data.get('production_episode_id'),
}
class VRVSeriesIE(VRVBaseIE):
IE_NAME = 'vrv:series'
_VALID_URL = r'https?://(?:www\.)?vrv\.co/series/(?P<id>[A-Z0-9]+)'
_TEST = {
'url': 'https://vrv.co/series/G68VXG3G6/The-Perfect-Insider',
'info_dict': {
'id': 'G68VXG3G6',
},
'playlist_mincount': 11,
}
def _real_extract(self, url):
series_id = self._match_id(url)
webpage = self._download_webpage(
url, series_id,
headers=self.geo_verification_headers())
self._set_api_params(webpage, series_id)
seasons_path = self._get_cms_resource(
'cms:/seasons?series_id=' + series_id, series_id)
seasons_data = self._call_cms(seasons_path, series_id, 'seasons')
entries = []
for season in seasons_data.get('items', []):
episodes_path = season['__links__']['season/episodes']['href']
episodes = self._call_cms(episodes_path, series_id, 'episodes')
for episode in episodes.get('items', []):
episode_id = episode['id']
entries.append(self.url_result(
'https://vrv.co/watch/' + episode_id,
'VRV', episode_id, episode.get('title')))
return self.playlist_result(entries, series_id)

View File

@@ -0,0 +1,38 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class VShareIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://vshare.io/d/0f64ce6',
'md5': '16d7b8fef58846db47419199ff1ab3e7',
'info_dict': {
'id': '0f64ce6',
'title': 'vl14062007715967',
'ext': 'mp4',
}
}, {
'url': 'https://vshare.io/v/0f64ce6/width-650/height-430/1',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
'https://vshare.io/d/%s' % video_id, video_id)
title = self._html_search_regex(
r'(?s)<div id="root-container">(.+?)<br/>', webpage, 'title')
video_url = self._search_regex(
r'<a[^>]+href=(["\'])(?P<url>(?:https?:)?//.+?)\1[^>]*>[Cc]lick\s+here',
webpage, 'video url', group='url')
return {
'id': video_id,
'title': title,
'url': video_url,
}

View File

@@ -1,12 +1,10 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class WorldStarHipHopIE(InfoExtractor):
_VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/(?:videos|android)/video\.php\?v=(?P<id>.*)'
_VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/(?:videos|android)/video\.php\?.*?\bv=(?P<id>[^&]+)'
_TESTS = [{
'url': 'http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO',
'md5': '9d04de741161603bf7071bbf4e883186',
@@ -17,48 +15,26 @@ class WorldStarHipHopIE(InfoExtractor):
}
}, {
'url': 'http://m.worldstarhiphop.com/android/video.php?v=wshh6a7q1ny0G34ZwuIO',
'md5': 'dc1c76c83ecc4190bb1eb143899b87d3',
'info_dict': {
'id': 'wshh6a7q1ny0G34ZwuIO',
'ext': 'mp4',
'title': 'KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!'
}
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
m_vevo_id = re.search(r'videoId=(.*?)&amp?', webpage)
if m_vevo_id is not None:
return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo')
entries = self._parse_html5_media_entries(url, webpage, video_id)
video_url = self._search_regex(
[r'so\.addVariable\("file","(.*?)"\)',
r'<div class="artlist">\s*<a[^>]+href="([^"]+)">'],
webpage, 'video URL')
if not entries:
return self.url_result(url, 'Generic')
if 'youtube' in video_url:
return self.url_result(video_url, ie='Youtube')
video_title = self._html_search_regex(
title = self._html_search_regex(
[r'(?s)<div class="content-heading">\s*<h1>(.*?)</h1>',
r'<span[^>]+class="tc-sp-pinned-title">(.*)</span>'],
webpage, 'title')
# Getting thumbnail and if not thumbnail sets correct title for WSHH candy video.
thumbnail = self._html_search_regex(
r'rel="image_src" href="(.*)" />', webpage, 'thumbnail',
default=None)
if not thumbnail:
_title = r'candytitles.*>(.*)</span>'
mobj = re.search(_title, webpage)
if mobj is not None:
video_title = mobj.group(1)
return {
info = entries[0]
info.update({
'id': video_id,
'url': video_url,
'title': video_title,
'thumbnail': thumbnail,
}
'title': title,
})
return info

View File

@@ -6,6 +6,7 @@ import re
from .common import InfoExtractor
from ..utils import (
decode_packed_codes,
determine_ext,
ExtractorError,
int_or_none,
NO_DEFAULT,
@@ -26,6 +27,9 @@ class XFileShareIE(InfoExtractor):
('vidto.me', 'Vidto'),
('streamin.to', 'Streamin.To'),
('xvidstage.com', 'XVIDSTAGE'),
('vidabc.com', 'Vid ABC'),
('vidbom.com', 'VidBom'),
('vidlo.us', 'vidlo'),
)
IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
@@ -95,6 +99,16 @@ class XFileShareIE(InfoExtractor):
# removed by administrator
'url': 'http://xvidstage.com/amfy7atlkx25',
'only_matching': True,
}, {
'url': 'http://vidabc.com/i8ybqscrphfv',
'info_dict': {
'id': 'i8ybqscrphfv',
'ext': 'mp4',
'title': 're:Beauty and the Beast 2017',
},
'params': {
'skip_download': True,
},
}]
def _real_extract(self, url):
@@ -133,31 +147,45 @@ class XFileShareIE(InfoExtractor):
webpage, 'title', default=None) or self._og_search_title(
webpage, default=None) or video_id).strip()
def extract_video_url(default=NO_DEFAULT):
return self._search_regex(
(r'file\s*:\s*(["\'])(?P<url>http.+?)\1,',
r'file_link\s*=\s*(["\'])(?P<url>http.+?)\1',
r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http.+?)\2\)',
r'<embed[^>]+src=(["\'])(?P<url>http.+?)\1'),
webpage, 'file url', default=default, group='url')
def extract_formats(default=NO_DEFAULT):
urls = []
for regex in (
r'file\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
r'file_link\s*=\s*(["\'])(?P<url>http(?:(?!\1).)+)\1',
r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)',
r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1'):
for mobj in re.finditer(regex, webpage):
video_url = mobj.group('url')
if video_url not in urls:
urls.append(video_url)
formats = []
for video_url in urls:
if determine_ext(video_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls',
fatal=False))
else:
formats.append({
'url': video_url,
'format_id': 'sd',
})
if not formats and default is not NO_DEFAULT:
return default
self._sort_formats(formats)
return formats
video_url = extract_video_url(default=None)
formats = extract_formats(default=None)
if not video_url:
if not formats:
webpage = decode_packed_codes(self._search_regex(
r"(}\('(.+)',(\d+),(\d+),'[^']*\b(?:file|embed)\b[^']*'\.split\('\|'\))",
webpage, 'packed code'))
video_url = extract_video_url()
formats = extract_formats()
thumbnail = self._search_regex(
r'image\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'thumbnail', default=None)
formats = [{
'format_id': 'sd',
'url': video_url,
'quality': 1,
}]
return {
'id': video_id,
'title': title,

View File

@@ -10,12 +10,14 @@ import time
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_urlencode,
compat_ord,
compat_str,
compat_urllib_parse_urlencode,
)
from ..utils import (
ExtractorError,
get_element_by_attribute,
try_get,
)
@@ -105,7 +107,9 @@ class YoukuIE(InfoExtractor):
if stream.get('channel_type') == 'tail':
continue
format = stream.get('stream_type')
fileid = stream['stream_fileid']
fileid = try_get(
stream, lambda x: x['segs'][0]['fileid'],
compat_str) or stream['stream_fileid']
fileid_dict[format] = fileid
def get_fileid(format, n):

View File

@@ -317,60 +317,60 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
# DASH mp4 video
'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
# Dash mp4 audio
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'preference': -50, 'container': 'm4a_dash'},
'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'preference': -50, 'container': 'm4a_dash'},
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
# Dash webm
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9', 'preference': -40},
'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
# Dash webm audio
'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
# Dash webm audio with opus inside
'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
# RTMP (unnamed)
'_rtmp': {'protocol': 'rtmp'},
@@ -1253,21 +1253,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
video_id = mobj.group(2)
return video_id
def _extract_from_m3u8(self, manifest_url, video_id):
url_map = {}
def _get_urls(_manifest):
lines = _manifest.split('\n')
urls = filter(lambda l: l and not l.startswith('#'),
lines)
return urls
manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
formats_urls = _get_urls(manifest)
for format_url in formats_urls:
itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
url_map[itag] = format_url
return url_map
def _extract_annotations(self, video_id):
url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
@@ -1573,19 +1558,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if self._downloader.params.get('writeannotations', False):
video_annotations = self._extract_annotations(video_id)
def _map_to_format_list(urlmap):
formats = []
for itag, video_real_url in urlmap.items():
dct = {
'format_id': itag,
'url': video_real_url,
'player_url': player_url,
}
if itag in self._formats:
dct.update(self._formats[itag])
formats.append(dct)
return formats
if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
self.report_rtmp_download()
formats = [{
@@ -1718,11 +1690,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
formats.append(dct)
elif video_info.get('hlsvp'):
manifest_url = video_info['hlsvp'][0]
url_map = self._extract_from_m3u8(manifest_url, video_id)
formats = _map_to_format_list(url_map)
# Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
for a_format in formats:
formats = []
m3u8_formats = self._extract_m3u8_formats(
manifest_url, video_id, 'mp4', fatal=False)
for a_format in m3u8_formats:
itag = self._search_regex(
r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
if itag:
a_format['format_id'] = itag
if itag in self._formats:
dct = self._formats[itag].copy()
dct.update(a_format)
a_format = dct
a_format['player_url'] = player_url
# Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
formats.append(a_format)
else:
unavailable_message = self._html_search_regex(
r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',

View File

@@ -459,11 +459,11 @@ def parseOpts(overrideArguments=None):
downloader.add_option(
'--fragment-retries',
dest='fragment_retries', metavar='RETRIES', default=10,
help='Number of retries for a fragment (default is %default), or "infinite" (DASH and hlsnative only)')
help='Number of retries for a fragment (default is %default), or "infinite" (DASH, hlsnative and ISM)')
downloader.add_option(
'--skip-unavailable-fragments',
action='store_true', dest='skip_unavailable_fragments', default=True,
help='Skip unavailable fragments (DASH and hlsnative only)')
help='Skip unavailable fragments (DASH, hlsnative and ISM)')
downloader.add_option(
'--abort-on-unavailable-fragment',
action='store_false', dest='skip_unavailable_fragments',

View File

@@ -552,7 +552,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
sub_filenames.append(old_file)
new_file = subtitles_filename(filename, lang, new_ext)
if ext == 'dfxp' or ext == 'ttml' or ext == 'tt':
if ext in ('dfxp', 'ttml', 'tt'):
self._downloader.report_warning(
'You have requested to convert dfxp (TTML) subtitles into another format, '
'which results in style information loss')

View File

@@ -26,7 +26,7 @@ class MetadataFromTitlePP(PostProcessor):
regex += r'(?P<' + match.group(1) + '>.+)'
lastpos = match.end()
if lastpos < len(fmt):
regex += re.escape(fmt[lastpos:len(fmt)])
regex += re.escape(fmt[lastpos:])
return regex
def run(self, info):

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2017.03.26'
__version__ = '2017.04.15'