Compare commits

...

15 Commits

Author SHA1 Message Date
Sergey M․
0e9a73e612 release 2017.03.20 2017-03-20 00:07:57 +07:00
Sergey M․
0ecdd3adbd [ChangeLog] Actualize 2017-03-20 00:03:58 +07:00
Sergey M․
9487ce03e9 [YoutubeDL] Allow multiple input URLs to be used with stdout as output template 2017-03-19 23:59:40 +07:00
Sergey M․
45e6ad21b4 Credit @mrBliss for vtm (#11912) 2017-03-19 23:48:02 +07:00
Yen Chi Hsuan
68220649fa [ChangeLog] Update after #12099 2017-03-19 20:42:17 +08:00
John Hawkinson
46b18f2349 [BostonGlobe] New. Nonstandard version of Brightcove.
Has a "data-brightcove-video-id" instead of a "data-video-id," otherwise
pretty much just Brightcove. Except the Globe isn't all Brightcove
videos, so fallback to Generic, too.

Also, abstract playlist_from_matches() from generic.py to common.py, and use
it here.

History of these changes can be found in
51170427d4b1143572a498dedaee61863a5b2c5b.
2017-03-19 20:40:31 +08:00
Remita Amine
772b5ff57f [toongoggles] Add new extractor(closes #12171) 2017-03-19 00:45:38 +01:00
Sergey M․
f68ef1e2ab [medialaan] Remove unrelated test 2017-03-18 23:23:47 +07:00
Sergey M․
febfe1e262 [adobepass] Detect and output error on authz token extraction (#12472) 2017-03-18 06:21:31 +07:00
Vijay Singh
5f0daab1ca [openload] Fix extraction 2017-03-18 07:02:55 +08:00
Sergey M․
2a721cdff2 [medialaan] Fix and improve extraction (closes #11912) 2017-03-18 05:58:54 +07:00
mrBliss
e7a51a4c02 [vtm] Add extractor (closes #9974)
Implementation of the approach described in #9974.
2017-03-18 00:27:04 +07:00
Remita Amine
3e5856d860 [discoverynetworks] add support for more domains and bypass geo restiction 2017-03-17 09:53:44 +01:00
Yen Chi Hsuan
ea883a687c [openload] Fix extraction (closes #10408)
Thanks to @makgun02

Ref: http://pastebin.com/raw/JX9gHFUz
2017-03-17 15:22:34 +08:00
Sergey M․
7f3590c43b [test_InfoExtractor] Add some realworld tests for _extract_jwplayer_data 2017-03-17 00:00:01 +07:00
16 changed files with 631 additions and 82 deletions

View File

@@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.16**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.20*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.20**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2017.03.16
[debug] youtube-dl version 2017.03.20
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@@ -209,3 +209,4 @@ Olivier Bilodeau
Lars Vierbergen
Juanjo Benages
Xiao Di Guan
Thomas Winant

View File

@@ -1,3 +1,18 @@
version 2017.03.20
Core
+ [YoutubeDL] Allow multiple input URLs to be used with stdout (-) as
output template
+ [adobepass] Detect and output error on authz token extraction (#12472)
Extractors
+ [bostonglobe] Add extractor for bostonglobe.com (#12099)
+ [toongoggles] Add support for toongoggles.com (#12171)
+ [medialaan] Add support for Medialaan sites (#9974, #11912)
+ [discoverynetworks] Add support for more domains and bypass geo restiction
* [openload] Fix extraction (#10408)
version 2017.03.16
Core

View File

@@ -108,6 +108,7 @@
- **blinkx**
- **Bloomberg**
- **BokeCC**
- **BostonGlobe**
- **Bpb**: Bundeszentrale für politische Bildung
- **BR**: Bayerischer Rundfunk Mediathek
- **BravoTV**
@@ -209,6 +210,7 @@
- **Discovery**
- **DiscoveryGo**
- **DiscoveryGoPlaylist**
- **DiscoveryNetworksDe**
- **Disney**
- **Dotsub**
- **DouyuTV**: 斗鱼
@@ -425,6 +427,7 @@
- **MatchTV**
- **MDR**: MDR.DE and KiKA
- **media.ccc.de**
- **Medialaan**
- **Meipai**: 美拍
- **MelonVOD**
- **META**
@@ -777,12 +780,12 @@
- **ThisAV**
- **ThisOldHouse**
- **tinypic**: tinypic.com videos
- **tlc.de**
- **TMZ**
- **TMZArticle**
- **TNAFlix**
- **TNAFlixNetworkEmbed**
- **toggle**
- **ToonGoggles**
- **Tosh**: Tosh.0
- **tou.tv**
- **Toypics**: Toypics user profile

View File

@@ -8,7 +8,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL
from test.helper import FakeYDL, expect_dict
from youtube_dl.extractor.common import InfoExtractor
from youtube_dl.extractor import YoutubeIE, get_info_extractor
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
@@ -84,6 +84,97 @@ class TestInfoExtractor(unittest.TestCase):
self.assertRaises(ExtractorError, self.ie._download_json, uri, None)
self.assertEqual(self.ie._download_json(uri, None, fatal=False), None)
def test_extract_jwplayer_data_realworld(self):
# from http://www.suffolk.edu/sjc/
expect_dict(
self,
self.ie._extract_jwplayer_data(r'''
<script type='text/javascript'>
jwplayer('my-video').setup({
file: 'rtmp://192.138.214.154/live/sjclive',
fallback: 'true',
width: '95%',
aspectratio: '16:9',
primary: 'flash',
mediaid:'XEgvuql4'
});
</script>
''', None, require_title=False),
{
'id': 'XEgvuql4',
'formats': [{
'url': 'rtmp://192.138.214.154/live/sjclive',
'ext': 'flv'
}]
})
# from https://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary/
expect_dict(
self,
self.ie._extract_jwplayer_data(r'''
<script type="text/javascript">
jwplayer("mediaplayer").setup({
'videoid': "7564",
'width': "100%",
'aspectratio': "16:9",
'stretching': "exactfit",
'autostart': 'false',
'flashplayer': "https://t04.vipstreamservice.com/jwplayer/v5.10/player.swf",
'file': "https://cdn.pornoxo.com/key=MF+oEbaxqTKb50P-w9G3nA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/4b2157147afe5efa93ce1978e0265289c193874e02597.flv",
'image': "https://t03.vipstreamservice.com/thumbs/pxo-full/2009-12/14/a4b2157147afe5efa93ce1978e0265289c193874e02597.flv-full-13.jpg",
'filefallback': "https://cdn.pornoxo.com/key=9ZPsTR5EvPLQrBaak2MUGA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/m_4b2157147afe5efa93ce1978e0265289c193874e02597.mp4",
'logo.hide': true,
'skin': "https://t04.vipstreamservice.com/jwplayer/skin/modieus-blk.zip",
'plugins': "https://t04.vipstreamservice.com/jwplayer/dock/dockableskinnableplugin.swf",
'dockableskinnableplugin.piclink': "/index.php?key=ajax-videothumbsn&vid=7564&data=2009-12--14--4b2157147afe5efa93ce1978e0265289c193874e02597.flv--17370",
'controlbar': 'bottom',
'modes': [
{type: 'flash', src: 'https://t04.vipstreamservice.com/jwplayer/v5.10/player.swf'}
],
'provider': 'http'
});
//noinspection JSAnnotator
invideo.setup({
adsUrl: "/banner-iframe/?zoneId=32",
adsUrl2: "",
autostart: false
});
</script>
''', 'dummy', require_title=False),
{
'thumbnail': 'https://t03.vipstreamservice.com/thumbs/pxo-full/2009-12/14/a4b2157147afe5efa93ce1978e0265289c193874e02597.flv-full-13.jpg',
'formats': [{
'url': 'https://cdn.pornoxo.com/key=MF+oEbaxqTKb50P-w9G3nA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/4b2157147afe5efa93ce1978e0265289c193874e02597.flv',
'ext': 'flv'
}]
})
# from http://www.indiedb.com/games/king-machine/videos
expect_dict(
self,
self.ie._extract_jwplayer_data(r'''
<script>
jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/\/www.indiedb.com\/","displaytitle":false,"autostart":false,"repeat":false,"title":"king machine trailer 1","sharing":{"link":"http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1","code":"<iframe width=\"560\" height=\"315\" src=\"http:\/\/www.indiedb.com\/media\/iframe\/1522983\" frameborder=\"0\" allowfullscreen><\/iframe><br><a href=\"http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1\">king machine trailer 1 - Indie DB<\/a>"},"related":{"file":"http:\/\/rss.indiedb.com\/media\/recommended\/1522983\/feed\/rss.xml","dimensions":"160x120","onclick":"link"},"sources":[{"file":"http:\/\/cdn.dbolical.com\/cache\/videos\/games\/1\/50\/49678\/encode_mp4\/king-machine-trailer.mp4","label":"360p SD","default":"true"},{"file":"http:\/\/cdn.dbolical.com\/cache\/videos\/games\/1\/50\/49678\/encode720p_mp4\/king-machine-trailer.mp4","label":"720p HD"}],"image":"http:\/\/media.indiedb.com\/cache\/images\/games\/1\/50\/49678\/thumb_620x2000\/king-machine-trailer.mp4.jpg","advertising":{"client":"vast","tag":"http:\/\/ads.intergi.com\/adrawdata\/3.0\/5205\/4251742\/0\/1013\/ADTECH;cors=yes;width=560;height=315;referring_url=http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1;content_url=http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1;media_id=1522983;title=king+machine+trailer+1;device=__DEVICE__;model=__MODEL__;os=Windows+OS;osversion=__OSVERSION__;ua=__UA__;ip=109.171.17.81;uniqueid=1522983;tags=__TAGS__;number=58cac25928151;time=1489683033"},"width":620,"height":349}).once("play", function(event) {
videoAnalytics("play");
}).once("complete", function(event) {
videoAnalytics("completed");
});
</script>
''', 'dummy'),
{
'title': 'king machine trailer 1',
'thumbnail': 'http://media.indiedb.com/cache/images/games/1/50/49678/thumb_620x2000/king-machine-trailer.mp4.jpg',
'formats': [{
'url': 'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode_mp4/king-machine-trailer.mp4',
'height': 360,
'ext': 'mp4'
}, {
'url': 'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode720p_mp4/king-machine-trailer.mp4',
'height': 720,
'ext': 'mp4'
}]
})
if __name__ == '__main__':
unittest.main()

View File

@@ -1872,6 +1872,7 @@ class YoutubeDL(object):
"""Download a given list of URLs."""
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
if (len(url_list) > 1 and
outtmpl != '-' and
'%' not in outtmpl and
self.params.get('max_downloads') != 1):
raise SameFileError(outtmpl)

View File

@@ -1458,6 +1458,8 @@ class AdobePassIE(InfoExtractor):
self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
count += 1
continue
if '<error' in authorize:
raise ExtractorError(xml_text(authorize, 'details'), expected=True)
authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
requestor_info[guid] = authz_token
self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)

View File

@@ -0,0 +1,72 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
extract_attributes,
)
class BostonGlobeIE(InfoExtractor):
_VALID_URL = r'(?i)https?://(?:www\.)?bostonglobe\.com/.*/(?P<id>[^/]+)/\w+(?:\.html)?'
_TESTS = [
{
'url': 'http://www.bostonglobe.com/metro/2017/02/11/tree-finally-succumbs-disease-leaving-hole-neighborhood/h1b4lviqzMTIn9sVy8F3gP/story.html',
'md5': '0a62181079c85c2d2b618c9a738aedaf',
'info_dict': {
'title': 'A tree finally succumbs to disease, leaving a hole in a neighborhood',
'id': '5320421710001',
'ext': 'mp4',
'description': 'It arrived as a sapling when the Back Bay was in its infancy, a spindly American elm tamped down into a square of dirt cut into the brick sidewalk of 1880s Marlborough Street, no higher than the first bay window of the new brownstone behind it.',
'timestamp': 1486877593,
'upload_date': '20170212',
'uploader_id': '245991542',
},
},
{
# Embedded youtube video; we hand it off to the Generic extractor.
'url': 'https://www.bostonglobe.com/lifestyle/names/2017/02/17/does-ben-affleck-play-matt-damon-favorite-version-batman/ruqkc9VxKBYmh5txn1XhSI/story.html',
'md5': '582b40327089d5c0c949b3c54b13c24b',
'info_dict': {
'title': "Who Is Matt Damon's Favorite Batman?",
'id': 'ZW1QCnlA6Qc',
'ext': 'mp4',
'upload_date': '20170217',
'description': 'md5:3b3dccb9375867e0b4d527ed87d307cb',
'uploader': 'The Late Late Show with James Corden',
'uploader_id': 'TheLateLateShow',
},
'expected_warnings': ['404'],
},
]
def _real_extract(self, url):
page_id = self._match_id(url)
webpage = self._download_webpage(url, page_id)
page_title = self._og_search_title(webpage, default=None)
# <video data-brightcove-video-id="5320421710001" data-account="245991542" data-player="SJWAiyYWg" data-embed="default" class="video-js" controls itemscope itemtype="http://schema.org/VideoObject">
entries = []
for video in re.findall(r'(?i)(<video[^>]+>)', webpage):
attrs = extract_attributes(video)
video_id = attrs.get('data-brightcove-video-id')
account_id = attrs.get('data-account')
player_id = attrs.get('data-player')
embed = attrs.get('data-embed')
if video_id and account_id and player_id and embed:
entries.append(
'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
% (account_id, player_id, embed, video_id))
if len(entries) == 0:
return self.url_result(url, 'Generic')
elif len(entries) == 1:
return self.url_result(entries[0], 'BrightcoveNew')
else:
return self.playlist_from_matches(entries, page_id, page_title, ie='BrightcoveNew')

View File

@@ -36,34 +36,35 @@ from ..utils import (
clean_html,
compiled_regex_type,
determine_ext,
determine_protocol,
error_to_compat_str,
ExtractorError,
extract_attributes,
fix_xml_ampersands,
float_or_none,
GeoRestrictedError,
GeoUtils,
int_or_none,
js_to_json,
mimetype2ext,
orderedSet,
parse_codecs,
parse_duration,
parse_iso8601,
parse_m3u8_attributes,
RegexNotFoundError,
sanitize_filename,
sanitized_Request,
sanitize_filename,
unescapeHTML,
unified_strdate,
unified_timestamp,
update_Request,
update_url_query,
urljoin,
url_basename,
xpath_element,
xpath_text,
xpath_with_ns,
determine_protocol,
parse_duration,
mimetype2ext,
update_Request,
update_url_query,
parse_m3u8_attributes,
extract_attributes,
parse_codecs,
urljoin,
)
@@ -714,6 +715,13 @@ class InfoExtractor(object):
video_info['title'] = video_title
return video_info
def playlist_from_matches(self, matches, video_id, video_title, getter=None, ie=None):
urlrs = orderedSet(
self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
for m in matches)
return self.playlist_result(
urlrs, playlist_id=video_id, playlist_title=video_title)
@staticmethod
def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
"""Returns a playlist"""

View File

@@ -9,13 +9,13 @@ from ..compat import (
compat_parse_qs,
compat_urlparse,
)
from ..utils import smuggle_url
class TlcDeIE(InfoExtractor):
IE_NAME = 'tlc.de'
_VALID_URL = r'https?://(?:www\.)?tlc\.de/(?:[^/]+/)*videos/(?P<title>[^/?#]+)?(?:.*#(?P<id>\d+))?'
class DiscoveryNetworksDeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:discovery|tlc|animalplanet|dmax)\.de/(?:.*#(?P<id>\d+)|(?:[^/]+/)*videos/(?P<title>[^/?#]+))'
_TEST = {
_TESTS = [{
'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001',
'info_dict': {
'id': '3235167922001',
@@ -29,7 +29,13 @@ class TlcDeIE(InfoExtractor):
'upload_date': '20140404',
'uploader_id': '1659832546',
},
}
}, {
'url': 'http://www.dmax.de/programme/storage-hunters-uk/videos/storage-hunters-uk-episode-6/',
'only_matching': True,
}, {
'url': 'http://www.discovery.de/#5332316765001',
'only_matching': True,
}]
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1659832546/default_default/index.html?videoId=%s'
def _real_extract(self, url):
@@ -39,5 +45,8 @@ class TlcDeIE(InfoExtractor):
title = mobj.group('title')
webpage = self._download_webpage(url, title)
brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
brightcove_id = compat_parse_qs(compat_urlparse.urlparse(brightcove_legacy_url).query)['@videoPlayer'][0]
return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
brightcove_id = compat_parse_qs(compat_urlparse.urlparse(
brightcove_legacy_url).query)['@videoPlayer'][0]
return self.url_result(smuggle_url(
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {'geo_countries': ['DE']}),
'BrightcoveNew', brightcove_id)

View File

@@ -117,6 +117,7 @@ from .bleacherreport import (
from .blinkx import BlinkxIE
from .bloomberg import BloombergIE
from .bokecc import BokeCCIE
from .bostonglobe import BostonGlobeIE
from .bpb import BpbIE
from .br import BRIE
from .bravotv import BravoTVIE
@@ -269,6 +270,7 @@ from .discoverygo import (
DiscoveryGoIE,
DiscoveryGoPlaylistIE,
)
from .discoverynetworks import DiscoveryNetworksDeIE
from .disney import DisneyIE
from .dispeak import DigitallySpeakingIE
from .dropbox import DropboxIE
@@ -973,7 +975,6 @@ from .thisav import ThisAVIE
from .thisoldhouse import ThisOldHouseIE
from .threeqsdn import ThreeQSDNIE
from .tinypic import TinyPicIE
from .tlc import TlcDeIE
from .tmz import (
TMZIE,
TMZArticleIE,
@@ -986,6 +987,7 @@ from .tnaflix import (
)
from .toggle import ToggleIE
from .tonline import TOnlineIE
from .toongoggles import ToonGogglesIE
from .toutv import TouTvIE
from .toypics import ToypicsUserIE, ToypicsIE
from .traileraddict import TrailerAddictIE
@@ -1174,6 +1176,7 @@ from .voxmedia import VoxMediaIE
from .vporn import VpornIE
from .vrt import VRTIE
from .vrak import VrakIE
from .medialaan import MedialaanIE
from .vube import VubeIE
from .vuclip import VuClipIE
from .vvvvid import VVVVIDIE

View File

@@ -1841,14 +1841,6 @@ class GenericIE(InfoExtractor):
video_description = self._og_search_description(webpage, default=None)
video_thumbnail = self._og_search_thumbnail(webpage, default=None)
# Helper method
def _playlist_from_matches(matches, getter=None, ie=None):
urlrs = orderedSet(
self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
for m in matches)
return self.playlist_result(
urlrs, playlist_id=video_id, playlist_title=video_title)
# Look for Brightcove Legacy Studio embeds
bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
if bc_urls:
@@ -1869,28 +1861,28 @@ class GenericIE(InfoExtractor):
# Look for Brightcove New Studio embeds
bc_urls = BrightcoveNewIE._extract_urls(webpage)
if bc_urls:
return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
return self.playlist_from_matches(bc_urls, video_id, video_title, ie='BrightcoveNew')
# Look for ThePlatform embeds
tp_urls = ThePlatformIE._extract_urls(webpage)
if tp_urls:
return _playlist_from_matches(tp_urls, ie='ThePlatform')
return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
# Look for Vessel embeds
vessel_urls = VesselIE._extract_urls(webpage)
if vessel_urls:
return _playlist_from_matches(vessel_urls, ie=VesselIE.ie_key())
return self.playlist_from_matches(vessel_urls, video_id, video_title, ie=VesselIE.ie_key())
# Look for embedded rtl.nl player
matches = re.findall(
r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
webpage)
if matches:
return _playlist_from_matches(matches, ie='RtlNl')
return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
vimeo_urls = VimeoIE._extract_urls(url, webpage)
if vimeo_urls:
return _playlist_from_matches(vimeo_urls, ie=VimeoIE.ie_key())
return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
vid_me_embed_url = self._search_regex(
r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
@@ -1912,25 +1904,25 @@ class GenericIE(InfoExtractor):
(?:embed|v|p)/.+?)
\1''', webpage)
if matches:
return _playlist_from_matches(
matches, lambda m: unescapeHTML(m[1]))
return self.playlist_from_matches(
matches, video_id, video_title, lambda m: unescapeHTML(m[1]))
# Look for lazyYT YouTube embed
matches = re.findall(
r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
if matches:
return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
return self.playlist_from_matches(matches, video_id, video_title, lambda m: unescapeHTML(m))
# Look for Wordpress "YouTube Video Importer" plugin
matches = re.findall(r'''(?x)<div[^>]+
class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
if matches:
return _playlist_from_matches(matches, lambda m: m[-1])
return self.playlist_from_matches(matches, video_id, video_title, lambda m: m[-1])
matches = DailymotionIE._extract_urls(webpage)
if matches:
return _playlist_from_matches(matches)
return self.playlist_from_matches(matches, video_id, video_title)
# Look for embedded Dailymotion playlist player (#3822)
m = re.search(
@@ -1939,8 +1931,8 @@ class GenericIE(InfoExtractor):
playlists = re.findall(
r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
if playlists:
return _playlist_from_matches(
playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
return self.playlist_from_matches(
playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
# Look for embedded Wistia player
match = re.search(
@@ -2047,8 +2039,9 @@ class GenericIE(InfoExtractor):
if mobj is not None:
embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
if embeds:
return _playlist_from_matches(
embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
return self.playlist_from_matches(
embeds, video_id, video_title,
getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
# Look for Aparat videos
mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
@@ -2110,13 +2103,13 @@ class GenericIE(InfoExtractor):
# Look for funnyordie embed
matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
if matches:
return _playlist_from_matches(
matches, getter=unescapeHTML, ie='FunnyOrDie')
return self.playlist_from_matches(
matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie')
# Look for BBC iPlayer embed
matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
if matches:
return _playlist_from_matches(matches, ie='BBCCoUk')
return self.playlist_from_matches(matches, video_id, video_title, ie='BBCCoUk')
# Look for embedded RUTV player
rutv_url = RUTVIE._extract_url(webpage)
@@ -2131,32 +2124,32 @@ class GenericIE(InfoExtractor):
# Look for embedded SportBox player
sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
if sportbox_urls:
return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie='SportBoxEmbed')
# Look for embedded XHamster player
xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
if xhamster_urls:
return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
return self.playlist_from_matches(xhamster_urls, video_id, video_title, ie='XHamsterEmbed')
# Look for embedded TNAFlixNetwork player
tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
if tnaflix_urls:
return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
return self.playlist_from_matches(tnaflix_urls, video_id, video_title, ie=TNAFlixNetworkEmbedIE.ie_key())
# Look for embedded PornHub player
pornhub_urls = PornHubIE._extract_urls(webpage)
if pornhub_urls:
return _playlist_from_matches(pornhub_urls, ie=PornHubIE.ie_key())
return self.playlist_from_matches(pornhub_urls, video_id, video_title, ie=PornHubIE.ie_key())
# Look for embedded DrTuber player
drtuber_urls = DrTuberIE._extract_urls(webpage)
if drtuber_urls:
return _playlist_from_matches(drtuber_urls, ie=DrTuberIE.ie_key())
return self.playlist_from_matches(drtuber_urls, video_id, video_title, ie=DrTuberIE.ie_key())
# Look for embedded RedTube player
redtube_urls = RedTubeIE._extract_urls(webpage)
if redtube_urls:
return _playlist_from_matches(redtube_urls, ie=RedTubeIE.ie_key())
return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key())
# Look for embedded Tvigle player
mobj = re.search(
@@ -2202,12 +2195,12 @@ class GenericIE(InfoExtractor):
# Look for embedded soundcloud player
soundcloud_urls = SoundcloudIE._extract_urls(webpage)
if soundcloud_urls:
return _playlist_from_matches(soundcloud_urls, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
# Look for tunein player
tunein_urls = TuneInBaseIE._extract_urls(webpage)
if tunein_urls:
return _playlist_from_matches(tunein_urls)
return self.playlist_from_matches(tunein_urls, video_id, video_title)
# Look for embedded mtvservices player
mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
@@ -2490,35 +2483,35 @@ class GenericIE(InfoExtractor):
# Look for DBTV embeds
dbtv_urls = DBTVIE._extract_urls(webpage)
if dbtv_urls:
return _playlist_from_matches(dbtv_urls, ie=DBTVIE.ie_key())
return self.playlist_from_matches(dbtv_urls, video_id, video_title, ie=DBTVIE.ie_key())
# Look for Videa embeds
videa_urls = VideaIE._extract_urls(webpage)
if videa_urls:
return _playlist_from_matches(videa_urls, ie=VideaIE.ie_key())
return self.playlist_from_matches(videa_urls, video_id, video_title, ie=VideaIE.ie_key())
# Look for 20 minuten embeds
twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
if twentymin_urls:
return _playlist_from_matches(
twentymin_urls, ie=TwentyMinutenIE.ie_key())
return self.playlist_from_matches(
twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key())
# Look for Openload embeds
openload_urls = OpenloadIE._extract_urls(webpage)
if openload_urls:
return _playlist_from_matches(
openload_urls, ie=OpenloadIE.ie_key())
return self.playlist_from_matches(
openload_urls, video_id, video_title, ie=OpenloadIE.ie_key())
# Look for VideoPress embeds
videopress_urls = VideoPressIE._extract_urls(webpage)
if videopress_urls:
return _playlist_from_matches(
videopress_urls, ie=VideoPressIE.ie_key())
return self.playlist_from_matches(
videopress_urls, video_id, video_title, ie=VideoPressIE.ie_key())
# Look for Rutube embeds
rutube_urls = RutubeIE._extract_urls(webpage)
if rutube_urls:
return _playlist_from_matches(
return self.playlist_from_matches(
rutube_urls, ie=RutubeIE.ie_key())
# Looking for http://schema.org/VideoObject

View File

@@ -0,0 +1,259 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
parse_duration,
try_get,
unified_timestamp,
urlencode_postdata,
)
class MedialaanIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
(?:www\.)?
(?:
(?P<site_id>vtm|q2|vtmkzoom)\.be/
(?:
video(?:/[^/]+/id/|/?\?.*?\baid=)|
(?:[^/]+/)*
)
)
(?P<id>[^/?#&]+)
'''
_NETRC_MACHINE = 'medialaan'
_APIKEY = '3_HZ0FtkMW_gOyKlqQzW5_0FHRC7Nd5XpXJZcDdXY4pk5eES2ZWmejRW5egwVm4ug-'
_SITE_TO_APP_ID = {
'vtm': 'vtm_watch',
'q2': 'q2',
'vtmkzoom': 'vtmkzoom',
}
_TESTS = [{
# vod
'url': 'http://vtm.be/video/volledige-afleveringen/id/vtm_20170219_VM0678361_vtmwatch',
'info_dict': {
'id': 'vtm_20170219_VM0678361_vtmwatch',
'ext': 'mp4',
'title': 'Allemaal Chris afl. 6',
'description': 'md5:4be86427521e7b07e0adb0c9c554ddb2',
'timestamp': 1487533280,
'upload_date': '20170219',
'duration': 2562,
'series': 'Allemaal Chris',
'season': 'Allemaal Chris',
'season_number': 1,
'season_id': '256936078124527',
'episode': 'Allemaal Chris afl. 6',
'episode_number': 6,
'episode_id': '256936078591527',
},
'params': {
'skip_download': True,
},
'skip': 'Requires account credentials',
}, {
# clip
'url': 'http://vtm.be/video?aid=168332',
'info_dict': {
'id': '168332',
'ext': 'mp4',
'title': '"Veronique liegt!"',
'description': 'md5:1385e2b743923afe54ba4adc38476155',
'timestamp': 1489002029,
'upload_date': '20170308',
'duration': 96,
},
}, {
# vod
'url': 'http://vtm.be/video/volledige-afleveringen/id/257107153551000',
'only_matching': True,
}, {
# vod
'url': 'http://vtm.be/video?aid=163157',
'only_matching': True,
}, {
# vod
'url': 'http://www.q2.be/video/volledige-afleveringen/id/2be_20170301_VM0684442_q2',
'only_matching': True,
}, {
# clip
'url': 'http://vtmkzoom.be/k3-dansstudio/een-nieuw-seizoen-van-k3-dansstudio',
'only_matching': True,
}]
def _real_initialize(self):
self._logged_in = False
def _login(self):
username, password = self._get_login_info()
if username is None:
self.raise_login_required()
auth_data = {
'APIKey': self._APIKEY,
'sdk': 'js_6.1',
'format': 'json',
'loginID': username,
'password': password,
}
auth_info = self._download_json(
'https://accounts.eu1.gigya.com/accounts.login', None,
note='Logging in', errnote='Unable to log in',
data=urlencode_postdata(auth_data))
error_message = auth_info.get('errorDetails') or auth_info.get('errorMessage')
if error_message:
raise ExtractorError(
'Unable to login: %s' % error_message, expected=True)
self._uid = auth_info['UID']
self._uid_signature = auth_info['UIDSignature']
self._signature_timestamp = auth_info['signatureTimestamp']
self._logged_in = True
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id, site_id = mobj.group('id', 'site_id')
webpage = self._download_webpage(url, video_id)
config = self._parse_json(
self._search_regex(
r'videoJSConfig\s*=\s*JSON\.parse\(\'({.+?})\'\);',
webpage, 'config', default='{}'), video_id,
transform_source=lambda s: s.replace(
'\\\\', '\\').replace(r'\"', '"').replace(r"\'", "'"))
vod_id = config.get('vodId') or self._search_regex(
(r'\\"vodId\\"\s*:\s*\\"(.+?)\\"',
r'<[^>]+id=["\']vod-(\d+)'),
webpage, 'video_id', default=None)
# clip, no authentication required
if not vod_id:
player = self._parse_json(
self._search_regex(
r'vmmaplayer\(({.+?})\);', webpage, 'vmma player',
default=''),
video_id, transform_source=lambda s: '[%s]' % s, fatal=False)
if player:
video = player[-1]
info = {
'id': video_id,
'url': video['videoUrl'],
'title': video['title'],
'thumbnail': video.get('imageUrl'),
'timestamp': int_or_none(video.get('createdDate')),
'duration': int_or_none(video.get('duration')),
}
else:
info = self._parse_html5_media_entries(
url, webpage, video_id, m3u8_id='hls')[0]
info.update({
'id': video_id,
'title': self._html_search_meta('description', webpage),
'duration': parse_duration(self._html_search_meta('duration', webpage)),
})
# vod, authentication required
else:
if not self._logged_in:
self._login()
settings = self._parse_json(
self._search_regex(
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
webpage, 'drupal settings', default='{}'),
video_id)
def get(container, item):
return try_get(
settings, lambda x: x[container][item],
compat_str) or self._search_regex(
r'"%s"\s*:\s*"([^"]+)' % item, webpage, item,
default=None)
app_id = get('vod', 'app_id') or self._SITE_TO_APP_ID.get(site_id, 'vtm_watch')
sso = get('vod', 'gigyaDatabase') or 'vtm-sso'
data = self._download_json(
'http://vod.medialaan.io/api/1.0/item/%s/video' % vod_id,
video_id, query={
'app_id': app_id,
'user_network': sso,
'UID': self._uid,
'UIDSignature': self._uid_signature,
'signatureTimestamp': self._signature_timestamp,
})
formats = self._extract_m3u8_formats(
data['response']['uri'], video_id, entry_protocol='m3u8_native',
ext='mp4', m3u8_id='hls')
self._sort_formats(formats)
info = {
'id': vod_id,
'formats': formats,
}
api_key = get('vod', 'apiKey')
channel = get('medialaanGigya', 'channel')
if api_key:
videos = self._download_json(
'http://vod.medialaan.io/vod/v2/videos', video_id, fatal=False,
query={
'channels': channel,
'ids': vod_id,
'limit': 1,
'apikey': api_key,
})
if videos:
video = try_get(
videos, lambda x: x['response']['videos'][0], dict)
if video:
def get(container, item, expected_type=None):
return try_get(
video, lambda x: x[container][item], expected_type)
def get_string(container, item):
return get(container, item, compat_str)
info.update({
'series': get_string('program', 'title'),
'season': get_string('season', 'title'),
'season_number': int_or_none(get('season', 'number')),
'season_id': get_string('season', 'id'),
'episode': get_string('episode', 'title'),
'episode_number': int_or_none(get('episode', 'number')),
'episode_id': get_string('episode', 'id'),
'duration': int_or_none(
video.get('duration')) or int_or_none(
video.get('durationMillis'), scale=1000),
'title': get_string('episode', 'title'),
'description': get_string('episode', 'text'),
'timestamp': unified_timestamp(get_string(
'publication', 'begin')),
})
if not info.get('title'):
info['title'] = try_get(
config, lambda x: x['videoConfig']['title'],
compat_str) or self._html_search_regex(
r'\\"title\\"\s*:\s*\\"(.+?)\\"', webpage, 'title',
default=None) or self._og_search_title(webpage)
if not info.get('description'):
info['description'] = self._html_search_regex(
r'<div[^>]+class="field-item\s+even">\s*<p>(.+?)</p>',
webpage, 'description', default=None)
return info

View File

@@ -78,34 +78,45 @@ class OpenloadIE(InfoExtractor):
video_url_chars = []
first_char = ord(ol_id[0])
key = first_char - 50
key = first_char - 55
maxKey = max(2, key)
key = min(maxKey, len(ol_id) - 22)
t = ol_id[key:key + 20]
key = min(maxKey, len(ol_id) - 38)
t = ol_id[key:key + 36]
hashMap = {}
v = ol_id.replace(t, "")
v = ol_id.replace(t, '')
h = 0
while h < len(t):
f = t[h:h + 2]
i = int(f, 16)
hashMap[h / 2] = i
h += 2
f = t[h:h + 3]
i = int(f, 8)
hashMap[h / 3] = i
h += 3
h = 0
H = 0
while h < len(v):
B = v[h:h + 3]
B = ''
C = ''
if len(v) >= h + 2:
B = v[h:h + 2]
if len(v) >= h + 3:
C = v[h:h + 3]
i = int(B, 16)
if (h / 3) % 3 == 0:
i = int(B, 8)
index = (h / 3) % 10
h += 2
if H % 3 == 0:
i = int(C, 8)
h += 1
elif H % 2 == 0 and H != 0 and ord(v[H - 1]) < 60:
i = int(C, 10)
h += 1
index = H % 12
A = hashMap[index]
i = i ^ 47
i = i ^ A
i ^= 213
i ^= A
video_url_chars.append(compat_chr(i))
h += 3
H += 1
video_url = 'https://openload.co/stream/%s?mime=true'
video_url = video_url % (''.join(video_url_chars))

View File

@@ -0,0 +1,81 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_duration,
)
class ToonGogglesIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?toongoggles\.com/shows/(?P<show_id>\d+)(?:/[^/]+/episodes/(?P<episode_id>\d+))?'
_TESTS = [{
'url': 'http://www.toongoggles.com/shows/217143/bernard-season-2/episodes/217147/football',
'md5': '18289fc2b951eff6b953a9d8f01e6831',
'info_dict': {
'id': '217147',
'ext': 'mp4',
'title': 'Football',
'uploader_id': '1',
'description': 'Bernard decides to play football in order to be better than Lloyd and tries to beat him no matter how, he even cheats.',
'upload_date': '20160718',
'timestamp': 1468879330,
}
}, {
'url': 'http://www.toongoggles.com/shows/227759/om-nom-stories-around-the-world',
'info_dict': {
'id': '227759',
'title': 'Om Nom Stories Around The World',
},
'playlist_mincount': 11,
}]
def _call_api(self, action, page_id, query):
query.update({
'for_ng': 1,
'for_web': 1,
'show_meta': 1,
'version': 7.0,
})
return self._download_json('http://api.toongoggles.com/' + action, page_id, query=query)
def _parse_episode_data(self, episode_data):
title = episode_data['episode_name']
return {
'_type': 'url_transparent',
'id': episode_data['episode_id'],
'title': title,
'url': 'kaltura:513551:' + episode_data['entry_id'],
'thumbnail': episode_data.get('thumbnail_url'),
'description': episode_data.get('description'),
'duration': parse_duration(episode_data.get('hms')),
'series': episode_data.get('show_name'),
'season_number': int_or_none(episode_data.get('season_num')),
'episode_id': episode_data.get('episode_id'),
'episode': title,
'episode_number': int_or_none(episode_data.get('episode_num')),
'categories': episode_data.get('categories'),
'ie_key': 'Kaltura',
}
def _real_extract(self, url):
show_id, episode_id = re.match(self._VALID_URL, url).groups()
if episode_id:
episode_data = self._call_api('search', episode_id, {
'filter': 'episode',
'id': episode_id,
})['objects'][0]
return self._parse_episode_data(episode_data)
else:
show_data = self._call_api('getepisodesbyshow', show_id, {
'max': 1000000000,
'showid': show_id,
})
entries = []
for episode_data in show_data.get('objects', []):
entries.append(self._parse_episode_data(episode_data))
return self.playlist_result(entries, show_id, show_data.get('show_name'))

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2017.03.16'
__version__ = '2017.03.20'