Compare commits


19 Commits

Author         SHA1        Message                                                              Date
Sergey M․      f73d7d5074  release 2016.12.18                                                   2016-12-18 19:50:33 +07:00
Sergey M․      52a1d48d9f  [ChangeLog] Actualize                                                2016-12-18 19:48:59 +07:00
Sergey M․      d5e623aaa1  Credit @pyx for meipai (#10718)                                      2016-12-18 19:46:57 +07:00
Remita Amine   199a47abba  [ccma] Add new extractor(closes #11359)                              2016-12-18 10:49:10 +01:00
Remita Amine   b42a0bf360  [laola1tv] add support embed urls and improve extraction(#11460)    2016-12-17 21:48:45 +01:00
Remita Amine   6e416b210c  [nbc] fix extraction for msnbc videos(fixes #11466)                  2016-12-17 18:11:13 +01:00
Sergey M․      04bf59ff64  [extractors] Add missing twitch imports                              2016-12-17 23:03:50 +07:00
Sergey M․      87a449c1ed  [extractor/common] Recognize DASH formats in html5 media entries    2016-12-17 23:03:13 +07:00
Sergey M․      93753aad20  [twitch] Adapt to new videos pages schema (closes #11469)           2016-12-17 20:20:23 +07:00
Sergey M․      2786818c33  [meipai] Fix regular videos extraction and improve (closes #10718)  2016-12-17 19:42:34 +07:00
Philip Xu      9b785768ac  [meipai] Add extractor                                               2016-12-17 19:41:35 +07:00
Sergey M․      47c914f995  [ondemandkorea] Fix extraction (closes #10772)                       2016-12-17 18:50:12 +07:00
Sergey M․      732d116aa7  [jwplatform] Improve duration extraction                             2016-12-17 18:50:07 +07:00
Sergey M․      a495840d3b  [jwplatform] Improve subtitles extraction                            2016-12-17 18:50:00 +07:00
Sergey M․      b0c65c677f  [utils] Improve urljoin                                              2016-12-17 18:49:55 +07:00
ping           594601f545  [ondemandkorea] Add extractor                                        2016-12-17 18:49:45 +07:00
Sergey M․      0ae9560eea  [vporn] Use urljoin for thumbnail                                    2016-12-16 23:57:51 +07:00
Remita Amine   dc1f3a9f20  [vvvvid] do not cache the conn_id                                    2016-12-16 11:05:46 +01:00
Remita Amine   7b1e80792b  [vvvvid] Add new extractor(closes #5915)                             2016-12-16 09:05:34 +01:00
18 changed files with 630 additions and 121 deletions

.github/ISSUE_TEMPLATE.md

@@ -6,8 +6,8 @@
---
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.12.15*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.12.15**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.12.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.12.18**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2016.12.15
+[debug] youtube-dl version 2016.12.18
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

AUTHORS

@@ -190,3 +190,4 @@ John Hawkinson
Rich Leeper
Zhong Jianxin
Thor77
+Philip Xu

ChangeLog

@@ -1,3 +1,20 @@
+version 2016.12.18
+
+Core
++ [extractor/common] Recognize DASH formats in html5 media entries
+
+Extractors
++ [ccma] Add support for ccma.cat (#11359)
+* [laola1tv] Improve extraction
++ [laola1tv] Add support for embed URLs (#11460)
+* [nbc] Fix extraction for MSNBC videos (#11466)
+* [twitch] Adapt to new videos pages URL schema (#11469)
++ [meipai] Add support for meipai.com (#10718)
+* [jwplatform] Improve subtitles and duration extraction
++ [ondemandkorea] Add support for ondemandkorea.com (#10772)
++ [vvvvid] Add support for vvvvid.it (#5915)
+
+
version 2016.12.15

Core

docs/supportedsites.md

@@ -131,6 +131,7 @@
- **cbsnews**: CBS News
- **cbsnews:livevideo**: CBS News Live Videos
- **CBSSports**
+- **CCMA**
- **CCTV**
- **CDA**
- **CeskaTelevize**
@@ -365,6 +366,7 @@
- **kuwo:song**: 酷我音乐
- **la7.it**
- **Laola1Tv**
+- **Laola1TvEmbed**
- **LCI**
- **Lcp**
- **LcpPlay**
@@ -402,6 +404,7 @@
- **MatchTV**
- **MDR**: MDR.DE and KiKA
- **media.ccc.de**
+- **Meipai**: 美拍
- **MelonVOD**
- **META**
- **metacafe**
@@ -524,6 +527,7 @@
- **Odnoklassniki**
- **OktoberfestTV**
- **on.aol.com**
+- **OnDemandKorea**
- **onet.tv**
- **onet.tv:channel**
- **OnionStudios**
@@ -785,10 +789,13 @@
- **Tweakers**
- **twitch:chapter**
- **twitch:clips**
-- **twitch:past_broadcasts**
- **twitch:profile**
- **twitch:stream**
- **twitch:video**
+- **twitch:videos:all**
+- **twitch:videos:highlights**
+- **twitch:videos:past-broadcasts**
+- **twitch:videos:uploads**
- **twitch:vod**
- **twitter**
- **twitter:amplify**
@@ -874,6 +881,7 @@
- **VRT**
- **vube**: Vube.com
- **VuClip**
+- **VVVVID**
- **VyboryMos**
- **Vzaar**
- **Walla**

test/test_utils.py

@@ -448,11 +448,14 @@ class TestUtil(unittest.TestCase):
    def test_urljoin(self):
        self.assertEqual(urljoin('http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+        self.assertEqual(urljoin('//foo.de/', '/a/b/c.txt'), '//foo.de/a/b/c.txt')
        self.assertEqual(urljoin('http://foo.de/', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt')
        self.assertEqual(urljoin('http://foo.de', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
        self.assertEqual(urljoin('http://foo.de', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt')
        self.assertEqual(urljoin('http://foo.de/', 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+        self.assertEqual(urljoin('http://foo.de/', '//foo.de/a/b/c.txt'), '//foo.de/a/b/c.txt')
        self.assertEqual(urljoin(None, 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+        self.assertEqual(urljoin(None, '//foo.de/a/b/c.txt'), '//foo.de/a/b/c.txt')
        self.assertEqual(urljoin('', 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
        self.assertEqual(urljoin(['foobar'], 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
        self.assertEqual(urljoin('http://foo.de/', None), None)

youtube_dl/extractor/ccma.py

@@ -0,0 +1,99 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    parse_duration,
    parse_iso8601,
    clean_html,
)


class CCMAIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?P<type>video|audio)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/',
        'md5': '7296ca43977c8ea4469e719c609b0871',
        'info_dict': {
            'id': '5630208',
            'ext': 'mp4',
            'title': 'L\'espot de La Marató de TV3',
            'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
            'timestamp': 1470918540,
            'upload_date': '20160811',
        }
    }, {
        'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
        'md5': 'fa3e38f269329a278271276330261425',
        'info_dict': {
            'id': '943685',
            'ext': 'mp3',
            'title': 'El Consell de Savis analitza el derbi',
            'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
            'upload_date': '20171205',
            'timestamp': 1512507300,
        }
    }]

    def _real_extract(self, url):
        media_type, media_id = re.match(self._VALID_URL, url).groups()
        media_data = {}
        formats = []
        profiles = ['pc'] if media_type == 'audio' else ['mobil', 'pc']
        for i, profile in enumerate(profiles):
            md = self._download_json('http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
                'media': media_type,
                'idint': media_id,
                'profile': profile,
            }, fatal=False)
            if md:
                media_data = md
                media_url = media_data.get('media', {}).get('url')
                if media_url:
                    formats.append({
                        'format_id': profile,
                        'url': media_url,
                        'quality': i,
                    })
        self._sort_formats(formats)

        informacio = media_data['informacio']
        title = informacio['titol']
        durada = informacio.get('durada', {})
        duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text'))
        timestamp = parse_iso8601(informacio.get('data_emissio', {}).get('utc'))

        subtitles = {}
        subtitols = media_data.get('subtitols', {})
        if subtitols:
            sub_url = subtitols.get('url')
            if sub_url:
                subtitles.setdefault(
                    subtitols.get('iso') or subtitols.get('text') or 'ca', []).append({
                        'url': sub_url,
                    })

        thumbnails = []
        imatges = media_data.get('imatges', {})
        if imatges:
            thumbnail_url = imatges.get('url')
            if thumbnail_url:
                thumbnails = [{
                    'url': thumbnail_url,
                    'width': int_or_none(imatges.get('amplada')),
                    'height': int_or_none(imatges.get('alcada')),
                }]

        return {
            'id': media_id,
            'title': title,
            'description': clean_html(informacio.get('descripcio')),
            'duration': duration,
            'timestamp': timestamp,
            'thumbnails': thumbnails,
            'subtitles': subtitles,
            'formats': formats,
        }

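Reviewer note: int_or_none takes a scale divisor as its second argument, so the CCMA 'milisegons' field above is divided down to whole seconds before the text-based fallback runs. A minimal sketch (the input values are made up; the '02:32' format for durada['text'] is an assumption):

from youtube_dl.utils import int_or_none, parse_duration

# int_or_none(v, scale) -> int(v) // scale, or None when v is missing
assert int_or_none('152000', 1000) == 152  # milliseconds -> seconds
assert int_or_none(None, 1000) is None     # falls through to parse_duration
assert parse_duration('02:32') == 152      # a hypothetical durada['text'] value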
youtube_dl/extractor/common.py

@@ -1888,7 +1888,7 @@ class InfoExtractor(object):
        })
        return formats

-    def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8'):
+    def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None):
        def absolute_url(video_url):
            return compat_urlparse.urljoin(base_url, video_url)

@@ -1905,11 +1905,16 @@
        def _media_formats(src, cur_media_type):
            full_url = absolute_url(src)
-            if determine_ext(full_url) == 'm3u8':
+            ext = determine_ext(full_url)
+            if ext == 'm3u8':
                is_plain_url = False
                formats = self._extract_m3u8_formats(
                    full_url, video_id, ext='mp4',
                    entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id)
+            elif ext == 'mpd':
+                is_plain_url = False
+                formats = self._extract_mpd_formats(
+                    full_url, video_id, mpd_id=mpd_id)
            else:
                is_plain_url = True
                formats = [{

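Reviewer note: with mpd_id threaded through, any html5 media source whose URL ends in .mpd is now expanded into DASH formats instead of being treated as a single progressive download. A rough sketch of the dispatch (the URLs are invented; determine_ext is the real helper used above):

from youtube_dl.utils import determine_ext

for src in ('http://example.com/master.m3u8',
            'http://example.com/manifest.mpd',
            'http://example.com/video.mp4'):
    ext = determine_ext(src)
    if ext == 'm3u8':
        print(src, '-> HLS formats (_extract_m3u8_formats)')
    elif ext == 'mpd':
        print(src, '-> DASH formats (_extract_mpd_formats)')
    else:
        print(src, '-> single plain-URL format')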
youtube_dl/extractor/extractors.py

@@ -150,6 +150,7 @@ from .cbsnews import (
)
from .cbssports import CBSSportsIE
from .ccc import CCCIE
+from .ccma import CCMAIE
from .cctv import CCTVIE
from .cda import CDAIE
from .ceskatelevize import CeskaTelevizeIE
@@ -446,7 +447,10 @@ from .kuwo import (
    KuwoMvIE,
)
from .la7 import LA7IE
-from .laola1tv import Laola1TvIE
+from .laola1tv import (
+    Laola1TvEmbedIE,
+    Laola1TvIE,
+)
from .lci import LCIIE
from .lcp import (
    LcpPlayIE,
@@ -498,6 +502,7 @@ from .mangomolo import (
)
from .matchtv import MatchTVIE
from .mdr import MDRIE
+from .meipai import MeipaiIE
from .melonvod import MelonVODIE
from .meta import METAIE
from .metacafe import MetacafeIE
@@ -662,6 +667,7 @@ from .nzz import NZZIE
from .odatv import OdaTVIE
from .odnoklassniki import OdnoklassnikiIE
from .oktoberfesttv import OktoberfestTVIE
+from .ondemandkorea import OnDemandKoreaIE
from .onet import (
    OnetIE,
    OnetChannelIE,
@@ -999,7 +1005,10 @@ from .twitch import (
    TwitchChapterIE,
    TwitchVodIE,
    TwitchProfileIE,
+    TwitchAllVideosIE,
+    TwitchUploadsIE,
    TwitchPastBroadcastsIE,
+    TwitchHighlightsIE,
    TwitchStreamIE,
    TwitchClipsIE,
)
@@ -1110,6 +1119,7 @@ from .vporn import VpornIE
from .vrt import VRTIE
from .vube import VubeIE
from .vuclip import VuClipIE
+from .vvvvid import VVVVIDIE
from .vyborymos import VyboryMosIE
from .vzaar import VzaarIE
from .walla import WallaIE

youtube_dl/extractor/jwplatform.py

@@ -11,6 +11,7 @@ from ..utils import (
    int_or_none,
    js_to_json,
    mimetype2ext,
+    urljoin,
)
@@ -110,10 +111,14 @@ class JWPlatformBaseIE(InfoExtractor):
        tracks = video_data.get('tracks')
        if tracks and isinstance(tracks, list):
            for track in tracks:
-                if track.get('file') and track.get('kind') == 'captions':
-                    subtitles.setdefault(track.get('label') or 'en', []).append({
-                        'url': self._proto_relative_url(track['file'])
-                    })
+                if track.get('kind') != 'captions':
+                    continue
+                track_url = urljoin(base_url, track.get('file'))
+                if not track_url:
+                    continue
+                subtitles.setdefault(track.get('label') or 'en', []).append({
+                    'url': self._proto_relative_url(track_url)
+                })

        entries.append({
            'id': this_video_id,
@@ -121,7 +126,7 @@ class JWPlatformBaseIE(InfoExtractor):
            'description': video_data.get('description'),
            'thumbnail': self._proto_relative_url(video_data.get('image')),
            'timestamp': int_or_none(video_data.get('pubdate')),
-            'duration': float_or_none(jwplayer_data.get('duration')),
+            'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
            'subtitles': subtitles,
            'formats': formats,
        })

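Reviewer note: switching to urljoin means relative caption paths are resolved against the page URL, and a missing file now skips the track cleanly instead of raising. For illustration (the URLs are hypothetical):

from youtube_dl.utils import urljoin

print(urljoin('http://example.com/embed/player.html', 'subs/en.vtt'))
# -> http://example.com/embed/subs/en.vtt
print(urljoin('http://example.com/embed/player.html', None))
# -> None, which the new guard turns into a 'continue'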
youtube_dl/extractor/laola1tv.py

@@ -1,25 +1,81 @@
# coding: utf-8
from __future__ import unicode_literals

-import re
-
from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse_urlencode,
-    compat_urlparse,
-)
from ..utils import (
    ExtractorError,
-    sanitized_Request,
    unified_strdate,
    urlencode_postdata,
    xpath_element,
    xpath_text,
+    urljoin,
)


+class Laola1TvEmbedIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?laola1\.tv/titanplayer\.php\?.*?\bvideoid=(?P<id>\d+)'
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        flash_vars = self._search_regex(
+            r'(?s)flashvars\s*=\s*({.+?});', webpage, 'flash vars')
+        get_flashvar = lambda x: self._search_regex(r'%s\s*:\s*"([^"]+)"' % x, flash_vars, x)
+
+        hd_doc = self._download_xml(
+            'http://www.laola1.tv/server/hd_video.php', video_id, query={
+                'play': get_flashvar('streamid'),
+                'partner': get_flashvar('partnerid'),
+                'portal': get_flashvar('portalid'),
+                'lang': get_flashvar('sprache'),
+                'v5ident': '',
+            })
+
+        _v = lambda x, **k: xpath_text(hd_doc, './/video/' + x, **k)
+        title = _v('title', fatal=True)
+
+        data_abo = urlencode_postdata(
+            dict((i, v) for i, v in enumerate(_v('req_liga_abos').split(','))))
+        token_url = self._download_json(
+            'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access',
+            video_id, query={
+                'videoId': _v('id'),
+                'target': self._search_regex(r'vs_target = (\d+);', webpage, 'vs target'),
+                'label': _v('label'),
+                'area': _v('area'),
+            }, data=data_abo)['data']['stream-access'][0]
+
+        token_doc = self._download_xml(
+            token_url, video_id, 'Downloading token',
+            headers=self.geo_verification_headers())
+
+        token_attrib = xpath_element(token_doc, './/token').attrib
+        if token_attrib['status'] != '0':
+            raise ExtractorError(
+                'Token error: %s' % token_attrib['comment'], expected=True)
+
+        formats = self._extract_akamai_formats(
+            '%s?hdnea=%s' % (token_attrib['url'], token_attrib['auth']),
+            video_id)
+        self._sort_formats(formats)
+
+        categories_str = _v('meta_sports')
+        categories = categories_str.split(',') if categories_str else []
+        is_live = _v('islive') == 'true'
+
+        return {
+            'id': video_id,
+            'title': self._live_title(title) if is_live else title,
+            'upload_date': unified_strdate(_v('time_date')),
+            'uploader': _v('meta_organisation'),
+            'categories': categories,
+            'is_live': is_live,
+            'formats': formats,
+        }
+
+
class Laola1TvIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/(?P<kind>[^/]+)/(?P<slug>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
        'info_dict': {
@@ -67,85 +123,20 @@ class Laola1TvIE(InfoExtractor):
    }]

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('slug')
-        kind = mobj.group('kind')
-        lang = mobj.group('lang')
-        portal = mobj.group('portal')
+        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        if 'Dieser Livestream ist bereits beendet.' in webpage:
            raise ExtractorError('This live stream has already finished.', expected=True)

-        iframe_url = self._search_regex(
+        iframe_url = urljoin(url, self._search_regex(
            r'<iframe[^>]*?id="videoplayer"[^>]*?src="([^"]+)"',
-            webpage, 'iframe url')
-
-        video_id = self._search_regex(
-            r'videoid=(\d+)', iframe_url, 'video id')
-
-        iframe = self._download_webpage(compat_urlparse.urljoin(
-            url, iframe_url), display_id, 'Downloading iframe')
-
-        partner_id = self._search_regex(
-            r'partnerid\s*:\s*(["\'])(?P<partner_id>.+?)\1',
-            iframe, 'partner id', group='partner_id')
-
-        hd_doc = self._download_xml(
-            'http://www.laola1.tv/server/hd_video.php?%s'
-            % compat_urllib_parse_urlencode({
-                'play': video_id,
-                'partner': partner_id,
-                'portal': portal,
-                'lang': lang,
-                'v5ident': '',
-            }), display_id)
-
-        _v = lambda x, **k: xpath_text(hd_doc, './/video/' + x, **k)
-        title = _v('title', fatal=True)
-
-        VS_TARGETS = {
-            'video': '2',
-            'livestream': '17',
-        }
-
-        req = sanitized_Request(
-            'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access?%s' %
-            compat_urllib_parse_urlencode({
-                'videoId': video_id,
-                'target': VS_TARGETS.get(kind, '2'),
-                'label': _v('label'),
-                'area': _v('area'),
-            }),
-            urlencode_postdata(
-                dict((i, v) for i, v in enumerate(_v('req_liga_abos').split(',')))))
-
-        token_url = self._download_json(req, display_id)['data']['stream-access'][0]
-        token_doc = self._download_xml(token_url, display_id, 'Downloading token')
-
-        token_attrib = xpath_element(token_doc, './/token').attrib
-        token_auth = token_attrib['auth']
-
-        if token_auth in ('blocked', 'restricted', 'error'):
-            raise ExtractorError(
-                'Token error: %s' % token_attrib['comment'], expected=True)
-
-        formats = self._extract_f4m_formats(
-            '%s?hdnea=%s&hdcore=3.2.0' % (token_attrib['url'], token_auth),
-            video_id, f4m_id='hds')
-        self._sort_formats(formats)
-
-        categories_str = _v('meta_sports')
-        categories = categories_str.split(',') if categories_str else []
+            webpage, 'iframe url'))

        return {
-            'id': video_id,
+            '_type': 'url',
            'display_id': display_id,
-            'title': title,
-            'upload_date': unified_strdate(_v('time_date')),
-            'uploader': _v('meta_organisation'),
-            'categories': categories,
-            'is_live': _v('islive') == 'true',
-            'formats': formats,
+            'url': iframe_url,
+            'ie_key': 'Laola1TvEmbed',
        }

youtube_dl/extractor/meipai.py

@@ -0,0 +1,104 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    parse_duration,
    unified_timestamp,
)


class MeipaiIE(InfoExtractor):
    IE_DESC = '美拍'
    _VALID_URL = r'https?://(?:www\.)?meipai\.com/media/(?P<id>[0-9]+)'
    _TESTS = [{
        # regular uploaded video
        'url': 'http://www.meipai.com/media/531697625',
        'md5': 'e3e9600f9e55a302daecc90825854b4f',
        'info_dict': {
            'id': '531697625',
            'ext': 'mp4',
            'title': '#葉子##阿桑##余姿昀##超級女聲#',
            'description': '#葉子##阿桑##余姿昀##超級女聲#',
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': 152,
            'timestamp': 1465492420,
            'upload_date': '20160609',
            'view_count': 35511,
            'creator': '她她-TATA',
            'tags': ['葉子', '阿桑', '余姿昀', '超級女聲'],
        }
    }, {
        # record of live streaming
        'url': 'http://www.meipai.com/media/585526361',
        'md5': 'ff7d6afdbc6143342408223d4f5fb99a',
        'info_dict': {
            'id': '585526361',
            'ext': 'mp4',
            'title': '姿昀和善願 練歌練琴啦😁😁😁',
            'description': '姿昀和善願 練歌練琴啦😁😁😁',
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': 5975,
            'timestamp': 1474311799,
            'upload_date': '20160919',
            'view_count': 1215,
            'creator': '她她-TATA',
        }
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._og_search_title(
            webpage, default=None) or self._html_search_regex(
            r'<title[^>]*>([^<]+)</title>', webpage, 'title')

        formats = []

        # recorded playback of live streaming
        m3u8_url = self._html_search_regex(
            r'file:\s*encodeURIComponent\((["\'])(?P<url>(?:(?!\1).)+)\1\)',
            webpage, 'm3u8 url', group='url', default=None)
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls', fatal=False))

        if not formats:
            # regular uploaded video
            video_url = self._search_regex(
                r'data-video=(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'video url',
                group='url', default=None)
            if video_url:
                formats.append({
                    'url': video_url,
                    'format_id': 'http',
                })

        timestamp = unified_timestamp(self._og_search_property(
            'video:release_date', webpage, 'release date', fatal=False))

        tags = self._og_search_property(
            'video:tag', webpage, 'tags', default='').split(',')

        view_count = int_or_none(self._html_search_meta(
            'interactionCount', webpage, 'view count'))
        duration = parse_duration(self._html_search_meta(
            'duration', webpage, 'duration'))
        creator = self._og_search_property(
            'video:director', webpage, 'creator', fatal=False)

        return {
            'id': video_id,
            'title': title,
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'duration': duration,
            'timestamp': timestamp,
            'view_count': view_count,
            'creator': creator,
            'tags': tags,
            'formats': formats,
        }

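Reviewer note: the extractor tries the live-playback m3u8 first and only falls back to the data-video attribute for regular uploads. The two regexes in miniature, run against made-up page fragments:

import re

live_page = "file: encodeURIComponent('http://example.com/playlist.m3u8')"
regular_page = 'data-video="http://example.com/video.mp4"'

m3u8 = re.search(
    r'file:\s*encodeURIComponent\((["\'])(?P<url>(?:(?!\1).)+)\1\)', live_page)
print(m3u8.group('url'))  # -> http://example.com/playlist.m3u8

mp4 = re.search(r'data-video=(["\'])(?P<url>(?:(?!\1).)+)\1', regular_page)
print(mp4.group('url'))   # -> http://example.com/video.mp4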
youtube_dl/extractor/nbc.py

@@ -9,6 +9,7 @@ from ..utils import (
    lowercase_escape,
    smuggle_url,
    unescapeHTML,
+    update_url_query,
)
@@ -208,7 +209,7 @@ class NBCNewsIE(ThePlatformIE):
            'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
            'md5': 'af1adfa51312291a017720403826bb64',
            'info_dict': {
-                'id': '269389891880',
+                'id': 'p_tweet_snow_140529',
                'ext': 'mp4',
                'title': 'How Twitter Reacted To The Snowden Interview',
                'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
@@ -232,7 +233,7 @@ class NBCNewsIE(ThePlatformIE):
            'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
            'md5': '73135a2e0ef819107bbb55a5a9b2a802',
            'info_dict': {
-                'id': '394064451844',
+                'id': 'nn_netcast_150204',
                'ext': 'mp4',
                'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
                'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
@@ -245,7 +246,7 @@ class NBCNewsIE(ThePlatformIE):
            'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
            'md5': 'a49e173825e5fcd15c13fc297fced39d',
            'info_dict': {
-                'id': '529953347624',
+                'id': 'x_lon_vwhorn_150922',
                'ext': 'mp4',
                'title': 'Volkswagen U.S. Chief:\xa0 We Have Totally Screwed Up',
                'description': 'md5:c8be487b2d80ff0594c005add88d8351',
@@ -258,7 +259,7 @@ class NBCNewsIE(ThePlatformIE):
            'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
            'md5': '118d7ca3f0bea6534f119c68ef539f71',
            'info_dict': {
-                'id': '669831235788',
+                'id': 'tdy_al_space_160420',
                'ext': 'mp4',
                'title': 'See the aurora borealis from space in stunning new NASA video',
                'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
@@ -271,7 +272,7 @@ class NBCNewsIE(ThePlatformIE):
            'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
            'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
            'info_dict': {
-                'id': '314487875924',
+                'id': 'n_hayes_Aimm_140801_272214',
                'ext': 'mp4',
                'title': 'The chaotic GOP immigration vote',
                'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
@@ -279,7 +280,6 @@ class NBCNewsIE(ThePlatformIE):
                'timestamp': 1406937606,
                'upload_date': '20140802',
                'uploader': 'NBCU-NEWS',
-                'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'],
            },
        },
        {
@@ -311,28 +311,41 @@ class NBCNewsIE(ThePlatformIE):
        else:
            # "feature" and "nightly-news" pages use theplatform.com
            video_id = mobj.group('mpx_id')
-            if not video_id.isdigit():
-                webpage = self._download_webpage(url, video_id)
-                info = None
-                bootstrap_json = self._search_regex(
-                    [r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$',
-                     r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"'],
-                    webpage, 'bootstrap json', default=None)
+            webpage = self._download_webpage(url, video_id)
+
+            filter_param = 'byId'
+            bootstrap_json = self._search_regex(
+                [r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$',
+                 r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"',
+                 r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);'],
+                webpage, 'bootstrap json', default=None)
+            if bootstrap_json:
                bootstrap = self._parse_json(
                    bootstrap_json, video_id, transform_source=unescapeHTML)
+
+                info = None
                if 'results' in bootstrap:
                    info = bootstrap['results'][0]['video']
                elif 'video' in bootstrap:
                    info = bootstrap['video']
+                elif 'msnbcVideoInfo' in bootstrap:
+                    info = bootstrap['msnbcVideoInfo']['meta']
+                elif 'msnbcThePlatform' in bootstrap:
+                    info = bootstrap['msnbcThePlatform']['videoPlayer']['video']
                else:
                    info = bootstrap
-                video_id = info['mpxId']
+
+                if 'guid' in info:
+                    video_id = info['guid']
+                    filter_param = 'byGuid'
+                elif 'mpxId' in info:
+                    video_id = info['mpxId']

            return {
                '_type': 'url_transparent',
                'id': video_id,
                # http://feed.theplatform.com/f/2E2eJC/nbcnews also works
-                'url': 'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews?byId=%s' % video_id,
+                'url': update_url_query('http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews', {filter_param: video_id}),
                'ie_key': 'ThePlatformFeed',
            }

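Reviewer note: update_url_query builds the ThePlatform feed URL with whichever filter the page data supports (byId or byGuid). A quick check using the guid from the MSNBC test above:

from youtube_dl.utils import update_url_query

print(update_url_query(
    'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews',
    {'byGuid': 'n_hayes_Aimm_140801_272214'}))
# -> http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews?byGuid=n_hayes_Aimm_140801_272214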
youtube_dl/extractor/ondemandkorea.py

@@ -0,0 +1,60 @@
# coding: utf-8
from __future__ import unicode_literals

from .jwplatform import JWPlatformBaseIE
from ..utils import (
    ExtractorError,
    js_to_json,
)


class OnDemandKoreaIE(JWPlatformBaseIE):
    _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html'
    _TEST = {
        'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html',
        'info_dict': {
            'id': 'ask-us-anything-e43',
            'ext': 'mp4',
            'title': 'Ask Us Anything : E43',
            'thumbnail': 're:^https?://.*\.jpg$',
        },
        'params': {
            'skip_download': 'm3u8 download'
        }
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id, fatal=False)
        if not webpage:
            # Page sometimes returns captcha page with HTTP 403
            raise ExtractorError(
                'Unable to access page. You may have been blocked.',
                expected=True)

        if 'msg_block_01.png' in webpage:
            self.raise_geo_restricted(
                'This content is not available in your region')

        if 'This video is only available to ODK PLUS members.' in webpage:
            raise ExtractorError(
                'This video is only available to ODK PLUS members.',
                expected=True)

        title = self._og_search_title(webpage)

        jw_config = self._parse_json(
            self._search_regex(
                r'(?s)jwplayer\(([\'"])(?:(?!\1).)+\1\)\.setup\s*\((?P<options>.+?)\);',
                webpage, 'jw config', group='options'),
            video_id, transform_source=js_to_json)
        info = self._parse_jwplayer_data(
            jw_config, video_id, require_title=False, m3u8_id='hls',
            base_url=url)

        info.update({
            'title': title,
            'thumbnail': self._og_search_thumbnail(webpage),
        })
        return info

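Reviewer note: jwplayer setup options are a JavaScript object literal rather than strict JSON, hence the js_to_json pass before _parse_json. A small demonstration (the options string is made up):

import json
from youtube_dl.utils import js_to_json

js_options = "{file: 'playlist.m3u8', image: 'poster.jpg'}"
print(json.loads(js_to_json(js_options)))
# -> {'file': 'playlist.m3u8', 'image': 'poster.jpg'}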
youtube_dl/extractor/twitch.py

@@ -300,7 +300,7 @@ class TwitchPlaylistBaseIE(TwitchBaseIE):
            response = self._call_api(
                self._PLAYLIST_PATH % (channel_id, offset, limit),
                channel_id,
-                'Downloading %s videos JSON page %s'
+                'Downloading %s JSON page %s'
                % (self._PLAYLIST_TYPE, counter_override or counter))
            page_entries = self._extract_playlist_page(response)
            if not page_entries:
@@ -350,19 +350,72 @@ class TwitchProfileIE(TwitchPlaylistBaseIE):
    }


-class TwitchPastBroadcastsIE(TwitchPlaylistBaseIE):
-    IE_NAME = 'twitch:past_broadcasts'
-    _VALID_URL = r'%s/(?P<id>[^/]+)/profile/past_broadcasts/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
-    _PLAYLIST_PATH = TwitchPlaylistBaseIE._PLAYLIST_PATH + '&broadcasts=true'
-    _PLAYLIST_TYPE = 'past broadcasts'
+class TwitchVideosBaseIE(TwitchPlaylistBaseIE):
+    _VALID_URL_VIDEOS_BASE = r'%s/(?P<id>[^/]+)/videos' % TwitchBaseIE._VALID_URL_BASE
+    _PLAYLIST_PATH = TwitchPlaylistBaseIE._PLAYLIST_PATH + '&broadcast_type='
+
+
+class TwitchAllVideosIE(TwitchVideosBaseIE):
+    IE_NAME = 'twitch:videos:all'
+    _VALID_URL = r'%s/all' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE
+    _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive,upload,highlight'
+    _PLAYLIST_TYPE = 'all videos'

    _TEST = {
-        'url': 'http://www.twitch.tv/spamfish/profile/past_broadcasts',
+        'url': 'https://www.twitch.tv/spamfish/videos/all',
        'info_dict': {
            'id': 'spamfish',
            'title': 'Spamfish',
        },
-        'playlist_mincount': 54,
+        'playlist_mincount': 869,
    }
+
+
+class TwitchUploadsIE(TwitchVideosBaseIE):
+    IE_NAME = 'twitch:videos:uploads'
+    _VALID_URL = r'%s/uploads' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE
+    _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'upload'
+    _PLAYLIST_TYPE = 'uploads'
+
+    _TEST = {
+        'url': 'https://www.twitch.tv/spamfish/videos/uploads',
+        'info_dict': {
+            'id': 'spamfish',
+            'title': 'Spamfish',
+        },
+        'playlist_mincount': 0,
+    }
+
+
+class TwitchPastBroadcastsIE(TwitchVideosBaseIE):
+    IE_NAME = 'twitch:videos:past-broadcasts'
+    _VALID_URL = r'%s/past-broadcasts' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE
+    _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive'
+    _PLAYLIST_TYPE = 'past broadcasts'
+
+    _TEST = {
+        'url': 'https://www.twitch.tv/spamfish/videos/past-broadcasts',
+        'info_dict': {
+            'id': 'spamfish',
+            'title': 'Spamfish',
+        },
+        'playlist_mincount': 0,
+    }
+
+
+class TwitchHighlightsIE(TwitchVideosBaseIE):
+    IE_NAME = 'twitch:videos:highlights'
+    _VALID_URL = r'%s/highlights' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE
+    _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'highlight'
+    _PLAYLIST_TYPE = 'highlights'
+
+    _TEST = {
+        'url': 'https://www.twitch.tv/spamfish/videos/highlights',
+        'info_dict': {
+            'id': 'spamfish',
+            'title': 'Spamfish',
+        },
+        'playlist_mincount': 805,
+    }

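Reviewer note: the new classes split the /videos pages by the broadcast_type query value. Assuming the shared TwitchBaseIE._VALID_URL_BASE prefix is the usual r'https?://(?:www\.)?twitch\.tv' (it is not shown in this diff), the matching works out like this:

import re

BASE = r'https?://(?:www\.)?twitch\.tv'  # assumed value of _VALID_URL_BASE
videos_base = r'%s/(?P<id>[^/]+)/videos' % BASE
all_videos = r'%s/all' % videos_base

m = re.match(all_videos, 'https://www.twitch.tv/spamfish/videos/all')
print(m.group('id'))  # -> spamfish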
youtube_dl/extractor/vporn.py

@@ -7,6 +7,7 @@ from ..utils import (
    ExtractorError,
    parse_duration,
    str_to_int,
+    urljoin,
)
@@ -66,10 +67,9 @@ class VpornIE(InfoExtractor):
        description = self._html_search_regex(
            r'class="(?:descr|description_txt)">(.*?)</div>',
            webpage, 'description', fatal=False)

-        thumbnail = self._html_search_regex(
-            r'flashvars\.imageUrl\s*=\s*"([^"]+)"', webpage, 'description', fatal=False, default=None)
-        if thumbnail:
-            thumbnail = 'http://www.vporn.com' + thumbnail
+        thumbnail = urljoin('http://www.vporn.com', self._html_search_regex(
+            r'flashvars\.imageUrl\s*=\s*"([^"]+)"', webpage, 'description',
+            default=None))

        uploader = self._html_search_regex(
            r'(?s)Uploaded by:.*?<a href="/user/[^"]+"[^>]*>(.+?)</a>',

youtube_dl/extractor/vvvvid.py

@@ -0,0 +1,140 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
    str_or_none,
)


class VVVVIDIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?vvvvid\.it/#!(?:show|anime|film|series)/(?P<show_id>\d+)/[^/]+/(?P<season_id>\d+)/(?P<id>[0-9]+)'
    _TESTS = [{
        # video_type == 'video/vvvvid'
        'url': 'https://www.vvvvid.it/#!show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048/ping-pong',
        'md5': 'b8d3cecc2e981adc3835adf07f6df91b',
        'info_dict': {
            'id': '489048',
            'ext': 'mp4',
            'title': 'Ping Pong',
        },
    }, {
        # video_type == 'video/rcs'
        'url': 'https://www.vvvvid.it/#!show/376/death-note-live-action/377/482493/episodio-01',
        'md5': '33e0edfba720ad73a8782157fdebc648',
        'info_dict': {
            'id': '482493',
            'ext': 'mp4',
            'title': 'Episodio 01',
        },
    }]
    _conn_id = None

    def _real_initialize(self):
        self._conn_id = self._download_json(
            'https://www.vvvvid.it/user/login',
            None, headers=self.geo_verification_headers())['data']['conn_id']

    def _real_extract(self, url):
        show_id, season_id, video_id = re.match(self._VALID_URL, url).groups()
        response = self._download_json(
            'https://www.vvvvid.it/vvvvid/ondemand/%s/season/%s' % (show_id, season_id),
            video_id, headers=self.geo_verification_headers(), query={
                'conn_id': self._conn_id,
            })
        if response['result'] == 'error':
            raise ExtractorError('%s said: %s' % (
                self.IE_NAME, response['message']), expected=True)

        vid = int(video_id)
        video_data = list(filter(
            lambda episode: episode.get('video_id') == vid, response['data']))[0]
        formats = []

        # vvvvid embed_info decryption algorithm is reverse engineered from function $ds(h) at vvvvid.js
        def ds(h):
            g = "MNOPIJKL89+/4567UVWXQRSTEFGHABCDcdefYZabstuvopqr0123wxyzklmnghij"

            def f(m):
                l = []
                o = 0
                b = False
                m_len = len(m)
                while ((not b) and o < m_len):
                    n = m[o] << 2
                    o += 1
                    k = -1
                    j = -1
                    if o < m_len:
                        n += m[o] >> 4
                        o += 1
                        if o < m_len:
                            k = (m[o - 1] << 4) & 255
                            k += m[o] >> 2
                            o += 1
                            if o < m_len:
                                j = (m[o - 1] << 6) & 255
                                j += m[o]
                                o += 1
                            else:
                                b = True
                        else:
                            b = True
                    else:
                        b = True
                    l.append(n)
                    if k != -1:
                        l.append(k)
                    if j != -1:
                        l.append(j)
                return l

            c = []
            for e in h:
                c.append(g.index(e))

            c_len = len(c)
            for e in range(c_len * 2 - 1, -1, -1):
                a = c[e % c_len] ^ c[(e + 1) % c_len]
                c[e % c_len] = a

            c = f(c)
            d = ''
            for e in c:
                d += chr(e)

            return d

        for quality in ('_sd', ''):
            embed_code = video_data.get('embed_info' + quality)
            if not embed_code:
                continue
            embed_code = ds(embed_code)
            video_type = video_data.get('video_type')
            if video_type in ('video/rcs', 'video/kenc'):
                formats.extend(self._extract_akamai_formats(
                    embed_code, video_id))
            else:
                formats.extend(self._extract_wowza_formats(
                    'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id))
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_data['title'],
            'formats': formats,
            'thumbnail': video_data.get('thumbnail'),
            'duration': int_or_none(video_data.get('length')),
            'series': video_data.get('show_title'),
            'season_id': season_id,
            'season_number': video_data.get('season_number'),
            'episode_id': str_or_none(video_data.get('id')),
            'episode_number': int_or_none(video_data.get('number')),
            'episode_title': video_data['title'],
            'view_count': int_or_none(video_data.get('views')),
            'like_count': int_or_none(video_data.get('video_likes')),
        }

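Reviewer note: the season endpoint returns every episode, and the video id captured from the URL is a string while the API field is numeric, hence the int() cast before filtering. In miniature (toy data):

response_data = [
    {'video_id': 482492, 'title': 'Episodio 00'},
    {'video_id': 482493, 'title': 'Episodio 01'},
]
vid = int('482493')  # video id as captured from the URL
video_data = list(filter(
    lambda episode: episode.get('video_id') == vid, response_data))[0]
print(video_data['title'])  # -> Episodio 01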
youtube_dl/utils.py

@@ -1703,9 +1703,9 @@ def base_url(url):
def urljoin(base, path):
    if not isinstance(path, compat_str) or not path:
        return None
-    if re.match(r'https?://', path):
+    if re.match(r'^(?:https?:)?//', path):
        return path
-    if not isinstance(base, compat_str) or not re.match(r'https?://', base):
+    if not isinstance(base, compat_str) or not re.match(r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)

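Reviewer note: the relaxed patterns are what make the new protocol-relative cases in test_urljoin above pass; urljoin now accepts scheme-less bases and passes scheme-less absolute paths through untouched:

from youtube_dl.utils import urljoin

print(urljoin('http://foo.de/', '//foo.de/a/b/c.txt'))  # -> //foo.de/a/b/c.txt
print(urljoin('//foo.de/', '/a/b/c.txt'))               # -> //foo.de/a/b/c.txt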
youtube_dl/version.py

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2016.12.15'
+__version__ = '2016.12.18'