Compare commits
129 Commits
2015.10.09
...
2015.10.18
Author | SHA1 | Date | |
---|---|---|---|
![]() |
a6e0afa2bb | ||
![]() |
4285a47f40 | ||
![]() |
e36963e0eb | ||
![]() |
dedd35c6bc | ||
![]() |
608945d44a | ||
![]() |
b1bf063503 | ||
![]() |
14bddf35fb | ||
![]() |
ef6c868f23 | ||
![]() |
6682049dee | ||
![]() |
b0f001a6cb | ||
![]() |
dd67702a3e | ||
![]() |
05a3879f1c | ||
![]() |
4a7b790384 | ||
![]() |
09ff81316e | ||
![]() |
c88aec845a | ||
![]() |
77a54b6a65 | ||
![]() |
575036b405 | ||
![]() |
f6dfd6603a | ||
![]() |
e04edad621 | ||
![]() |
f322bfb063 | ||
![]() |
014e880372 | ||
![]() |
01d22d4703 | ||
![]() |
48aae2d2cf | ||
![]() |
c571dea953 | ||
![]() |
8b172c2e10 | ||
![]() |
0a67a3632b | ||
![]() |
985e4fdc07 | ||
![]() |
1e399778ee | ||
![]() |
2e022397c4 | ||
![]() |
02835c6bf4 | ||
![]() |
91816e8f16 | ||
![]() |
10c38c7ca2 | ||
![]() |
94a773feb9 | ||
![]() |
448ef1f31c | ||
![]() |
49941c4e4f | ||
![]() |
80f48920c8 | ||
![]() |
5a11b793fe | ||
![]() |
7593fbaa12 | ||
![]() |
2eb0f72a0e | ||
![]() |
8e5b121948 | ||
![]() |
648e6a1ffe | ||
![]() |
583882fdce | ||
![]() |
9eb31b265f | ||
![]() |
ddeb1b3de2 | ||
![]() |
59fe4824f8 | ||
![]() |
dd8417526b | ||
![]() |
09670d5ba4 | ||
![]() |
41a7b00f18 | ||
![]() |
350c948133 | ||
![]() |
e5e9966199 | ||
![]() |
fbd9f6ea80 | ||
![]() |
6df7179e6c | ||
![]() |
36eb802baf | ||
![]() |
2ccb37beb9 | ||
![]() |
246ce10858 | ||
![]() |
ba717dca97 | ||
![]() |
1e52776ac3 | ||
![]() |
8daeeedc06 | ||
![]() |
6744f36db7 | ||
![]() |
fafc7950e2 | ||
![]() |
a13d06de42 | ||
![]() |
1db82381e3 | ||
![]() |
cb8961eeed | ||
![]() |
af98f8ff37 | ||
![]() |
caf80631f0 | ||
![]() |
1812afb7b3 | ||
![]() |
9fb66c780c | ||
![]() |
ab953c64a0 | ||
![]() |
db0a8ad979 | ||
![]() |
1c29e81e62 | ||
![]() |
7a6d76a64d | ||
![]() |
4aa353673b | ||
![]() |
f49b3d6efc | ||
![]() |
36bb63fad1 | ||
![]() |
1f36085df9 | ||
![]() |
26669ea3cf | ||
![]() |
3dc582e5ea | ||
![]() |
506e261d20 | ||
![]() |
b30c4992a9 | ||
![]() |
3eeff489e8 | ||
![]() |
5946cda7c6 | ||
![]() |
ee2edd838a | ||
![]() |
73e732eb6b | ||
![]() |
cd7364a89c | ||
![]() |
57d1db8dd0 | ||
![]() |
964e7b2dd0 | ||
![]() |
f101079ae0 | ||
![]() |
0f61db4469 | ||
![]() |
1bd3903582 | ||
![]() |
da4daed5ef | ||
![]() |
7d49502ab0 | ||
![]() |
03e3b4e119 | ||
![]() |
d8348c351d | ||
![]() |
70cb4d51c9 | ||
![]() |
7c84562945 | ||
![]() |
68f3b61f0e | ||
![]() |
08fd433f3e | ||
![]() |
35a3ff1d33 | ||
![]() |
fff496c689 | ||
![]() |
e5c209a1bc | ||
![]() |
75f105d455 | ||
![]() |
00cde0b8dc | ||
![]() |
58cd7e173e | ||
![]() |
f4076bb736 | ||
![]() |
6f7893653c | ||
![]() |
55ebae26f9 | ||
![]() |
ae8bdfd1a1 | ||
![]() |
6a74719060 | ||
![]() |
f6295bcb04 | ||
![]() |
78f9d84318 | ||
![]() |
b1ec70e4a9 | ||
![]() |
c936d8cc7b | ||
![]() |
e6174ee975 | ||
![]() |
f790c43f6e | ||
![]() |
8119597d6f | ||
![]() |
43abd79950 | ||
![]() |
97ae4d166c | ||
![]() |
87cc0fbd18 | ||
![]() |
baf39a1aa8 | ||
![]() |
975977860d | ||
![]() |
b5d48cb1ef | ||
![]() |
de66571371 | ||
![]() |
dab062fb6e | ||
![]() |
6a959f2e52 | ||
![]() |
47c165c3a9 | ||
![]() |
4180a3d8b7 | ||
![]() |
9e7e0dffd5 | ||
![]() |
c3dea3f878 | ||
![]() |
f57f84f606 |
@@ -81,6 +81,7 @@
|
||||
- **CBSSports**
|
||||
- **CeskaTelevize**
|
||||
- **channel9**: Channel 9
|
||||
- **Chaturbate**
|
||||
- **Chilloutzone**
|
||||
- **chirbit**
|
||||
- **chirbit:profile**
|
||||
@@ -158,6 +159,7 @@
|
||||
- **facebook**
|
||||
- **faz.net**
|
||||
- **fc2**
|
||||
- **Fczenit**
|
||||
- **fernsehkritik.tv**
|
||||
- **Firstpost**
|
||||
- **FiveTV**
|
||||
@@ -280,7 +282,6 @@
|
||||
- **Malemotion**
|
||||
- **MDR**
|
||||
- **media.ccc.de**
|
||||
- **MegaVideoz**
|
||||
- **metacafe**
|
||||
- **Metacritic**
|
||||
- **Mgoon**
|
||||
@@ -587,7 +588,8 @@
|
||||
- **twitch:stream**
|
||||
- **twitch:video**
|
||||
- **twitch:vod**
|
||||
- **TwitterCard**
|
||||
- **twitter**
|
||||
- **twitter:card**
|
||||
- **Ubu**
|
||||
- **udemy**
|
||||
- **udemy:course**
|
||||
|
@@ -35,10 +35,18 @@ class TestInfoExtractor(unittest.TestCase):
|
||||
<meta name="og:title" content='Foo'/>
|
||||
<meta content="Some video's description " name="og:description"/>
|
||||
<meta property='og:image' content='http://domain.com/pic.jpg?key1=val1&key2=val2'/>
|
||||
<meta content='application/x-shockwave-flash' property='og:video:type'>
|
||||
<meta content='Foo' property=og:foobar>
|
||||
<meta name="og:test1" content='foo > < bar'/>
|
||||
<meta name="og:test2" content="foo >//< bar"/>
|
||||
'''
|
||||
self.assertEqual(ie._og_search_title(html), 'Foo')
|
||||
self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
|
||||
self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
|
||||
self.assertEqual(ie._og_search_video_url(html, default=None), None)
|
||||
self.assertEqual(ie._og_search_property('foobar', html), 'Foo')
|
||||
self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar')
|
||||
self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar')
|
||||
|
||||
def test_html_search_meta(self):
|
||||
ie = self.ie
|
||||
|
@@ -57,5 +57,14 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
entries = result['entries']
|
||||
self.assertEqual(len(entries), 100)
|
||||
|
||||
def test_youtube_flat_playlist_titles(self):
|
||||
dl = FakeYDL()
|
||||
dl.params['extract_flat'] = True
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
|
||||
self.assertIsPlaylist(result)
|
||||
for entry in result['entries']:
|
||||
self.assertTrue(entry.get('title'))
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -37,6 +37,7 @@ from .compat import (
|
||||
compat_tokenize_tokenize,
|
||||
compat_urllib_error,
|
||||
compat_urllib_request,
|
||||
compat_urllib_request_DataHandler,
|
||||
)
|
||||
from .utils import (
|
||||
ContentTooShortError,
|
||||
@@ -1967,8 +1968,9 @@ class YoutubeDL(object):
|
||||
debuglevel = 1 if self.params.get('debug_printtraffic') else 0
|
||||
https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
|
||||
ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
|
||||
data_handler = compat_urllib_request_DataHandler()
|
||||
opener = compat_urllib_request.build_opener(
|
||||
proxy_handler, https_handler, cookie_processor, ydlh)
|
||||
proxy_handler, https_handler, cookie_processor, ydlh, data_handler)
|
||||
|
||||
# Delete the default user-agent header, which would otherwise apply in
|
||||
# cases where our custom HTTP handler doesn't come into play
|
||||
|
@@ -1,7 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import binascii
|
||||
import collections
|
||||
import email
|
||||
import getpass
|
||||
import io
|
||||
import optparse
|
||||
import os
|
||||
import re
|
||||
@@ -38,6 +41,11 @@ try:
|
||||
except ImportError: # Python 2
|
||||
import urlparse as compat_urlparse
|
||||
|
||||
try:
|
||||
import urllib.response as compat_urllib_response
|
||||
except ImportError: # Python 2
|
||||
import urllib as compat_urllib_response
|
||||
|
||||
try:
|
||||
import http.cookiejar as compat_cookiejar
|
||||
except ImportError: # Python 2
|
||||
@@ -155,6 +163,40 @@ except ImportError: # Python 2
|
||||
string = string.replace('+', ' ')
|
||||
return compat_urllib_parse_unquote(string, encoding, errors)
|
||||
|
||||
try:
|
||||
from urllib.request import DataHandler as compat_urllib_request_DataHandler
|
||||
except ImportError: # Python < 3.4
|
||||
# Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
|
||||
class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler):
|
||||
def data_open(self, req):
|
||||
# data URLs as specified in RFC 2397.
|
||||
#
|
||||
# ignores POSTed data
|
||||
#
|
||||
# syntax:
|
||||
# dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
|
||||
# mediatype := [ type "/" subtype ] *( ";" parameter )
|
||||
# data := *urlchar
|
||||
# parameter := attribute "=" value
|
||||
url = req.get_full_url()
|
||||
|
||||
scheme, data = url.split(":", 1)
|
||||
mediatype, data = data.split(",", 1)
|
||||
|
||||
# even base64 encoded data URLs might be quoted so unquote in any case:
|
||||
data = compat_urllib_parse_unquote_to_bytes(data)
|
||||
if mediatype.endswith(";base64"):
|
||||
data = binascii.a2b_base64(data)
|
||||
mediatype = mediatype[:-7]
|
||||
|
||||
if not mediatype:
|
||||
mediatype = "text/plain;charset=US-ASCII"
|
||||
|
||||
headers = email.message_from_string(
|
||||
"Content-type: %s\nContent-length: %d\n" % (mediatype, len(data)))
|
||||
|
||||
return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
|
||||
|
||||
try:
|
||||
compat_basestring = basestring # Python 2
|
||||
except NameError:
|
||||
@@ -489,6 +531,8 @@ __all__ = [
|
||||
'compat_urllib_parse_unquote_to_bytes',
|
||||
'compat_urllib_parse_urlparse',
|
||||
'compat_urllib_request',
|
||||
'compat_urllib_request_DataHandler',
|
||||
'compat_urllib_response',
|
||||
'compat_urlparse',
|
||||
'compat_urlretrieve',
|
||||
'compat_xml_parse_error',
|
||||
|
@@ -325,7 +325,7 @@ class FileDownloader(object):
|
||||
)
|
||||
|
||||
# Check file already present
|
||||
if filename != '-' and nooverwrites_and_exists or continuedl_and_exists:
|
||||
if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
|
||||
self.report_file_already_downloaded(filename)
|
||||
self._hook_progress({
|
||||
'filename': filename,
|
||||
|
@@ -30,7 +30,7 @@ class HlsFD(FileDownloader):
|
||||
|
||||
args = [ffpp.executable, '-y']
|
||||
|
||||
if info_dict['http_headers']:
|
||||
if info_dict['http_headers'] and re.match(r'^https?://', url):
|
||||
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
|
||||
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
|
||||
args += [
|
||||
|
@@ -105,7 +105,7 @@ class RtmpFD(FileDownloader):
|
||||
protocol = info_dict.get('rtmp_protocol', None)
|
||||
real_time = info_dict.get('rtmp_real_time', False)
|
||||
no_resume = info_dict.get('no_resume', False)
|
||||
continue_dl = info_dict.get('continuedl', True)
|
||||
continue_dl = self.params.get('continuedl', True)
|
||||
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
|
@@ -76,6 +76,7 @@ from .cbssports import CBSSportsIE
|
||||
from .ccc import CCCIE
|
||||
from .ceskatelevize import CeskaTelevizeIE
|
||||
from .channel9 import Channel9IE
|
||||
from .chaturbate import ChaturbateIE
|
||||
from .chilloutzone import ChilloutzoneIE
|
||||
from .chirbit import (
|
||||
ChirbitIE,
|
||||
@@ -166,6 +167,7 @@ from .extremetube import ExtremeTubeIE
|
||||
from .facebook import FacebookIE
|
||||
from .faz import FazIE
|
||||
from .fc2 import FC2IE
|
||||
from .fczenit import FczenitIE
|
||||
from .firstpost import FirstpostIE
|
||||
from .firsttv import FirstTVIE
|
||||
from .fivemin import FiveMinIE
|
||||
@@ -317,7 +319,6 @@ from .macgamestore import MacGameStoreIE
|
||||
from .mailru import MailRuIE
|
||||
from .malemotion import MalemotionIE
|
||||
from .mdr import MDRIE
|
||||
from .megavideoz import MegaVideozIE
|
||||
from .metacafe import MetacafeIE
|
||||
from .metacritic import MetacriticIE
|
||||
from .mgoon import MgoonIE
|
||||
@@ -689,7 +690,7 @@ from .twitch import (
|
||||
TwitchBookmarksIE,
|
||||
TwitchStreamIE,
|
||||
)
|
||||
from .twitter import TwitterCardIE
|
||||
from .twitter import TwitterCardIE, TwitterIE
|
||||
from .ubu import UbuIE
|
||||
from .udemy import (
|
||||
UdemyIE,
|
||||
|
@@ -41,7 +41,8 @@ class AdultSwimIE(InfoExtractor):
|
||||
'id': 'rQxZvXQ4ROaSOqq-or2Mow',
|
||||
'title': 'Rick and Morty - Pilot',
|
||||
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
|
||||
}
|
||||
},
|
||||
'skip': 'This video is only available for registered users',
|
||||
}, {
|
||||
'url': 'http://www.adultswim.com/videos/playlists/american-parenting/putting-francine-out-of-business/',
|
||||
'playlist': [
|
||||
@@ -134,7 +135,13 @@ class AdultSwimIE(InfoExtractor):
|
||||
show = bootstrapped_data['show']
|
||||
show_title = show['title']
|
||||
stream = video_info.get('stream')
|
||||
clips = [stream] if stream else video_info['clips']
|
||||
clips = [stream] if stream else video_info.get('clips')
|
||||
if not clips:
|
||||
raise ExtractorError(
|
||||
'This video is only available via cable service provider subscription that'
|
||||
' is not currently supported. You may want to use --cookies.'
|
||||
if video_info.get('auth') is True else 'Unable to find stream or clips',
|
||||
expected=True)
|
||||
segment_ids = [clip['videoPlaybackID'] for clip in clips]
|
||||
|
||||
episode_id = video_info['id']
|
||||
|
@@ -10,6 +10,8 @@ from ..compat import (
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -52,11 +54,11 @@ class BandcampIE(InfoExtractor):
|
||||
ext, abr_str = format_id.split('-', 1)
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': format_url,
|
||||
'url': self._proto_relative_url(format_url, 'http:'),
|
||||
'ext': ext,
|
||||
'vcodec': 'none',
|
||||
'acodec': ext,
|
||||
'abr': int(abr_str),
|
||||
'abr': int_or_none(abr_str),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
@@ -65,7 +67,7 @@ class BandcampIE(InfoExtractor):
|
||||
'id': compat_str(data['id']),
|
||||
'title': data['title'],
|
||||
'formats': formats,
|
||||
'duration': float(data['duration']),
|
||||
'duration': float_or_none(data.get('duration')),
|
||||
}
|
||||
else:
|
||||
raise ExtractorError('No free songs found')
|
||||
|
@@ -11,6 +11,8 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
remove_end,
|
||||
unescapeHTML,
|
||||
)
|
||||
from ..compat import compat_HTTPError
|
||||
|
||||
@@ -28,6 +30,14 @@ class BBCCoUkIE(InfoExtractor):
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s',
|
||||
]
|
||||
|
||||
_MEDIASELECTION_NS = 'http://bbc.co.uk/2008/mp/mediaselection'
|
||||
_EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist'
|
||||
|
||||
_NAMESPACES = (
|
||||
_MEDIASELECTION_NS,
|
||||
_EMP_PLAYLIST_NS,
|
||||
)
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
|
||||
@@ -193,6 +203,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
|
||||
def _extract_connection(self, connection, programme_id):
|
||||
formats = []
|
||||
kind = connection.get('kind')
|
||||
protocol = connection.get('protocol')
|
||||
supplier = connection.get('supplier')
|
||||
if protocol == 'http':
|
||||
@@ -218,7 +229,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
else:
|
||||
formats.append({
|
||||
'url': href,
|
||||
'format_id': supplier,
|
||||
'format_id': supplier or kind or protocol,
|
||||
})
|
||||
elif protocol == 'rtmp':
|
||||
application = connection.get('application', 'ondemand')
|
||||
@@ -238,16 +249,24 @@ class BBCCoUkIE(InfoExtractor):
|
||||
return formats
|
||||
|
||||
def _extract_items(self, playlist):
|
||||
return playlist.findall('./{http://bbc.co.uk/2008/emp/playlist}item')
|
||||
return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
|
||||
|
||||
def _findall_ns(self, element, xpath):
|
||||
elements = []
|
||||
for ns in self._NAMESPACES:
|
||||
elements.extend(element.findall(xpath % ns))
|
||||
return elements
|
||||
|
||||
def _extract_medias(self, media_selection):
|
||||
error = media_selection.find('./{http://bbc.co.uk/2008/mp/mediaselection}error')
|
||||
error = media_selection.find('./{%s}error' % self._MEDIASELECTION_NS)
|
||||
if error is None:
|
||||
media_selection.find('./{%s}error' % self._EMP_PLAYLIST_NS)
|
||||
if error is not None:
|
||||
raise BBCCoUkIE.MediaSelectionError(error.get('id'))
|
||||
return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media')
|
||||
return self._findall_ns(media_selection, './{%s}media')
|
||||
|
||||
def _extract_connections(self, media):
|
||||
return media.findall('./{http://bbc.co.uk/2008/mp/mediaselection}connection')
|
||||
return self._findall_ns(media, './{%s}connection')
|
||||
|
||||
def _extract_video(self, media, programme_id):
|
||||
formats = []
|
||||
@@ -261,13 +280,14 @@ class BBCCoUkIE(InfoExtractor):
|
||||
conn_formats = self._extract_connection(connection, programme_id)
|
||||
for format in conn_formats:
|
||||
format.update({
|
||||
'format_id': '%s_%s' % (service, format['format_id']),
|
||||
'width': width,
|
||||
'height': height,
|
||||
'vbr': vbr,
|
||||
'vcodec': vcodec,
|
||||
'filesize': file_size,
|
||||
})
|
||||
if service:
|
||||
format['format_id'] = '%s_%s' % (service, format['format_id'])
|
||||
formats.extend(conn_formats)
|
||||
return formats
|
||||
|
||||
@@ -382,7 +402,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
url, playlist_id, 'Downloading legacy playlist XML')
|
||||
|
||||
def _extract_from_legacy_playlist(self, playlist, playlist_id):
|
||||
no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
|
||||
no_items = playlist.find('./{%s}noItems' % self._EMP_PLAYLIST_NS)
|
||||
if no_items is not None:
|
||||
reason = no_items.get('reason')
|
||||
if reason == 'preAvailability':
|
||||
@@ -399,8 +419,9 @@ class BBCCoUkIE(InfoExtractor):
|
||||
kind = item.get('kind')
|
||||
if kind != 'programme' and kind != 'radioProgramme':
|
||||
continue
|
||||
title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
|
||||
description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
|
||||
title = playlist.find('./{%s}title' % self._EMP_PLAYLIST_NS).text
|
||||
description_el = playlist.find('./{%s}summary' % self._EMP_PLAYLIST_NS)
|
||||
description = description_el.text if description_el is not None else None
|
||||
|
||||
def get_programme_id(item):
|
||||
def get_from_attributes(item):
|
||||
@@ -409,16 +430,18 @@ class BBCCoUkIE(InfoExtractor):
|
||||
if value and re.match(r'^[pb][\da-z]{7}$', value):
|
||||
return value
|
||||
get_from_attributes(item)
|
||||
mediator = item.find('./{http://bbc.co.uk/2008/emp/playlist}mediator')
|
||||
mediator = item.find('./{%s}mediator' % self._EMP_PLAYLIST_NS)
|
||||
if mediator is not None:
|
||||
return get_from_attributes(mediator)
|
||||
|
||||
programme_id = get_programme_id(item)
|
||||
duration = int_or_none(item.get('duration'))
|
||||
# TODO: programme_id can be None and media items can be incorporated right inside
|
||||
# playlist's item (e.g. http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
|
||||
# as f4m and m3u8
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
|
||||
if programme_id:
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
else:
|
||||
formats, subtitles = self._process_media_selector(item, playlist_id)
|
||||
programme_id = playlist_id
|
||||
|
||||
return programme_id, title, description, duration, formats, subtitles
|
||||
|
||||
@@ -470,6 +493,9 @@ class BBCIE(BBCCoUkIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'
|
||||
|
||||
_MEDIASELECTOR_URLS = [
|
||||
# Provides HQ HLS streams but fails with geolocation in some cases when it's
|
||||
# even not geo restricted at all
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
|
||||
# Provides more formats, namely direct mp4 links, but fails on some videos with
|
||||
# notukerror for non UK (?) users (e.g.
|
||||
# http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
||||
@@ -479,8 +505,7 @@ class BBCIE(BBCCoUkIE):
|
||||
]
|
||||
|
||||
_TESTS = [{
|
||||
# article with multiple videos embedded with data-media-meta containing
|
||||
# playlist.sxml, externalId and no direct video links
|
||||
# article with multiple videos embedded with data-playable containing vpids
|
||||
'url': 'http://www.bbc.com/news/world-europe-32668511',
|
||||
'info_dict': {
|
||||
'id': 'world-europe-32668511',
|
||||
@@ -489,7 +514,7 @@ class BBCIE(BBCCoUkIE):
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
# article with multiple videos embedded with data-media-meta (more videos)
|
||||
# article with multiple videos embedded with data-playable (more videos)
|
||||
'url': 'http://www.bbc.com/news/business-28299555',
|
||||
'info_dict': {
|
||||
'id': 'business-28299555',
|
||||
@@ -500,6 +525,7 @@ class BBCIE(BBCCoUkIE):
|
||||
'skip': 'Save time',
|
||||
}, {
|
||||
# article with multiple videos embedded with `new SMP()`
|
||||
# broken
|
||||
'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460',
|
||||
'info_dict': {
|
||||
'id': '3662a707-0af9-3149-963f-47bea720b460',
|
||||
@@ -507,12 +533,13 @@ class BBCIE(BBCCoUkIE):
|
||||
},
|
||||
'playlist_count': 18,
|
||||
}, {
|
||||
# single video embedded with mediaAssetPage.init()
|
||||
# single video embedded with data-playable containing vpid
|
||||
'url': 'http://www.bbc.com/news/world-europe-32041533',
|
||||
'info_dict': {
|
||||
'id': 'p02mprgb',
|
||||
'ext': 'mp4',
|
||||
'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
|
||||
'description': 'md5:2868290467291b37feda7863f7a83f54',
|
||||
'duration': 47,
|
||||
'timestamp': 1427219242,
|
||||
'upload_date': '20150324',
|
||||
@@ -522,15 +549,14 @@ class BBCIE(BBCCoUkIE):
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# article with single video embedded with data-media-meta containing
|
||||
# direct video links (for now these are extracted) and playlist.xml (with
|
||||
# media items as f4m and m3u8 - currently unsupported)
|
||||
# article with single video embedded with data-playable containing XML playlist
|
||||
# with direct video links as progressiveDownloadUrl (for now these are extracted)
|
||||
# and playlist with f4m and m3u8 as streamingUrl
|
||||
'url': 'http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu',
|
||||
'info_dict': {
|
||||
'id': '150615_telabyad_kentin_cogu',
|
||||
'ext': 'mp4',
|
||||
'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
|
||||
'duration': 47,
|
||||
'timestamp': 1434397334,
|
||||
'upload_date': '20150615',
|
||||
},
|
||||
@@ -538,13 +564,12 @@ class BBCIE(BBCCoUkIE):
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# single video embedded with mediaAssetPage.init() (regional section)
|
||||
# single video embedded with data-playable containing XML playlists (regional section)
|
||||
'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
|
||||
'info_dict': {
|
||||
'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
|
||||
'ext': 'mp4',
|
||||
'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
|
||||
'duration': 87,
|
||||
'timestamp': 1434713142,
|
||||
'upload_date': '20150619',
|
||||
},
|
||||
@@ -586,27 +611,34 @@ class BBCIE(BBCCoUkIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'Hyundai Santa Fe Sport: Rock star',
|
||||
'description': 'md5:b042a26142c4154a6e472933cf20793d',
|
||||
'timestamp': 1368473503,
|
||||
'upload_date': '20130513',
|
||||
'timestamp': 1415867444,
|
||||
'upload_date': '20141113',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# single video with playlist.sxml URL
|
||||
# single video with playlist.sxml URL in playlist param
|
||||
'url': 'http://www.bbc.com/sport/0/football/33653409',
|
||||
'info_dict': {
|
||||
'id': 'p02xycnp',
|
||||
'ext': 'mp4',
|
||||
'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
|
||||
'description': 'md5:398fca0e2e701c609d726e034fa1fc89',
|
||||
'duration': 140,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# article with multiple videos embedded with playlist.sxml in playlist param
|
||||
'url': 'http://www.bbc.com/sport/0/football/34475836',
|
||||
'info_dict': {
|
||||
'id': '34475836',
|
||||
'title': 'What Liverpool can expect from Klopp',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
# single video with playlist URL from weather section
|
||||
'url': 'http://www.bbc.com/weather/features/33601775',
|
||||
@@ -648,40 +680,107 @@ class BBCIE(BBCCoUkIE):
|
||||
|
||||
return [], []
|
||||
|
||||
def _extract_from_playlist_sxml(self, url, playlist_id, timestamp):
|
||||
programme_id, title, description, duration, formats, subtitles = \
|
||||
self._process_legacy_playlist_url(url, playlist_id)
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
timestamp = parse_iso8601(self._search_regex(
|
||||
[r'"datePublished":\s*"([^"]+)',
|
||||
r'<meta[^>]+property="article:published_time"[^>]+content="([^"]+)"',
|
||||
r'itemprop="datePublished"[^>]+datetime="([^"]+)"'],
|
||||
webpage, 'date', default=None))
|
||||
timestamp = None
|
||||
playlist_title = None
|
||||
playlist_description = None
|
||||
|
||||
# single video with playlist.sxml URL (e.g. http://www.bbc.com/sport/0/football/3365340ng)
|
||||
playlist = self._search_regex(
|
||||
r'<param[^>]+name="playlist"[^>]+value="([^"]+)"',
|
||||
webpage, 'playlist', default=None)
|
||||
if playlist:
|
||||
programme_id, title, description, duration, formats, subtitles = \
|
||||
self._process_legacy_playlist_url(playlist, playlist_id)
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
ld = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)<script type="application/ld\+json">(.+?)</script>',
|
||||
webpage, 'ld json', default='{}'),
|
||||
playlist_id, fatal=False)
|
||||
if ld:
|
||||
timestamp = parse_iso8601(ld.get('datePublished'))
|
||||
playlist_title = ld.get('headline')
|
||||
playlist_description = ld.get('articleBody')
|
||||
|
||||
if not timestamp:
|
||||
timestamp = parse_iso8601(self._search_regex(
|
||||
[r'<meta[^>]+property="article:published_time"[^>]+content="([^"]+)"',
|
||||
r'itemprop="datePublished"[^>]+datetime="([^"]+)"',
|
||||
r'"datePublished":\s*"([^"]+)'],
|
||||
webpage, 'date', default=None))
|
||||
|
||||
entries = []
|
||||
|
||||
# article with multiple videos embedded with playlist.sxml (e.g.
|
||||
# http://www.bbc.com/sport/0/football/34475836)
|
||||
playlists = re.findall(r'<param[^>]+name="playlist"[^>]+value="([^"]+)"', webpage)
|
||||
if playlists:
|
||||
entries = [
|
||||
self._extract_from_playlist_sxml(playlist_url, playlist_id, timestamp)
|
||||
for playlist_url in playlists]
|
||||
|
||||
# news article with multiple videos embedded with data-playable
|
||||
data_playables = re.findall(r'data-playable=(["\'])({.+?})\1', webpage)
|
||||
if data_playables:
|
||||
for _, data_playable_json in data_playables:
|
||||
data_playable = self._parse_json(
|
||||
unescapeHTML(data_playable_json), playlist_id, fatal=False)
|
||||
if not data_playable:
|
||||
continue
|
||||
settings = data_playable.get('settings', {})
|
||||
if settings:
|
||||
# data-playable with video vpid in settings.playlistObject.items (e.g.
|
||||
# http://www.bbc.com/news/world-us-canada-34473351)
|
||||
playlist_object = settings.get('playlistObject', {})
|
||||
if playlist_object:
|
||||
items = playlist_object.get('items')
|
||||
if items and isinstance(items, list):
|
||||
title = playlist_object['title']
|
||||
description = playlist_object.get('summary')
|
||||
duration = int_or_none(items[0].get('duration'))
|
||||
programme_id = items[0].get('vpid')
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
self._sort_formats(formats)
|
||||
entries.append({
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
else:
|
||||
# data-playable without vpid but with a playlist.sxml URLs
|
||||
# in otherSettings.playlist (e.g.
|
||||
# http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
|
||||
playlist = data_playable.get('otherSettings', {}).get('playlist', {})
|
||||
if playlist:
|
||||
entries.append(self._extract_from_playlist_sxml(
|
||||
playlist.get('progressiveDownloadUrl'), playlist_id, timestamp))
|
||||
|
||||
if entries:
|
||||
playlist_title = playlist_title or remove_end(self._og_search_title(webpage), ' - BBC News')
|
||||
playlist_description = playlist_description or self._og_search_description(webpage, default=None)
|
||||
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
||||
programme_id = self._search_regex(
|
||||
[r'data-video-player-vpid="([\da-z]{8})"',
|
||||
r'<param[^>]+name="externalIdentifier"[^>]+value="([\da-z]{8})"'],
|
||||
webpage, 'vpid', default=None)
|
||||
|
||||
if programme_id:
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
self._sort_formats(formats)
|
||||
|
@@ -1,65 +1,67 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class BeegIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://beeg.com/5416503',
|
||||
'md5': '1bff67111adb785c51d1b42959ec10e5',
|
||||
'md5': '46c384def73b33dbc581262e5ee67cef',
|
||||
'info_dict': {
|
||||
'id': '5416503',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sultry Striptease',
|
||||
'description': 'md5:6db3c6177972822aaba18652ff59c773',
|
||||
'categories': list, # NSFW
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
'description': 'md5:d22219c09da287c14bed3d6c37ce4bc2',
|
||||
'timestamp': 1391813355,
|
||||
'upload_date': '20140207',
|
||||
'duration': 383,
|
||||
'tags': list,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
quality_arr = self._search_regex(
|
||||
r'(?s)var\s+qualityArr\s*=\s*{\s*(.+?)\s*}', webpage, 'quality formats')
|
||||
|
||||
formats = [{
|
||||
'url': fmt[1],
|
||||
'format_id': fmt[0],
|
||||
'height': int(fmt[0][:-1]),
|
||||
} for fmt in re.findall(r"'([^']+)'\s*:\s*'([^']+)'", quality_arr)]
|
||||
video = self._download_json(
|
||||
'http://beeg.com/api/v1/video/%s' % video_id, video_id)
|
||||
|
||||
formats = []
|
||||
for format_id, video_url in video.items():
|
||||
height = self._search_regex(
|
||||
r'^(\d+)[pP]$', format_id, 'height', default=None)
|
||||
if not height:
|
||||
continue
|
||||
formats.append({
|
||||
'url': self._proto_relative_url(video_url.replace('{DATA_MARKERS}', ''), 'http:'),
|
||||
'format_id': format_id,
|
||||
'height': int(height),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>([^<]+)\s*-\s*beeg\.?</title>', webpage, 'title')
|
||||
title = video['title']
|
||||
video_id = video.get('id') or video_id
|
||||
display_id = video.get('code')
|
||||
description = video.get('desc')
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'<meta name="description" content="([^"]*)"',
|
||||
webpage, 'description', fatal=False)
|
||||
thumbnail = self._html_search_regex(
|
||||
r'\'previewer.url\'\s*:\s*"([^"]*)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
timestamp = parse_iso8601(video.get('date'), ' ')
|
||||
duration = int_or_none(video.get('duration'))
|
||||
|
||||
categories_str = self._html_search_regex(
|
||||
r'<meta name="keywords" content="([^"]+)"', webpage, 'categories', fatal=False)
|
||||
categories = (
|
||||
None if categories_str is None
|
||||
else categories_str.split(','))
|
||||
tags = [tag.strip() for tag in video['tags'].split(',')] if video.get('tags') else None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'categories': categories,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'tags': tags,
|
||||
'formats': formats,
|
||||
'age_limit': 18,
|
||||
}
|
||||
|
@@ -4,7 +4,7 @@ from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
fix_xml_ampersands,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
@@ -17,26 +17,24 @@ class BildIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '38184146',
|
||||
'ext': 'mp4',
|
||||
'title': 'BILD hat sie getestet',
|
||||
'title': 'Das können die neuen iPads',
|
||||
'description': 'md5:a4058c4fa2a804ab59c00d7244bbf62f',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 196,
|
||||
'description': 'Mit dem iPad Air 2 und dem iPad Mini 3 hat Apple zwei neue Tablet-Modelle präsentiert. BILD-Reporter Sven Stein durfte die Geräte bereits testen. ',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
xml_url = url.split(".bild.html")[0] + ",view=xml.bild.xml"
|
||||
doc = self._download_xml(xml_url, video_id, transform_source=fix_xml_ampersands)
|
||||
|
||||
duration = int_or_none(doc.attrib.get('duration'), scale=1000)
|
||||
video_data = self._download_json(
|
||||
url.split('.bild.html')[0] + ',view=json.bild.html', video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': doc.attrib['ueberschrift'],
|
||||
'description': doc.attrib.get('text'),
|
||||
'url': doc.attrib['src'],
|
||||
'thumbnail': doc.attrib.get('img'),
|
||||
'duration': duration,
|
||||
'title': unescapeHTML(video_data['title']).strip(),
|
||||
'description': unescapeHTML(video_data.get('description')),
|
||||
'url': video_data['clipList'][0]['srces'][0]['src'],
|
||||
'thumbnail': video_data.get('poster'),
|
||||
'duration': int_or_none(video_data.get('durationSec')),
|
||||
}
|
||||
|
@@ -4,38 +4,53 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_duration
|
||||
|
||||
|
||||
class Canalc2IE(InfoExtractor):
|
||||
IE_NAME = 'canalc2.tv'
|
||||
_VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?.*?idVideo=(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?canalc2\.tv/video/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
|
||||
'url': 'http://www.canalc2.tv/video/12163',
|
||||
'md5': '060158428b650f896c542dfbb3d6487f',
|
||||
'info_dict': {
|
||||
'id': '12163',
|
||||
'ext': 'mp4',
|
||||
'title': 'Terrasses du Numérique'
|
||||
'ext': 'flv',
|
||||
'title': 'Terrasses du Numérique',
|
||||
'duration': 122,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # Requires rtmpdump
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = re.match(self._VALID_URL, url).group('id')
|
||||
# We need to set the voir field for getting the file name
|
||||
url = 'http://www.canalc2.tv/video.asp?idVideo=%s&voir=oui' % video_id
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
file_name = self._search_regex(
|
||||
r"so\.addVariable\('file','(.*?)'\);",
|
||||
webpage, 'file name')
|
||||
video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name
|
||||
video_url = self._search_regex(
|
||||
r'jwplayer\((["\'])Player\1\)\.setup\({[^}]*file\s*:\s*(["\'])(?P<file>.+?)\2',
|
||||
webpage, 'video_url', group='file')
|
||||
formats = [{'url': video_url}]
|
||||
if video_url.startswith('rtmp://'):
|
||||
rtmp = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+/))(?P<play_path>mp4:.+)$', video_url)
|
||||
formats[0].update({
|
||||
'url': rtmp.group('url'),
|
||||
'ext': 'flv',
|
||||
'app': rtmp.group('app'),
|
||||
'play_path': rtmp.group('play_path'),
|
||||
'page_url': url,
|
||||
})
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'class="evenement8">(.*?)</a>', webpage, 'title')
|
||||
r'(?s)class="[^"]*col_description[^"]*">.*?<h3>(.*?)</h3>', webpage, 'title')
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'id=["\']video_duree["\'][^>]*>([^<]+)',
|
||||
webpage, 'duration', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'ext': 'mp4',
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -3,7 +3,11 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_filesize,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class Channel9IE(InfoExtractor):
|
||||
@@ -28,7 +32,7 @@ class Channel9IE(InfoExtractor):
|
||||
'title': 'Developer Kick-Off Session: Stuff We Love',
|
||||
'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
|
||||
'duration': 4576,
|
||||
'thumbnail': 'http://video.ch9.ms/ch9/9d51/03902f2d-fc97-4d3c-b195-0bfe15a19d51/KOS002_220.jpg',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'session_code': 'KOS002',
|
||||
'session_day': 'Day 1',
|
||||
'session_room': 'Arena 1A',
|
||||
@@ -44,31 +48,29 @@ class Channel9IE(InfoExtractor):
|
||||
'title': 'Self-service BI with Power BI - nuclear testing',
|
||||
'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
|
||||
'duration': 1540,
|
||||
'thumbnail': 'http://video.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'authors': ['Mike Wilmot'],
|
||||
},
|
||||
},
|
||||
{
|
||||
# low quality mp4 is best
|
||||
'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
|
||||
'info_dict': {
|
||||
'id': 'Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ranges for the Standard Library',
|
||||
'description': 'md5:2e6b4917677af3728c5f6d63784c4c5d',
|
||||
'duration': 5646,
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
_RSS_URL = 'http://channel9.msdn.com/%s/RSS'
|
||||
|
||||
# Sorted by quality
|
||||
_known_formats = ['MP3', 'MP4', 'Mid Quality WMV', 'Mid Quality MP4', 'High Quality WMV', 'High Quality MP4']
|
||||
|
||||
def _restore_bytes(self, formatted_size):
|
||||
if not formatted_size:
|
||||
return 0
|
||||
m = re.match(r'^(?P<size>\d+(?:\.\d+)?)\s+(?P<units>[a-zA-Z]+)', formatted_size)
|
||||
if not m:
|
||||
return 0
|
||||
units = m.group('units')
|
||||
try:
|
||||
exponent = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'].index(units.upper())
|
||||
except ValueError:
|
||||
return 0
|
||||
size = float(m.group('size'))
|
||||
return int(size * (1024 ** exponent))
|
||||
|
||||
def _formats_from_html(self, html):
|
||||
FORMAT_REGEX = r'''
|
||||
(?x)
|
||||
@@ -78,16 +80,20 @@ class Channel9IE(InfoExtractor):
|
||||
<h3>File\s+size</h3>\s*(?P<filesize>.*?)\s*
|
||||
</div>)? # File size part may be missing
|
||||
'''
|
||||
# Extract known formats
|
||||
quality = qualities((
|
||||
'MP3', 'MP4',
|
||||
'Low Quality WMV', 'Low Quality MP4',
|
||||
'Mid Quality WMV', 'Mid Quality MP4',
|
||||
'High Quality WMV', 'High Quality MP4'))
|
||||
formats = [{
|
||||
'url': x.group('url'),
|
||||
'format_id': x.group('quality'),
|
||||
'format_note': x.group('note'),
|
||||
'format': '%s (%s)' % (x.group('quality'), x.group('note')),
|
||||
'filesize': self._restore_bytes(x.group('filesize')), # File size is approximate
|
||||
'preference': self._known_formats.index(x.group('quality')),
|
||||
'filesize_approx': parse_filesize(x.group('filesize')),
|
||||
'quality': quality(x.group('quality')),
|
||||
'vcodec': 'none' if x.group('note') == 'Audio only' else None,
|
||||
} for x in list(re.finditer(FORMAT_REGEX, html)) if x.group('quality') in self._known_formats]
|
||||
} for x in list(re.finditer(FORMAT_REGEX, html))]
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
@@ -158,7 +164,7 @@ class Channel9IE(InfoExtractor):
|
||||
|
||||
def _extract_session_day(self, html):
|
||||
m = re.search(r'<li class="day">\s*<a href="/Events/[^"]+">(?P<day>[^<]+)</a>\s*</li>', html)
|
||||
return m.group('day') if m is not None else None
|
||||
return m.group('day').strip() if m is not None else None
|
||||
|
||||
def _extract_session_room(self, html):
|
||||
m = re.search(r'<li class="room">\s*(?P<room>.+?)\s*</li>', html)
|
||||
@@ -224,12 +230,12 @@ class Channel9IE(InfoExtractor):
|
||||
if contents is None:
|
||||
return contents
|
||||
|
||||
authors = self._extract_authors(html)
|
||||
if len(contents) > 1:
|
||||
raise ExtractorError('Got more than one entry')
|
||||
result = contents[0]
|
||||
result['authors'] = self._extract_authors(html)
|
||||
|
||||
for content in contents:
|
||||
content['authors'] = authors
|
||||
|
||||
return contents
|
||||
return result
|
||||
|
||||
def _extract_session(self, html, content_path):
|
||||
contents = self._extract_content(html, content_path)
|
||||
|
50
youtube_dl/extractor/chaturbate.py
Normal file
50
youtube_dl/extractor/chaturbate.py
Normal file
@@ -0,0 +1,50 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class ChaturbateIE(InfoExtractor):
    """Extractor for room pages on chaturbate.com (any subdomain).

    The room name captured from the URL is used as the video id, and the
    result is always flagged as a live stream.
    """

    _VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.chaturbate.com/siswet19/',
        'info_dict': {
            'id': 'siswet19',
            'ext': 'mp4',
            'title': 're:^siswet19 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'age_limit': 18,
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        }
    }, {
        'url': 'https://en.chaturbate.com/siswet19/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the room page at *url*.

        Raises:
            ExtractorError: flagged as expected, carrying the text of the
                page's ``desc_span`` element, when the page has no
                ``.m3u8`` source.
        """
        room = self._match_id(url)
        page = self._download_webpage(url, room)

        # default=None keeps a missing playlist from being fatal here, so
        # the page's own message can be reported below instead.
        playlist_url = self._search_regex(
            r'src=(["\'])(?P<url>http.+?\.m3u8.*?)\1', page,
            'playlist', default=None, group='url')

        if not playlist_url:
            reason = self._search_regex(
                r'<span[^>]+class=(["\'])desc_span\1[^>]*>(?P<error>[^<]+)</span>',
                page, 'error', group='error')
            raise ExtractorError(reason, expected=True)

        hls_formats = self._extract_m3u8_formats(playlist_url, room, ext='mp4')

        thumbnail_url = (
            'https://cdn-s.highwebmedia.com/uHK3McUtGCG3SMFcd4ZJsRv8/roomimage/%s.jpg'
            % room)

        return {
            'id': room,
            'title': self._live_title(room),
            'thumbnail': thumbnail_url,
            'age_limit': self._rta_search(page),
            'is_live': True,
            'formats': hls_formats,
        }
|
@@ -172,6 +172,7 @@ class InfoExtractor(object):
|
||||
view_count: How many users have watched the video on the platform.
|
||||
like_count: Number of positive ratings of the video
|
||||
dislike_count: Number of negative ratings of the video
|
||||
repost_count: Number of reposts of the video
|
||||
average_rating: Average rating given by users; the scale used depends on the webpage
|
||||
comment_count: Number of comments on the video
|
||||
comments: A list of comments, each with one or more of the following
|
||||
@@ -645,8 +646,9 @@ class InfoExtractor(object):
|
||||
# Helper functions for extracting OpenGraph info
|
||||
@staticmethod
|
||||
def _og_regexes(prop):
|
||||
content_re = r'content=(?:"([^>]+?)"|\'([^>]+?)\')'
|
||||
property_re = r'(?:name|property)=[\'"]?og:%s[\'"]?' % re.escape(prop)
|
||||
content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
|
||||
property_re = (r'(?:name|property)=(?:\'og:%(prop)s\'|"og:%(prop)s"|\s*og:%(prop)s\b)'
|
||||
% {'prop': re.escape(prop)})
|
||||
template = r'<meta[^>]+?%s[^>]+?%s'
|
||||
return [
|
||||
template % (property_re, content_re),
|
||||
|
@@ -27,9 +27,7 @@ class CriterionIE(InfoExtractor):
|
||||
final_url = self._search_regex(
|
||||
r'so.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._html_search_regex(
|
||||
r'<meta name="description" content="(.+?)" />',
|
||||
webpage, 'video description')
|
||||
description = self._html_search_meta('description', webpage)
|
||||
thumbnail = self._search_regex(
|
||||
r'so.addVariable\("thumbnailURL", "(.+?)"\)\;',
|
||||
webpage, 'thumbnail url')
|
||||
|
@@ -32,6 +32,26 @@ from ..aes import (
|
||||
|
||||
|
||||
class CrunchyrollBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'crunchyroll'
|
||||
|
||||
def _login(self):
    """Submit the configured credentials to Crunchyroll's form handler.

    Does nothing when no username is configured.
    """
    username, password = self._get_login_info()
    if username is None:
        return

    self.report_login()

    form = urlencode_postdata({
        'formname': 'RpcApiUser_Login',
        'name': username,
        'password': password,
    })
    request = compat_urllib_request.Request(
        'https://www.crunchyroll.com/?a=formhandler', form)
    request.add_header('Content-Type', 'application/x-www-form-urlencoded')
    # Positional arguments are (request, video_id, note, errnote).
    self._download_webpage(request, None, False, 'Wrong login info')
|
||||
|
||||
def _real_initialize(self):
|
||||
# Delegate to _login(), which returns early when no username is configured.
self._login()
|
||||
|
||||
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None):
|
||||
request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
|
||||
else compat_urllib_request.Request(url_or_request))
|
||||
@@ -46,10 +66,22 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
return super(CrunchyrollBaseIE, self)._download_webpage(
|
||||
request, video_id, note, errnote, fatal, tries, timeout, encoding)
|
||||
|
||||
@staticmethod
|
||||
def _add_skip_wall(url):
|
||||
parsed_url = compat_urlparse.urlparse(url)
|
||||
qs = compat_urlparse.parse_qs(parsed_url.query)
|
||||
# Always force skip_wall to bypass maturity wall, namely 18+ confirmation message:
|
||||
# > This content may be inappropriate for some people.
|
||||
# > Are you sure you want to continue?
|
||||
# since it's not disabled by default in crunchyroll account's settings.
|
||||
# See https://github.com/rg3/youtube-dl/issues/7202.
|
||||
qs['skip_wall'] = ['1']
|
||||
return compat_urlparse.urlunparse(
|
||||
parsed_url._replace(query=compat_urllib_parse.urlencode(qs, True)))
|
||||
|
||||
|
||||
class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||
_NETRC_MACHINE = 'crunchyroll'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
||||
'info_dict': {
|
||||
@@ -81,10 +113,13 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
# rtmp
|
||||
'skip_download': True,
|
||||
},
|
||||
|
||||
}, {
|
||||
'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# geo-restricted (US), 18+ maturity wall, non-premium available
|
||||
'url': 'http://www.crunchyroll.com/cosplay-complex-ova/episode-1-the-birth-of-the-cosplay-club-565617',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_FORMAT_IDS = {
|
||||
@@ -94,24 +129,6 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
'1080': ('80', '108'),
|
||||
}
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
self.report_login()
|
||||
login_url = 'https://www.crunchyroll.com/?a=formhandler'
|
||||
data = urlencode_postdata({
|
||||
'formname': 'RpcApiUser_Login',
|
||||
'name': username,
|
||||
'password': password,
|
||||
})
|
||||
login_request = compat_urllib_request.Request(login_url, data)
|
||||
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
self._download_webpage(login_request, None, False, 'Wrong login info')
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _decrypt_subtitles(self, data, iv, id):
|
||||
data = bytes_to_intlist(base64.b64decode(data.encode('utf-8')))
|
||||
iv = bytes_to_intlist(base64.b64decode(iv.encode('utf-8')))
|
||||
@@ -254,7 +271,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
else:
|
||||
webpage_url = 'http://www.' + mobj.group('url')
|
||||
|
||||
webpage = self._download_webpage(webpage_url, video_id, 'Downloading webpage')
|
||||
webpage = self._download_webpage(self._add_skip_wall(webpage_url), video_id, 'Downloading webpage')
|
||||
note_m = self._html_search_regex(
|
||||
r'<div class="showmedia-trailer-notice">(.+?)</div>',
|
||||
webpage, 'trailer-notice', default='')
|
||||
@@ -352,7 +369,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
|
||||
class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
|
||||
IE_NAME = "crunchyroll:playlist"
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?$'
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?(?:\?|$)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
|
||||
@@ -361,12 +378,25 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
|
||||
'title': 'A Bridge to the Starry Skies - Hoshizora e Kakaru Hashi'
|
||||
},
|
||||
'playlist_count': 13,
|
||||
}, {
|
||||
# geo-restricted (US), 18+ maturity wall, non-premium available
|
||||
'url': 'http://www.crunchyroll.com/cosplay-complex-ova',
|
||||
'info_dict': {
|
||||
'id': 'cosplay-complex-ova',
|
||||
'title': 'Cosplay Complex OVA'
|
||||
},
|
||||
'playlist_count': 3,
|
||||
'skip': 'Georestricted',
|
||||
}, {
|
||||
# geo-restricted (US), 18+ maturity wall, non-premium will be available since 2015.11.14
|
||||
'url': 'http://www.crunchyroll.com/ladies-versus-butlers?skip_wall=1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, show_id)
|
||||
webpage = self._download_webpage(self._add_skip_wall(url), show_id)
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<h1[^>]*>\s*<span itemprop="name">(.*?)</span>',
|
||||
webpage, 'title')
|
||||
|
@@ -96,6 +96,11 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'uploader': 'HotWaves1012',
|
||||
'age_limit': 18,
|
||||
}
|
||||
},
|
||||
# geo-restricted, player v5
|
||||
{
|
||||
'url': 'http://www.dailymotion.com/video/xhza0o',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
@@ -124,6 +129,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
if player_v5:
|
||||
player = self._parse_json(player_v5, video_id)
|
||||
metadata = player['metadata']
|
||||
|
||||
self._check_error(metadata)
|
||||
|
||||
formats = []
|
||||
for quality, media_list in metadata['qualities'].items():
|
||||
for media in media_list:
|
||||
@@ -201,9 +209,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'video info', flags=re.MULTILINE),
|
||||
video_id)
|
||||
|
||||
if info.get('error') is not None:
|
||||
msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
|
||||
raise ExtractorError(msg, expected=True)
|
||||
self._check_error(info)
|
||||
|
||||
formats = []
|
||||
for (key, format_id) in self._FORMATS:
|
||||
@@ -246,6 +252,11 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'duration': info['duration']
|
||||
}
|
||||
|
||||
def _check_error(self, info):
|
||||
if info.get('error') is not None:
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, info['error']['title']), expected=True)
|
||||
|
||||
def _get_subtitles(self, video_id, webpage):
|
||||
try:
|
||||
sub_list = self._download_webpage(
|
||||
|
@@ -87,7 +87,7 @@ class EaglePlatformIE(InfoExtractor):
|
||||
m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON')
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id,
|
||||
'mp4', entry_protocol='m3u8_native')
|
||||
'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
|
||||
|
||||
mp4_url = self._get_video_url(
|
||||
# Secure mp4 URL is constructed according to Player.prototype.mp4 from
|
||||
|
@@ -33,20 +33,27 @@ class ExpoTVIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
player_key = self._search_regex(
|
||||
r'<param name="playerKey" value="([^"]+)"', webpage, 'player key')
|
||||
config_url = 'http://client.expotv.com/video/config/%s/%s' % (
|
||||
video_id, player_key)
|
||||
config = self._download_json(
|
||||
config_url, video_id,
|
||||
note='Downloading video configuration')
|
||||
'http://client.expotv.com/video/config/%s/%s' % (video_id, player_key),
|
||||
video_id, 'Downloading video configuration')
|
||||
|
||||
formats = [{
|
||||
'url': fcfg['file'],
|
||||
'height': int_or_none(fcfg.get('height')),
|
||||
'format_note': fcfg.get('label'),
|
||||
'ext': self._search_regex(
|
||||
r'filename=.*\.([a-z0-9_A-Z]+)&', fcfg['file'],
|
||||
'file extension', default=None),
|
||||
} for fcfg in config['sources']]
|
||||
formats = []
|
||||
for fcfg in config['sources']:
|
||||
media_url = fcfg.get('file')
|
||||
if not media_url:
|
||||
continue
|
||||
if fcfg.get('type') == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls'))
|
||||
else:
|
||||
formats.append({
|
||||
'url': media_url,
|
||||
'height': int_or_none(fcfg.get('height')),
|
||||
'format_id': fcfg.get('label'),
|
||||
'ext': self._search_regex(
|
||||
r'filename=.*\.([a-z0-9_A-Z]+)&', media_url,
|
||||
'file extension', default=None) or fcfg.get('type'),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
|
41
youtube_dl/extractor/fczenit.py
Normal file
41
youtube_dl/extractor/fczenit.py
Normal file
@@ -0,0 +1,41 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class FczenitIE(InfoExtractor):
    """Extractor for fc-zenit.ru ``/video/gl<id>`` pages."""

    _VALID_URL = r'https?://(?:www\.)?fc-zenit\.ru/video/gl(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://fc-zenit.ru/video/gl6785/',
        'md5': '458bacc24549173fe5a5aa29174a5606',
        'info_dict': {
            'id': '6785',
            'ext': 'mp4',
            'title': '«Зенит-ТВ»: как Олег Шатов играл против «Урала»',
        },
    }

    def _real_extract(self, url):
        """Return id, title and formats scraped from the page at *url*.

        The formats come from the ``bitrates:`` list embedded in the page:
        each ``url: '...'`` / ``bitrate: N`` pair becomes one format.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        video_title = self._html_search_regex(
            r'<div class=\"photoalbum__title\">([^<]+)', webpage, 'title')

        # The list entries are expected on the line following "bitrates:".
        bitrates_raw = self._html_search_regex(
            r'bitrates:.*\n(.*)\]', webpage, 'video URL')
        # Capture the whole number: the previous "[0-9]{3}?" matched exactly
        # three digits, truncating four-digit bitrates and skipping shorter
        # ones. "[^0-9]?" keeps the optional separator from eating a digit.
        bitrates = re.findall(
            r'url:.?\'(.+?)\'.*?bitrate:[^0-9]?([0-9]+)', bitrates_raw)

        formats = [{
            'url': furl,
            # The regex yields a string; store tbr as a number so format
            # sorting compares bitrates numerically rather than lexically.
            'tbr': int(tbr),
        } for furl, tbr in bitrates]

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_title,
            'formats': formats,
        }
|
@@ -2,11 +2,15 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_duration,
|
||||
replace_extension,
|
||||
)
|
||||
|
||||
|
||||
@@ -28,6 +32,7 @@ class FiveMinIE(InfoExtractor):
|
||||
'id': '518013791',
|
||||
'ext': 'mp4',
|
||||
'title': 'iPad Mini with Retina Display Review',
|
||||
'duration': 177,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -38,9 +43,52 @@ class FiveMinIE(InfoExtractor):
|
||||
'id': '518086247',
|
||||
'ext': 'mp4',
|
||||
'title': 'How to Make a Next-Level Fruit Salad',
|
||||
'duration': 184,
|
||||
},
|
||||
},
|
||||
]
|
||||
_ERRORS = {
|
||||
'ErrorVideoNotExist': 'We\'re sorry, but the video you are trying to watch does not exist.',
|
||||
'ErrorVideoNoLongerAvailable': 'We\'re sorry, but the video you are trying to watch is no longer available.',
|
||||
'ErrorVideoRejected': 'We\'re sorry, but the video you are trying to watch has been removed.',
|
||||
'ErrorVideoUserNotGeo': 'We\'re sorry, but the video you are trying to watch cannot be viewed from your current location.',
|
||||
'ErrorVideoLibraryRestriction': 'We\'re sorry, but the video you are trying to watch is currently unavailable for viewing at this domain.',
|
||||
'ErrorExposurePermission': 'We\'re sorry, but the video you are trying to watch is currently unavailable for viewing at this domain.',
|
||||
}
|
||||
_QUALITIES = {
|
||||
1: {
|
||||
'width': 640,
|
||||
'height': 360,
|
||||
},
|
||||
2: {
|
||||
'width': 854,
|
||||
'height': 480,
|
||||
},
|
||||
4: {
|
||||
'width': 1280,
|
||||
'height': 720,
|
||||
},
|
||||
8: {
|
||||
'width': 1920,
|
||||
'height': 1080,
|
||||
},
|
||||
16: {
|
||||
'width': 640,
|
||||
'height': 360,
|
||||
},
|
||||
32: {
|
||||
'width': 854,
|
||||
'height': 480,
|
||||
},
|
||||
64: {
|
||||
'width': 1280,
|
||||
'height': 720,
|
||||
},
|
||||
128: {
|
||||
'width': 640,
|
||||
'height': 360,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -59,26 +107,36 @@ class FiveMinIE(InfoExtractor):
|
||||
'https://syn.5min.com/handlers/SenseHandler.ashx?' + query,
|
||||
video_id)
|
||||
if not response['success']:
|
||||
err_msg = response['errorMessage']
|
||||
if err_msg == 'ErrorVideoUserNotGeo':
|
||||
msg = 'Video not available from your location'
|
||||
else:
|
||||
msg = 'Aol said: %s' % err_msg
|
||||
raise ExtractorError(msg, expected=True, video_id=video_id)
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (
|
||||
self.IE_NAME,
|
||||
self._ERRORS.get(response['errorMessage'], response['errorMessage'])),
|
||||
expected=True)
|
||||
info = response['binding'][0]
|
||||
|
||||
second_id = compat_str(int(video_id[:-2]) + 1)
|
||||
formats = []
|
||||
for quality, height in [(1, 320), (2, 480), (4, 720), (8, 1080)]:
|
||||
if any(r['ID'] == quality for r in info['Renditions']):
|
||||
parsed_video_url = compat_urllib_parse_urlparse(compat_parse_qs(
|
||||
compat_urllib_parse_urlparse(info['EmbededURL']).query)['videoUrl'][0])
|
||||
for rendition in info['Renditions']:
|
||||
if rendition['RenditionType'] == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(rendition['Url'], video_id, m3u8_id='hls'))
|
||||
elif rendition['RenditionType'] == 'aac':
|
||||
continue
|
||||
else:
|
||||
rendition_url = compat_urlparse.urlunparse(parsed_video_url._replace(path=replace_extension(parsed_video_url.path.replace('//', '/%s/' % rendition['ID']), rendition['RenditionType'])))
|
||||
quality = self._QUALITIES.get(rendition['ID'], {})
|
||||
formats.append({
|
||||
'format_id': compat_str(quality),
|
||||
'url': 'http://avideos.5min.com/%s/%s/%s_%s.mp4' % (second_id[-3:], second_id, video_id, quality),
|
||||
'height': height,
|
||||
'format_id': '%s-%d' % (rendition['RenditionType'], rendition['ID']),
|
||||
'url': rendition_url,
|
||||
'width': quality.get('width'),
|
||||
'height': quality.get('height'),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info['Title'],
|
||||
'thumbnail': info.get('ThumbURL'),
|
||||
'duration': parse_duration(info.get('Duration')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -4,8 +4,8 @@ import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
from ..utils import (
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
@@ -30,24 +30,33 @@ class ImdbIE(InfoExtractor):
|
||||
descr = self._html_search_regex(
|
||||
r'(?s)<span itemprop="description">(.*?)</span>',
|
||||
webpage, 'description', fatal=False)
|
||||
available_formats = re.findall(
|
||||
r'case \'(?P<f_id>.*?)\' :$\s+url = \'(?P<path>.*?)\'', webpage,
|
||||
flags=re.MULTILINE)
|
||||
player_url = 'http://www.imdb.com/video/imdb/vi%s/imdb/single' % video_id
|
||||
player_page = self._download_webpage(
|
||||
player_url, video_id, 'Downloading player page')
|
||||
# the player page contains the info for the default format, we have to
|
||||
# fetch other pages for the rest of the formats
|
||||
extra_formats = re.findall(r'href="(?P<url>%s.*?)".*?>(?P<name>.*?)<' % re.escape(player_url), player_page)
|
||||
format_pages = [
|
||||
self._download_webpage(
|
||||
f_url, video_id, 'Downloading info for %s format' % f_name)
|
||||
for f_url, f_name in extra_formats]
|
||||
format_pages.append(player_page)
|
||||
|
||||
quality = qualities(['SD', '480p', '720p'])
|
||||
formats = []
|
||||
for f_id, f_path in available_formats:
|
||||
f_path = f_path.strip()
|
||||
format_page = self._download_webpage(
|
||||
compat_urlparse.urljoin(url, f_path),
|
||||
'Downloading info for %s format' % f_id)
|
||||
for format_page in format_pages:
|
||||
json_data = self._search_regex(
|
||||
r'<script[^>]+class="imdb-player-data"[^>]*?>(.*?)</script>',
|
||||
format_page, 'json data', flags=re.DOTALL)
|
||||
info = json.loads(json_data)
|
||||
format_info = info['videoPlayerObject']['video']
|
||||
f_id = format_info['ffname']
|
||||
formats.append({
|
||||
'format_id': f_id,
|
||||
'url': format_info['videoInfoList'][0]['videoUrl'],
|
||||
'quality': quality(f_id),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -191,7 +191,7 @@ class IqiyiIE(InfoExtractor):
|
||||
'vid': video_id,
|
||||
'vinfo': 1,
|
||||
'tm': tm,
|
||||
'enc': self.md5_text((enc_key + tail)[1:64:2] + tail),
|
||||
'enc': self.md5_text(enc_key + tail),
|
||||
'qyid': _uuid,
|
||||
'tn': random.random(),
|
||||
'um': 0,
|
||||
@@ -205,7 +205,9 @@ class IqiyiIE(InfoExtractor):
|
||||
|
||||
def get_enc_key(self, swf_url, video_id):
|
||||
# TODO: automatic key extraction
|
||||
enc_key = 'eac64f22daf001da6ba9aa8da4d501508bbe90a4d4091fea3b0582a85b38c2cc' # last update at 2015-09-23-23 for Zombie::bite
|
||||
# last update at 2015-10-10 for Zombie::bite
|
||||
# '7239670519b6ac209a0bee4ef0446a6b24894b8ac2751506e42116212a0d0272e505'[2:66][1::2]
|
||||
enc_key = '97596c0abee04ab49ba25564161ad225'
|
||||
return enc_key
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -28,7 +28,7 @@ class JeuxVideoIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
title = mobj.group(1)
|
||||
webpage = self._download_webpage(url, title)
|
||||
title = self._html_search_meta('name', webpage)
|
||||
title = self._html_search_meta('name', webpage) or self._og_search_title(webpage)
|
||||
config_url = self._html_search_regex(
|
||||
r'data-src="(/contenu/medias/video.php.*?)"',
|
||||
webpage, 'config URL')
|
||||
|
@@ -9,13 +9,14 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
compat_ord,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
parse_iso8601,
|
||||
int_or_none,
|
||||
encode_data_uri,
|
||||
)
|
||||
|
||||
|
||||
@@ -25,15 +26,16 @@ class LetvIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.letv.com/ptv/vplay/22005890.html',
|
||||
'md5': 'cab23bd68d5a8db9be31c9a222c1e8df',
|
||||
'md5': 'edadcfe5406976f42f9f266057ee5e40',
|
||||
'info_dict': {
|
||||
'id': '22005890',
|
||||
'ext': 'mp4',
|
||||
'title': '第87届奥斯卡颁奖礼完美落幕 《鸟人》成最大赢家',
|
||||
'timestamp': 1424747397,
|
||||
'upload_date': '20150224',
|
||||
'description': 'md5:a9cb175fd753e2962176b7beca21a47c',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
'hls_prefer_native': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.letv.com/ptv/vplay/1415246.html',
|
||||
'info_dict': {
|
||||
@@ -42,16 +44,22 @@ class LetvIE(InfoExtractor):
|
||||
'title': '美人天下01',
|
||||
'description': 'md5:f88573d9d7225ada1359eaf0dbf8bcda',
|
||||
},
|
||||
'params': {
|
||||
'hls_prefer_native': True,
|
||||
},
|
||||
}, {
|
||||
'note': 'This video is available only in Mainland China, thus a proxy is needed',
|
||||
'url': 'http://www.letv.com/ptv/vplay/1118082.html',
|
||||
'md5': 'f80936fbe20fb2f58648e81386ff7927',
|
||||
'md5': '2424c74948a62e5f31988438979c5ad1',
|
||||
'info_dict': {
|
||||
'id': '1118082',
|
||||
'ext': 'mp4',
|
||||
'title': '与龙共舞 完整版',
|
||||
'description': 'md5:7506a5eeb1722bb9d4068f85024e3986',
|
||||
},
|
||||
'params': {
|
||||
'hls_prefer_native': True,
|
||||
},
|
||||
'skip': 'Only available in China',
|
||||
}]
|
||||
|
||||
@@ -74,6 +82,27 @@ class LetvIE(InfoExtractor):
|
||||
_loc3_ = self.ror(_loc3_, _loc2_ % 17)
|
||||
return _loc3_
|
||||
|
||||
# see M3U8Encryption class in KLetvPlayer.swf
|
||||
@staticmethod
|
||||
def decrypt_m3u8(encrypted_data):
|
||||
if encrypted_data[:5].decode('utf-8').lower() != 'vc_01':
|
||||
return encrypted_data
|
||||
encrypted_data = encrypted_data[5:]
|
||||
|
||||
_loc4_ = bytearray()
|
||||
while encrypted_data:
|
||||
b = compat_ord(encrypted_data[0])
|
||||
_loc4_.extend([b // 16, b & 0x0f])
|
||||
encrypted_data = encrypted_data[1:]
|
||||
idx = len(_loc4_) - 11
|
||||
_loc4_ = _loc4_[idx:] + _loc4_[:idx]
|
||||
_loc7_ = bytearray()
|
||||
while _loc4_:
|
||||
_loc7_.append(_loc4_[0] * 16 + _loc4_[1])
|
||||
_loc4_ = _loc4_[2:]
|
||||
|
||||
return bytes(_loc7_)
|
||||
|
||||
def _real_extract(self, url):
|
||||
media_id = self._match_id(url)
|
||||
page = self._download_webpage(url, media_id)
|
||||
@@ -115,23 +144,28 @@ class LetvIE(InfoExtractor):
|
||||
for format_id in formats:
|
||||
if format_id in dispatch:
|
||||
media_url = playurl['domain'][0] + dispatch[format_id][0]
|
||||
|
||||
# Mimic what flvxz.com do
|
||||
url_parts = list(compat_urlparse.urlparse(media_url))
|
||||
qs = dict(compat_urlparse.parse_qs(url_parts[4]))
|
||||
qs.update({
|
||||
'platid': '14',
|
||||
'splatid': '1401',
|
||||
'tss': 'no',
|
||||
'retry': 1
|
||||
media_url += '&' + compat_urllib_parse.urlencode({
|
||||
'm3v': 1,
|
||||
'format': 1,
|
||||
'expect': 3,
|
||||
'rateid': format_id,
|
||||
})
|
||||
url_parts[4] = compat_urllib_parse.urlencode(qs)
|
||||
media_url = compat_urlparse.urlunparse(url_parts)
|
||||
|
||||
nodes_data = self._download_json(
|
||||
media_url, media_id,
|
||||
'Download JSON metadata for format %s' % format_id)
|
||||
|
||||
req = self._request_webpage(
|
||||
nodes_data['nodelist'][0]['location'], media_id,
|
||||
note='Downloading m3u8 information for format %s' % format_id)
|
||||
|
||||
m3u8_data = self.decrypt_m3u8(req.read())
|
||||
|
||||
url_info_dict = {
|
||||
'url': media_url,
|
||||
'url': encode_data_uri(m3u8_data, 'application/vnd.apple.mpegurl'),
|
||||
'ext': determine_ext(dispatch[format_id][1]),
|
||||
'format_id': format_id,
|
||||
'protocol': 'm3u8',
|
||||
}
|
||||
|
||||
if format_id[-1:] == 'p':
|
||||
|
@@ -140,13 +140,14 @@ class LyndaIE(LyndaBaseIE):
|
||||
|
||||
prioritized_streams = video_json.get('PrioritizedStreams')
|
||||
if prioritized_streams:
|
||||
formats.extend([
|
||||
{
|
||||
'url': video_url,
|
||||
'width': int_or_none(format_id),
|
||||
'format_id': format_id,
|
||||
} for format_id, video_url in prioritized_streams['0'].items()
|
||||
])
|
||||
for prioritized_stream_id, prioritized_stream in prioritized_streams.items():
|
||||
formats.extend([
|
||||
{
|
||||
'url': video_url,
|
||||
'width': int_or_none(format_id),
|
||||
'format_id': '%s-%s' % (prioritized_stream_id, format_id),
|
||||
} for format_id, video_url in prioritized_stream.items()
|
||||
])
|
||||
|
||||
self._check_formats(formats, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
@@ -1,56 +0,0 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class MegaVideozIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?megavideoz\.eu/video/(?P<id>[^/]+)(?:/(?P<display_id>[^/]+))?'
|
||||
_TEST = {
|
||||
'url': 'http://megavideoz.eu/video/WM6UB919XMXH/SMPTE-Universal-Film-Leader',
|
||||
'info_dict': {
|
||||
'id': '48723',
|
||||
'display_id': 'SMPTE-Universal-Film-Leader',
|
||||
'ext': 'mp4',
|
||||
'title': 'SMPTE Universal Film Leader',
|
||||
'thumbnail': 're:https?://.*?\.jpg',
|
||||
'duration': 10.93,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
if any(p in webpage for p in ('>Video Not Found<', '>404 Error<')):
|
||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
config = self._download_xml(
|
||||
self._search_regex(
|
||||
r"var\s+cnf\s*=\s*'([^']+)'", webpage, 'cnf url'),
|
||||
display_id)
|
||||
|
||||
video_url = xpath_text(config, './file', 'video url', fatal=True)
|
||||
title = xpath_text(config, './title', 'title', fatal=True)
|
||||
thumbnail = xpath_text(config, './image', 'thumbnail')
|
||||
duration = float_or_none(xpath_text(config, './duration', 'duration'))
|
||||
video_id = xpath_text(config, './mediaid', 'video id') or video_id
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration
|
||||
}
|
@@ -36,7 +36,7 @@ class RTBFIE(InfoExtractor):
|
||||
|
||||
data = self._parse_json(
|
||||
unescapeHTML(self._search_regex(
|
||||
r'data-video="([^"]+)"', webpage, 'data video')),
|
||||
r'data-media="([^"]+)"', webpage, 'data video')),
|
||||
video_id)
|
||||
|
||||
if data.get('provider').lower() == 'youtube':
|
||||
|
@@ -9,16 +9,16 @@ from ..utils import (
|
||||
|
||||
|
||||
class RteIE(InfoExtractor):
|
||||
_VALID_URL = r'http?://(?:www\.)?rte\.ie/player/[^/]{2,3}/show/(?P<id>[0-9]+)/'
|
||||
_VALID_URL = r'https?://(?:www\.)?rte\.ie/player/[^/]{2,3}/show/[^/]+/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.rte.ie/player/de/show/10363114/',
|
||||
'url': 'http://www.rte.ie/player/ie/show/iwitness-862/10478715/',
|
||||
'info_dict': {
|
||||
'id': '10363114',
|
||||
'id': '10478715',
|
||||
'ext': 'mp4',
|
||||
'title': 'One News',
|
||||
'title': 'Watch iWitness online',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': 'The One O\'Clock News followed by Weather.',
|
||||
'duration': 436.844,
|
||||
'description': 'iWitness : The spirit of Ireland, one voice and one minute at a time.',
|
||||
'duration': 60.046,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'f4m fails with --test atm'
|
||||
|
@@ -15,6 +15,7 @@ from ..compat import (
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
encode_dict,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
@@ -27,8 +28,7 @@ class TwitchBaseIE(InfoExtractor):
|
||||
|
||||
_API_BASE = 'https://api.twitch.tv'
|
||||
_USHER_BASE = 'http://usher.twitch.tv'
|
||||
_LOGIN_URL = 'https://secure.twitch.tv/login'
|
||||
_LOGIN_POST_URL = 'https://passport.twitch.tv/authentications/new'
|
||||
_LOGIN_URL = 'http://www.twitch.tv/login'
|
||||
_NETRC_MACHINE = 'twitch'
|
||||
|
||||
def _handle_error(self, response):
|
||||
@@ -61,26 +61,28 @@ class TwitchBaseIE(InfoExtractor):
|
||||
if username is None:
|
||||
return
|
||||
|
||||
login_page = self._download_webpage(
|
||||
login_page, handle = self._download_webpage_handle(
|
||||
self._LOGIN_URL, None, 'Downloading login page')
|
||||
|
||||
login_form = self._hidden_inputs(login_page)
|
||||
|
||||
login_form.update({
|
||||
'login': username.encode('utf-8'),
|
||||
'password': password.encode('utf-8'),
|
||||
'username': username,
|
||||
'password': password,
|
||||
})
|
||||
|
||||
redirect_url = handle.geturl()
|
||||
|
||||
post_url = self._search_regex(
|
||||
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
|
||||
'post url', default=self._LOGIN_POST_URL, group='url')
|
||||
'post url', default=redirect_url, group='url')
|
||||
|
||||
if not post_url.startswith('http'):
|
||||
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
|
||||
post_url = compat_urlparse.urljoin(redirect_url, post_url)
|
||||
|
||||
request = compat_urllib_request.Request(
|
||||
post_url, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
|
||||
request.add_header('Referer', self._LOGIN_URL)
|
||||
post_url, compat_urllib_parse.urlencode(encode_dict(login_form)).encode('utf-8'))
|
||||
request.add_header('Referer', redirect_url)
|
||||
response = self._download_webpage(
|
||||
request, None, 'Logging in as %s' % username)
|
||||
|
||||
@@ -238,14 +240,24 @@ class TwitchVodIE(TwitchItemBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
item_id = self._match_id(url)
|
||||
|
||||
info = self._download_info(self._ITEM_SHORTCUT, item_id)
|
||||
access_token = self._download_json(
|
||||
'%s/api/vods/%s/access_token' % (self._API_BASE, item_id), item_id,
|
||||
'Downloading %s access token' % self._ITEM_TYPE)
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
'%s/vod/%s?nauth=%s&nauthsig=%s&allow_source=true'
|
||||
% (self._USHER_BASE, item_id, access_token['token'], access_token['sig']),
|
||||
'%s/vod/%s?%s' % (
|
||||
self._USHER_BASE, item_id,
|
||||
compat_urllib_parse.urlencode({
|
||||
'allow_source': 'true',
|
||||
'allow_spectre': 'true',
|
||||
'player': 'twitchweb',
|
||||
'nauth': access_token['token'],
|
||||
'nauthsig': access_token['sig'],
|
||||
})),
|
||||
item_id, 'mp4')
|
||||
|
||||
self._prefer_source(formats)
|
||||
info['formats'] = formats
|
||||
|
||||
|
@@ -1,3 +1,4 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@@ -6,23 +7,51 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_request
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
unescapeHTML,
|
||||
xpath_text,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
class TwitterCardIE(InfoExtractor):
|
||||
IE_NAME = 'twitter:card'
|
||||
_VALID_URL = r'https?://(?:www\.)?twitter\.com/i/cards/tfw/v1/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
|
||||
'md5': 'a74f50b310c83170319ba16de6955192',
|
||||
'info_dict': {
|
||||
'id': '560070183650213889',
|
||||
'ext': 'mp4',
|
||||
'title': 'TwitterCard',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 30.033,
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
|
||||
'md5': '7d2f6b4d2eb841a7ccc893d479bfceb4',
|
||||
'info_dict': {
|
||||
'id': '560070183650213889',
|
||||
'ext': 'mp4',
|
||||
'title': 'TwitterCard',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 30.033,
|
||||
}
|
||||
},
|
||||
}
|
||||
{
|
||||
'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
|
||||
'md5': '7ee2a553b63d1bccba97fbed97d9e1c8',
|
||||
'info_dict': {
|
||||
'id': '623160978427936768',
|
||||
'ext': 'mp4',
|
||||
'title': 'TwitterCard',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'duration': 80.155,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
|
||||
'md5': 'b6f35e8b08a0bec6c8af77a2f4b3a814',
|
||||
'info_dict': {
|
||||
'id': 'dq4Oj5quskI',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ubuntu 11.10 Overview',
|
||||
'description': 'Take a quick peek at what\'s new and improved in Ubuntu 11.10.\n\nOnce installed take a look at 10 Things to Do After Installing: http://www.omgubuntu.co.uk/2011/10/10-things-to-do-after-installing-ubuntu-11-10/',
|
||||
'upload_date': '20111013',
|
||||
'uploader': 'OMG! Ubuntu!',
|
||||
'uploader_id': 'omgubuntu',
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -40,10 +69,24 @@ class TwitterCardIE(InfoExtractor):
|
||||
request.add_header('User-Agent', user_agent)
|
||||
webpage = self._download_webpage(request, video_id)
|
||||
|
||||
config = self._parse_json(
|
||||
unescapeHTML(self._search_regex(
|
||||
r'data-player-config="([^"]+)"', webpage, 'data player config')),
|
||||
youtube_url = self._html_search_regex(
|
||||
r'<iframe[^>]+src="((?:https?:)?//www.youtube.com/embed/[^"]+)"',
|
||||
webpage, 'youtube iframe', default=None)
|
||||
if youtube_url:
|
||||
return self.url_result(youtube_url, 'Youtube')
|
||||
|
||||
config = self._parse_json(self._html_search_regex(
|
||||
r'data-player-config="([^"]+)"', webpage, 'data player config'),
|
||||
video_id)
|
||||
if 'playlist' not in config:
|
||||
if 'vmapUrl' in config:
|
||||
vmap_data = self._download_xml(config['vmapUrl'], video_id)
|
||||
video_url = xpath_text(vmap_data, './/MediaFile').strip()
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
})
|
||||
break # same video regardless of UA
|
||||
continue
|
||||
|
||||
video_url = config['playlist'][0]['source']
|
||||
|
||||
@@ -70,3 +113,54 @@ class TwitterCardIE(InfoExtractor):
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class TwitterIE(InfoExtractor):
|
||||
IE_NAME = 'twitter'
|
||||
_VALID_URL = r'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?P<user_id>[^/]+)/status/(?P<id>\d+)'
|
||||
_TEMPLATE_URL = 'https://twitter.com/%s/status/%s'
|
||||
|
||||
_TEST = {
|
||||
'url': 'https://twitter.com/freethenipple/status/643211948184596480',
|
||||
'md5': '31cd83a116fc41f99ae3d909d4caf6a0',
|
||||
'info_dict': {
|
||||
'id': '643211948184596480',
|
||||
'ext': 'mp4',
|
||||
'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'duration': 12.922,
|
||||
'description': 'FREE THE NIPPLE on Twitter: "FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ"',
|
||||
'uploader': 'FREE THE NIPPLE',
|
||||
'uploader_id': 'freethenipple',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
user_id = mobj.group('user_id')
|
||||
twid = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(self._TEMPLATE_URL % (user_id, twid), twid)
|
||||
|
||||
username = remove_end(self._og_search_title(webpage), ' on Twitter')
|
||||
|
||||
title = self._og_search_description(webpage).strip('').replace('\n', ' ')
|
||||
|
||||
# strip 'https -_t.co_BJYgOjSeGA' junk from filenames
|
||||
mobj = re.match(r'“(.*)\s+(https?://[^ ]+)”', title)
|
||||
title, short_url = mobj.groups()
|
||||
|
||||
card_id = self._search_regex(
|
||||
r'["\']/i/cards/tfw/v1/(\d+)', webpage, 'twitter card url')
|
||||
card_url = 'https://twitter.com/i/cards/tfw/v1/' + card_id
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'TwitterCard',
|
||||
'uploader_id': user_id,
|
||||
'uploader': username,
|
||||
'url': card_url,
|
||||
'webpage_url': url,
|
||||
'description': '%s on Twitter: "%s %s"' % (username, title, short_url),
|
||||
'title': username + ' - ' + title,
|
||||
}
|
||||
|
@@ -93,6 +93,10 @@ class VidmeIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# nsfw, user-disabled
|
||||
'url': 'https://vid.me/dzGJ',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -114,6 +118,12 @@ class VidmeIE(InfoExtractor):
|
||||
|
||||
video = response['video']
|
||||
|
||||
if video.get('state') == 'user-disabled':
|
||||
raise ExtractorError(
|
||||
'Vidme said: This video has been suspended either due to a copyright claim, '
|
||||
'or for violating the terms of use.',
|
||||
expected=True)
|
||||
|
||||
formats = [{
|
||||
'format_id': f.get('type'),
|
||||
'url': f['uri'],
|
||||
|
@@ -131,10 +131,11 @@ class ViewsterIE(InfoExtractor):
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url, video_id, f4m_id='hds'))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', m3u8_id='hls',
|
||||
fatal=False # m3u8 sometimes fail
|
||||
))
|
||||
fatal=False) # m3u8 sometimes fail
|
||||
if m3u8_formats:
|
||||
formats.extend(m3u8_formats)
|
||||
else:
|
||||
format_id = media.get('Bitrate')
|
||||
f = {
|
||||
|
@@ -39,7 +39,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
return
|
||||
self.report_login()
|
||||
webpage = self._download_webpage(self._LOGIN_URL, None, False)
|
||||
token = self._extract_xsrft(webpage)
|
||||
token, vuid = self._extract_xsrft_and_vuid(webpage)
|
||||
data = urlencode_postdata({
|
||||
'action': 'login',
|
||||
'email': username,
|
||||
@@ -49,13 +49,18 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
})
|
||||
login_request = compat_urllib_request.Request(self._LOGIN_URL, data)
|
||||
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
login_request.add_header('Cookie', 'vuid=%s' % vuid)
|
||||
login_request.add_header('Referer', self._LOGIN_URL)
|
||||
self._download_webpage(login_request, None, False, 'Wrong login info')
|
||||
|
||||
def _extract_xsrft(self, webpage):
|
||||
return self._search_regex(
|
||||
def _extract_xsrft_and_vuid(self, webpage):
|
||||
xsrft = self._search_regex(
|
||||
r'xsrft\s*[=:]\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)',
|
||||
webpage, 'login token', group='xsrft')
|
||||
vuid = self._search_regex(
|
||||
r'["\']vuid["\']\s*:\s*(["\'])(?P<vuid>.+?)\1',
|
||||
webpage, 'vuid', group='vuid')
|
||||
return xsrft, vuid
|
||||
|
||||
|
||||
class VimeoIE(VimeoBaseInfoExtractor):
|
||||
@@ -80,12 +85,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '56015672',
|
||||
'ext': 'mp4',
|
||||
"upload_date": "20121220",
|
||||
"description": "This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
|
||||
"uploader_id": "user7108434",
|
||||
"uploader": "Filippo Valsorda",
|
||||
"title": "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
|
||||
"duration": 10,
|
||||
'title': "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
|
||||
'description': 'md5:2d3305bad981a06ff79f027f19865021',
|
||||
'upload_date': '20121220',
|
||||
'uploader_id': 'user7108434',
|
||||
'uploader': 'Filippo Valsorda',
|
||||
'duration': 10,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -98,7 +103,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'uploader_id': 'openstreetmapus',
|
||||
'uploader': 'OpenStreetMap US',
|
||||
'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
|
||||
'description': 'md5:380943ec71b89736ff4bf27183233d09',
|
||||
'description': 'md5:fd69a7b8d8c34a4e1d2ec2e4afd6ec30',
|
||||
'duration': 1595,
|
||||
},
|
||||
},
|
||||
@@ -152,7 +157,6 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
},
|
||||
{
|
||||
'url': 'http://vimeo.com/76979871',
|
||||
'md5': '3363dd6ffebe3784d56f4132317fd446',
|
||||
'note': 'Video with subtitles',
|
||||
'info_dict': {
|
||||
'id': '76979871',
|
||||
@@ -198,7 +202,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
password = self._downloader.params.get('videopassword', None)
|
||||
if password is None:
|
||||
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
|
||||
token = self._extract_xsrft(webpage)
|
||||
token, vuid = self._extract_xsrft_and_vuid(webpage)
|
||||
data = urlencode_postdata({
|
||||
'password': password,
|
||||
'token': token,
|
||||
@@ -208,6 +212,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
url = url.replace('http://', 'https://')
|
||||
password_request = compat_urllib_request.Request(url + '/password', data)
|
||||
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
password_request.add_header('Cookie', 'clip_test2=1; vuid=%s' % vuid)
|
||||
password_request.add_header('Referer', url)
|
||||
return self._download_webpage(
|
||||
password_request, video_id,
|
||||
@@ -281,7 +286,17 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
try:
|
||||
try:
|
||||
config_url = self._html_search_regex(
|
||||
r' data-config-url="(.+?)"', webpage, 'config URL')
|
||||
r' data-config-url="(.+?)"', webpage,
|
||||
'config URL', default=None)
|
||||
if not config_url:
|
||||
# Sometimes new react-based page is served instead of old one that require
|
||||
# different config URL extraction approach (see
|
||||
# https://github.com/rg3/youtube-dl/pull/7209)
|
||||
vimeo_clip_page_config = self._search_regex(
|
||||
r'vimeo\.clip_page_config\s*=\s*({.+?});', webpage,
|
||||
'vimeo clip page config')
|
||||
config_url = self._parse_json(
|
||||
vimeo_clip_page_config, video_id)['player']['config_url']
|
||||
config_json = self._download_webpage(config_url, video_id)
|
||||
config = json.loads(config_json)
|
||||
except RegexNotFoundError:
|
||||
@@ -391,14 +406,20 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'ext': codec_extension,
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
'width': file_info.get('width'),
|
||||
'height': file_info.get('height'),
|
||||
'width': int_or_none(file_info.get('width')),
|
||||
'height': int_or_none(file_info.get('height')),
|
||||
'tbr': int_or_none(file_info.get('bitrate')),
|
||||
})
|
||||
formats = []
|
||||
m3u8_url = config_files.get('hls', {}).get('all')
|
||||
if m3u8_url:
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native', 0, 'hls', fatal=False)
|
||||
if m3u8_formats:
|
||||
formats.extend(m3u8_formats)
|
||||
for key in ('other', 'sd', 'hd'):
|
||||
formats += files[key]
|
||||
if len(formats) == 0:
|
||||
raise ExtractorError('No known codec found')
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
text_tracks = config['request'].get('text_tracks')
|
||||
@@ -459,7 +480,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
|
||||
if password is None:
|
||||
raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True)
|
||||
fields = self._hidden_inputs(login_form)
|
||||
token = self._extract_xsrft(webpage)
|
||||
token, vuid = self._extract_xsrft_and_vuid(webpage)
|
||||
fields['token'] = token
|
||||
fields['password'] = password
|
||||
post = urlencode_postdata(fields)
|
||||
@@ -468,6 +489,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
|
||||
password_url = compat_urlparse.urljoin(page_url, password_path)
|
||||
password_request = compat_urllib_request.Request(password_url, post)
|
||||
password_request.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
password_request.add_header('Cookie', 'vuid=%s' % vuid)
|
||||
self._set_cookie('vimeo.com', 'xsrft', token)
|
||||
|
||||
return self._download_webpage(
|
||||
|
@@ -1,10 +1,14 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class VineIE(InfoExtractor):
|
||||
@@ -17,10 +21,12 @@ class VineIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Chicken.',
|
||||
'alt_title': 'Vine by Jack Dorsey',
|
||||
'description': 'Chicken.',
|
||||
'upload_date': '20130519',
|
||||
'uploader': 'Jack Dorsey',
|
||||
'uploader_id': '76',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://vine.co/v/MYxVapFvz2z',
|
||||
@@ -29,11 +35,13 @@ class VineIE(InfoExtractor):
|
||||
'id': 'MYxVapFvz2z',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fuck Da Police #Mikebrown #justice #ferguson #prayforferguson #protesting #NMOS14',
|
||||
'alt_title': 'Vine by Luna',
|
||||
'description': 'Fuck Da Police #Mikebrown #justice #ferguson #prayforferguson #protesting #NMOS14',
|
||||
'alt_title': 'Vine by Mars Ruiz',
|
||||
'upload_date': '20140815',
|
||||
'uploader': 'Luna',
|
||||
'uploader': 'Mars Ruiz',
|
||||
'uploader_id': '1102363502380728320',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://vine.co/v/bxVjBbZlPUH',
|
||||
@@ -43,14 +51,33 @@ class VineIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': '#mw3 #ac130 #killcam #angelofdeath',
|
||||
'alt_title': 'Vine by Z3k3',
|
||||
'description': '#mw3 #ac130 #killcam #angelofdeath',
|
||||
'upload_date': '20130430',
|
||||
'uploader': 'Z3k3',
|
||||
'uploader_id': '936470460173008896',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://vine.co/oembed/MYxVapFvz2z.json',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://vine.co/v/e192BnZnZ9V',
|
||||
'info_dict': {
|
||||
'id': 'e192BnZnZ9V',
|
||||
'ext': 'mp4',
|
||||
'title': 'ยิ้ม~ เขิน~ อาย~ น่าร้ากอ้ะ >//< @n_whitewo @orlameena #lovesicktheseries #lovesickseason2',
|
||||
'alt_title': 'Vine by Pimry_zaa',
|
||||
'upload_date': '20150705',
|
||||
'uploader': 'Pimry_zaa',
|
||||
'uploader_id': '1135760698325307392',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -65,25 +92,26 @@ class VineIE(InfoExtractor):
|
||||
|
||||
formats = [{
|
||||
'format_id': '%(format)s-%(rate)s' % f,
|
||||
'vcodec': f['format'],
|
||||
'quality': f['rate'],
|
||||
'vcodec': f.get('format'),
|
||||
'quality': f.get('rate'),
|
||||
'url': f['videoUrl'],
|
||||
} for f in data['videoUrls']]
|
||||
} for f in data['videoUrls'] if f.get('videoUrl')]
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
username = data.get('username')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'alt_title': self._og_search_description(webpage, default=None),
|
||||
'description': data['description'],
|
||||
'thumbnail': data['thumbnailUrl'],
|
||||
'upload_date': unified_strdate(data['created']),
|
||||
'uploader': data['username'],
|
||||
'uploader_id': data['userIdStr'],
|
||||
'like_count': data['likes']['count'],
|
||||
'comment_count': data['comments']['count'],
|
||||
'repost_count': data['reposts']['count'],
|
||||
'title': data.get('description') or self._og_search_title(webpage),
|
||||
'alt_title': 'Vine by %s' % username if username else self._og_search_description(webpage, default=None),
|
||||
'thumbnail': data.get('thumbnailUrl'),
|
||||
'upload_date': unified_strdate(data.get('created')),
|
||||
'uploader': username,
|
||||
'uploader_id': data.get('userIdStr'),
|
||||
'like_count': int_or_none(data.get('likes', {}).get('count')),
|
||||
'comment_count': int_or_none(data.get('comments', {}).get('count')),
|
||||
'repost_count': int_or_none(data.get('reposts', {}).get('count')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
@@ -46,6 +46,12 @@ class YandexMusicTrackIE(InfoExtractor):
|
||||
% (data['host'], key, data['ts'] + data['path'], storage[1]))
|
||||
|
||||
def _get_track_info(self, track):
|
||||
thumbnail = None
|
||||
cover_uri = track.get('albums', [{}])[0].get('coverUri')
|
||||
if cover_uri:
|
||||
thumbnail = cover_uri.replace('%%', 'orig')
|
||||
if not thumbnail.startswith('http'):
|
||||
thumbnail = 'http://' + thumbnail
|
||||
return {
|
||||
'id': track['id'],
|
||||
'ext': 'mp3',
|
||||
@@ -53,6 +59,7 @@ class YandexMusicTrackIE(InfoExtractor):
|
||||
'title': '%s - %s' % (track['artists'][0]['name'], track['title']),
|
||||
'filesize': int_or_none(track.get('fileSize')),
|
||||
'duration': float_or_none(track.get('durationMs'), 1000),
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -178,6 +178,52 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
return
|
||||
|
||||
|
||||
class YoutubePlaylistBaseInfoExtractor(InfoExtractor):
|
||||
# Extract the video ids from the playlist pages
|
||||
def _entries(self, page, playlist_id):
|
||||
more_widget_html = content_html = page
|
||||
for page_num in itertools.count(1):
|
||||
for video_id, video_title in self.extract_videos_from_page(content_html):
|
||||
yield self.url_result(
|
||||
video_id, 'Youtube', video_id=video_id,
|
||||
video_title=video_title)
|
||||
|
||||
mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
|
||||
if not mobj:
|
||||
break
|
||||
|
||||
more = self._download_json(
|
||||
'https://youtube.com/%s' % mobj.group('more'), playlist_id,
|
||||
'Downloading page #%s' % page_num,
|
||||
transform_source=uppercase_escape)
|
||||
content_html = more['content_html']
|
||||
if not content_html.strip():
|
||||
# Some webpages show a "Load more" button but they don't
|
||||
# have more videos
|
||||
break
|
||||
more_widget_html = more['load_more_widget_html']
|
||||
|
||||
def extract_videos_from_page(self, page):
|
||||
ids_in_page = []
|
||||
titles_in_page = []
|
||||
for mobj in re.finditer(self._VIDEO_RE, page):
|
||||
# The link with index 0 is not the first video of the playlist (not sure if still actual)
|
||||
if 'index' in mobj.groupdict() and mobj.group('id') == '0':
|
||||
continue
|
||||
video_id = mobj.group('id')
|
||||
video_title = unescapeHTML(mobj.group('title'))
|
||||
if video_title:
|
||||
video_title = video_title.strip()
|
||||
try:
|
||||
idx = ids_in_page.index(video_id)
|
||||
if video_title and not titles_in_page[idx]:
|
||||
titles_in_page[idx] = video_title
|
||||
except ValueError:
|
||||
ids_in_page.append(video_id)
|
||||
titles_in_page.append(video_title)
|
||||
return zip(ids_in_page, titles_in_page)
|
||||
|
||||
|
||||
class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
IE_DESC = 'YouTube.com'
|
||||
_VALID_URL = r"""(?x)^
|
||||
@@ -1419,7 +1465,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
class YoutubePlaylistIE(YoutubeBaseInfoExtractor, YoutubePlaylistBaseInfoExtractor):
|
||||
IE_DESC = 'YouTube.com playlists'
|
||||
_VALID_URL = r"""(?x)(?:
|
||||
(?:https?://)?
|
||||
@@ -1440,7 +1486,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
|
||||
)"""
|
||||
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
|
||||
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)'
|
||||
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
|
||||
IE_NAME = 'youtube:playlist'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
|
||||
@@ -1557,37 +1603,11 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
else:
|
||||
self.report_warning('Youtube gives an alert message: ' + match)
|
||||
|
||||
# Extract the video ids from the playlist pages
|
||||
def _entries():
|
||||
more_widget_html = content_html = page
|
||||
for page_num in itertools.count(1):
|
||||
matches = re.finditer(self._VIDEO_RE, content_html)
|
||||
# We remove the duplicates and the link with index 0
|
||||
# (it's not the first video of the playlist)
|
||||
new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
|
||||
for vid_id in new_ids:
|
||||
yield self.url_result(vid_id, 'Youtube', video_id=vid_id)
|
||||
|
||||
mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
|
||||
if not mobj:
|
||||
break
|
||||
|
||||
more = self._download_json(
|
||||
'https://youtube.com/%s' % mobj.group('more'), playlist_id,
|
||||
'Downloading page #%s' % page_num,
|
||||
transform_source=uppercase_escape)
|
||||
content_html = more['content_html']
|
||||
if not content_html.strip():
|
||||
# Some webpages show a "Load more" button but they don't
|
||||
# have more videos
|
||||
break
|
||||
more_widget_html = more['load_more_widget_html']
|
||||
|
||||
playlist_title = self._html_search_regex(
|
||||
r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
|
||||
page, 'title')
|
||||
|
||||
return self.playlist_result(_entries(), playlist_id, playlist_title)
|
||||
return self.playlist_result(self._entries(page, playlist_id), playlist_id, playlist_title)
|
||||
|
||||
def _real_extract(self, url):
|
||||
# Extract playlist id
|
||||
@@ -1613,10 +1633,11 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
return self._extract_playlist(playlist_id)
|
||||
|
||||
|
||||
class YoutubeChannelIE(InfoExtractor):
|
||||
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
|
||||
IE_DESC = 'YouTube.com channels'
|
||||
_VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
|
||||
_TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
|
||||
_VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
|
||||
IE_NAME = 'youtube:channel'
|
||||
_TESTS = [{
|
||||
'note': 'paginated channel',
|
||||
@@ -1627,22 +1648,6 @@ class YoutubeChannelIE(InfoExtractor):
|
||||
}
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def extract_videos_from_page(page):
|
||||
ids_in_page = []
|
||||
titles_in_page = []
|
||||
for mobj in re.finditer(r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?', page):
|
||||
video_id = mobj.group('id')
|
||||
video_title = unescapeHTML(mobj.group('title'))
|
||||
try:
|
||||
idx = ids_in_page.index(video_id)
|
||||
if video_title and not titles_in_page[idx]:
|
||||
titles_in_page[idx] = video_title
|
||||
except ValueError:
|
||||
ids_in_page.append(video_id)
|
||||
titles_in_page.append(video_title)
|
||||
return zip(ids_in_page, titles_in_page)
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
|
||||
@@ -1685,29 +1690,7 @@ class YoutubeChannelIE(InfoExtractor):
|
||||
for video_id, video_title in self.extract_videos_from_page(channel_page)]
|
||||
return self.playlist_result(entries, channel_id)
|
||||
|
||||
def _entries():
|
||||
more_widget_html = content_html = channel_page
|
||||
for pagenum in itertools.count(1):
|
||||
|
||||
for video_id, video_title in self.extract_videos_from_page(content_html):
|
||||
yield self.url_result(
|
||||
video_id, 'Youtube', video_id=video_id,
|
||||
video_title=video_title)
|
||||
|
||||
mobj = re.search(
|
||||
r'data-uix-load-more-href="/?(?P<more>[^"]+)"',
|
||||
more_widget_html)
|
||||
if not mobj:
|
||||
break
|
||||
|
||||
more = self._download_json(
|
||||
'https://youtube.com/%s' % mobj.group('more'), channel_id,
|
||||
'Downloading page #%s' % (pagenum + 1),
|
||||
transform_source=uppercase_escape)
|
||||
content_html = more['content_html']
|
||||
more_widget_html = more['load_more_widget_html']
|
||||
|
||||
return self.playlist_result(_entries(), channel_id)
|
||||
return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
|
||||
|
||||
|
||||
class YoutubeUserIE(YoutubeChannelIE):
|
||||
|
@@ -9,9 +9,11 @@ from ..utils import ExtractorError
|
||||
|
||||
class ZingMp3BaseInfoExtractor(InfoExtractor):
|
||||
|
||||
def _extract_item(self, item):
|
||||
def _extract_item(self, item, fatal=True):
|
||||
error_message = item.find('./errormessage').text
|
||||
if error_message:
|
||||
if not fatal:
|
||||
return
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, error_message),
|
||||
expected=True)
|
||||
@@ -43,7 +45,9 @@ class ZingMp3BaseInfoExtractor(InfoExtractor):
|
||||
entries = []
|
||||
|
||||
for i, item in enumerate(items, 1):
|
||||
entry = self._extract_item(item)
|
||||
entry = self._extract_item(item, fatal=False)
|
||||
if not entry:
|
||||
continue
|
||||
entry['id'] = '%s-%d' % (id, i)
|
||||
entries.append(entry)
|
||||
|
||||
@@ -85,7 +89,7 @@ class ZingMp3SongIE(ZingMp3BaseInfoExtractor):
|
||||
|
||||
|
||||
class ZingMp3AlbumIE(ZingMp3BaseInfoExtractor):
|
||||
_VALID_URL = r'https?://mp3\.zing\.vn/album/(?P<slug>[^/]+)/(?P<album_id>\w+)\.html'
|
||||
_VALID_URL = r'https?://mp3\.zing\.vn/(?:album|playlist)/(?P<slug>[^/]+)/(?P<album_id>\w+)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'http://mp3.zing.vn/album/Lau-Dai-Tinh-Ai-Bang-Kieu-Minh-Tuyet/ZWZBWDAF.html',
|
||||
'info_dict': {
|
||||
@@ -94,6 +98,9 @@ class ZingMp3AlbumIE(ZingMp3BaseInfoExtractor):
|
||||
'title': 'Lâu Đài Tình Ái - Bằng Kiều ft. Minh Tuyết | Album 320 lossless',
|
||||
},
|
||||
'playlist_count': 10,
|
||||
}, {
|
||||
'url': 'http://mp3.zing.vn/playlist/Duong-Hong-Loan-apollobee/IWCAACCB.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
IE_NAME = 'zingmp3:album'
|
||||
IE_DESC = 'mp3.zing.vn albums'
|
||||
|
@@ -3,6 +3,7 @@
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import calendar
|
||||
import codecs
|
||||
import contextlib
|
||||
@@ -1371,7 +1372,12 @@ def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
|
||||
v = getattr(v, get_attr, None)
|
||||
if v == '':
|
||||
v = None
|
||||
return default if v is None else (int(v) * invscale // scale)
|
||||
if v is None:
|
||||
return default
|
||||
try:
|
||||
return int(v) * invscale // scale
|
||||
except ValueError:
|
||||
return default
|
||||
|
||||
|
||||
def str_or_none(v, default=None):
|
||||
@@ -1387,7 +1393,12 @@ def str_to_int(int_str):
|
||||
|
||||
|
||||
def float_or_none(v, scale=1, invscale=1, default=None):
|
||||
return default if v is None else (float(v) * invscale / scale)
|
||||
if v is None:
|
||||
return default
|
||||
try:
|
||||
return float(v) * invscale / scale
|
||||
except ValueError:
|
||||
return default
|
||||
|
||||
|
||||
def parse_duration(s):
|
||||
@@ -1785,6 +1796,10 @@ def urlhandle_detect_ext(url_handle):
|
||||
return mimetype2ext(getheader('Content-Type'))
|
||||
|
||||
|
||||
def encode_data_uri(data, mime_type):
|
||||
return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
|
||||
|
||||
|
||||
def age_restricted(content_limit, age_limit):
|
||||
""" Returns True iff the content should be blocked """
|
||||
|
||||
|
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2015.10.09'
|
||||
__version__ = '2015.10.18'
|
||||
|
Reference in New Issue
Block a user