Compare commits
75 Commits
2016.01.01
...
2016.01.09
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b374af6ebd | ||
|
|
16f1131a4d | ||
|
|
d5f071afb5 | ||
|
|
14b4f038c0 | ||
|
|
bcac2a0710 | ||
|
|
1a6d92847f | ||
|
|
6a16fd4a1a | ||
|
|
44731e308c | ||
|
|
4763b624a6 | ||
|
|
6609b3ce37 | ||
|
|
7e182627d9 | ||
|
|
5777f5d386 | ||
|
|
5dbe81a1d3 | ||
|
|
4cf096a4a9 | ||
|
|
18e6c97c48 | ||
|
|
97afd99a18 | ||
|
|
23f13e9754 | ||
|
|
2e02ecbccc | ||
|
|
e4f49a8753 | ||
|
|
51d3045de2 | ||
|
|
76048b23e8 | ||
|
|
f20756fb10 | ||
|
|
17b2d7ca77 | ||
|
|
40f796288a | ||
|
|
2f546d0a3c | ||
|
|
18c782ab26 | ||
|
|
33cee6c7f6 | ||
|
|
a2e51e7b49 | ||
|
|
bd19aa0ed3 | ||
|
|
8f4c56f334 | ||
|
|
1dcc38b233 | ||
|
|
fff79f1867 | ||
|
|
3f17c357d9 | ||
|
|
9938a17f92 | ||
|
|
9746f4314a | ||
|
|
0238451fc0 | ||
|
|
2098aee7d6 | ||
|
|
fb588f6a56 | ||
|
|
896c7a23cd | ||
|
|
1463c5b9ac | ||
|
|
c6270b2ed5 | ||
|
|
ab3176af34 | ||
|
|
5aa535c329 | ||
|
|
133b1886fc | ||
|
|
66295fa4a6 | ||
|
|
e54c44eeab | ||
|
|
a7aaa39863 | ||
|
|
ea6abd740f | ||
|
|
3f3343cd3e | ||
|
|
4059eabd58 | ||
|
|
6b46102661 | ||
|
|
141a273a8b | ||
|
|
2fffb1dcd0 | ||
|
|
e698e4e533 | ||
|
|
b7546397f0 | ||
|
|
0311677258 | ||
|
|
88fb59d91b | ||
|
|
a1d9f6c5dc | ||
|
|
c579c5e967 | ||
|
|
c9c194053d | ||
|
|
f20a11ed25 | ||
|
|
76a353c9e5 | ||
|
|
392f04d586 | ||
|
|
94de6cf59c | ||
|
|
8af2804a5d | ||
|
|
054479754c | ||
|
|
5bafcf6525 | ||
|
|
306c51c669 | ||
|
|
27bfd4e526 | ||
|
|
ca227c8698 | ||
|
|
32f9036447 | ||
|
|
7a0b07c719 | ||
|
|
4d402db521 | ||
|
|
7109903e61 | ||
|
|
957e0db1d2 |
@@ -627,7 +627,7 @@ Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the opt
|
||||
|
||||
Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`. Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have the correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows, `LF` (`\n`) for Linux and Mac OS X, and `CR` (`\r`) for classic Mac OS (version 9 and earlier). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of an invalid newline format.
|
||||
|
||||
Passing cookies to youtube-dl is a good way to work around login when a particular extractor does not implement it explicitly.
|
||||
Passing cookies to youtube-dl is a good way to work around login when a particular extractor does not implement it explicitly. Another use case is working around a [CAPTCHA](https://en.wikipedia.org/wiki/CAPTCHA) that some websites require you to solve in particular cases in order to get access (e.g. YouTube, CloudFlare).
|
||||
|
||||
### Can you add support for this anime video site, or site which shows current movies for free?
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
# Supported sites
|
||||
- **1tv**: Первый канал
|
||||
- **1up.com**
|
||||
- **20min**
|
||||
- **220.ro**
|
||||
- **22tracks:genre**
|
||||
- **22tracks:track**
|
||||
@@ -255,6 +256,7 @@
|
||||
- **Ir90Tv**
|
||||
- **ivi**: ivi.ru
|
||||
- **ivi:compilation**: ivi.ru compilations
|
||||
- **ivideon**: Ivideon TV
|
||||
- **Izlesene**
|
||||
- **JadoreCettePub**
|
||||
- **JeuxVideo**
|
||||
@@ -386,13 +388,14 @@
|
||||
- **nowness**
|
||||
- **nowness:playlist**
|
||||
- **nowness:series**
|
||||
- **NowTV**
|
||||
- **NowTV** (Currently broken)
|
||||
- **NowTVList**
|
||||
- **nowvideo**: NowVideo
|
||||
- **npo**: npo.nl and ntr.nl
|
||||
- **npo.nl:live**
|
||||
- **npo.nl:radio**
|
||||
- **npo.nl:radio:fragment**
|
||||
- **Npr**
|
||||
- **NRK**
|
||||
- **NRKPlaylist**
|
||||
- **NRKTV**: NRK TV and NRK Radio
|
||||
@@ -464,11 +467,13 @@
|
||||
- **RegioTV**
|
||||
- **Restudy**
|
||||
- **ReverbNation**
|
||||
- **Revision3**
|
||||
- **RingTV**
|
||||
- **RottenTomatoes**
|
||||
- **Roxwel**
|
||||
- **RTBF**
|
||||
- **Rte**
|
||||
- **rte**: Raidió Teilifís Éireann TV
|
||||
- **rte:radio**: Raidió Teilifís Éireann radio
|
||||
- **rtl.nl**: rtl.nl and rtlxl.nl
|
||||
- **RTL2**
|
||||
- **RTP**
|
||||
@@ -573,7 +578,6 @@
|
||||
- **TeleMB**
|
||||
- **TeleTask**
|
||||
- **TenPlay**
|
||||
- **TestTube**
|
||||
- **TF1**
|
||||
- **TheIntercept**
|
||||
- **TheOnion**
|
||||
|
||||
@@ -1791,6 +1791,10 @@ class YoutubeDL(object):
|
||||
res = ''
|
||||
if fdict.get('ext') in ['f4f', 'f4m']:
|
||||
res += '(unsupported) '
|
||||
if fdict.get('language'):
|
||||
if res:
|
||||
res += ' '
|
||||
res += '[%s]' % fdict['language']
|
||||
if fdict.get('format_note') is not None:
|
||||
res += fdict['format_note'] + ' '
|
||||
if fdict.get('tbr') is not None:
|
||||
|
||||
@@ -46,7 +46,7 @@ class HlsFD(FileDownloader):
|
||||
|
||||
self._debug_cmd(args)
|
||||
|
||||
retval = subprocess.call(args)
|
||||
retval = subprocess.call(args, stdin=subprocess.PIPE)
|
||||
if retval == 0:
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen('\r[%s] %s bytes' % (args[0], fsize))
|
||||
|
||||
@@ -299,6 +299,7 @@ from .ivi import (
|
||||
IviIE,
|
||||
IviCompilationIE
|
||||
)
|
||||
from .ivideon import IvideonIE
|
||||
from .izlesene import IzleseneIE
|
||||
from .jadorecettepub import JadoreCettePubIE
|
||||
from .jeuxvideo import JeuxVideoIE
|
||||
@@ -473,6 +474,7 @@ from .npo import (
|
||||
VPROIE,
|
||||
WNLIE
|
||||
)
|
||||
from .npr import NprIE
|
||||
from .nrk import (
|
||||
NRKIE,
|
||||
NRKPlaylistIE,
|
||||
@@ -557,12 +559,13 @@ from .redtube import RedTubeIE
|
||||
from .regiotv import RegioTVIE
|
||||
from .restudy import RestudyIE
|
||||
from .reverbnation import ReverbNationIE
|
||||
from .revision3 import Revision3IE
|
||||
from .ringtv import RingTVIE
|
||||
from .ro220 import Ro220IE
|
||||
from .rottentomatoes import RottenTomatoesIE
|
||||
from .roxwel import RoxwelIE
|
||||
from .rtbf import RTBFIE
|
||||
from .rte import RteIE
|
||||
from .rte import RteIE, RteRadioIE
|
||||
from .rtlnl import RtlNlIE
|
||||
from .rtl2 import RTL2IE
|
||||
from .rtp import RTPIE
|
||||
@@ -686,7 +689,6 @@ from .telemb import TeleMBIE
|
||||
from .teletask import TeleTaskIE
|
||||
from .tenplay import TenPlayIE
|
||||
from .testurl import TestURLIE
|
||||
from .testtube import TestTubeIE
|
||||
from .tf1 import TF1IE
|
||||
from .theintercept import TheInterceptIE
|
||||
from .theonion import TheOnionIE
|
||||
@@ -746,6 +748,7 @@ from .tvp import TvpIE, TvpSeriesIE
|
||||
from .tvplay import TVPlayIE
|
||||
from .tweakers import TweakersIE
|
||||
from .twentyfourvideo import TwentyFourVideoIE
|
||||
from .twentymin import TwentyMinutenIE
|
||||
from .twentytwotracks import (
|
||||
TwentyTwoTracksIE,
|
||||
TwentyTwoTracksGenreIE
|
||||
|
||||
@@ -187,7 +187,8 @@ class AdultSwimIE(InfoExtractor):
|
||||
media_url = file_el.text
|
||||
if determine_ext(media_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_url, segment_title, 'mp4', preference=0, m3u8_id='hls'))
|
||||
media_url, segment_title, 'mp4', preference=0,
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': '%s_%s' % (bitrate, ftype),
|
||||
|
||||
@@ -76,5 +76,6 @@ class AMPIE(InfoExtractor):
|
||||
'thumbnails': thumbnails,
|
||||
'timestamp': parse_iso8601(item.get('pubDate'), ' '),
|
||||
'duration': int_or_none(media_content[0].get('@attributes', {}).get('duration')),
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -4,7 +4,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import unescapeHTML
|
||||
|
||||
|
||||
class BaiduVideoIE(InfoExtractor):
|
||||
@@ -14,8 +14,8 @@ class BaiduVideoIE(InfoExtractor):
|
||||
'url': 'http://v.baidu.com/comic/1069.htm?frp=bdbrand&q=%E4%B8%AD%E5%8D%8E%E5%B0%8F%E5%BD%93%E5%AE%B6',
|
||||
'info_dict': {
|
||||
'id': '1069',
|
||||
'title': '中华小当家 TV版 (全52集)',
|
||||
'description': 'md5:395a419e41215e531c857bb037bbaf80',
|
||||
'title': '中华小当家 TV版国语',
|
||||
'description': 'md5:51be07afe461cf99fa61231421b5397c',
|
||||
},
|
||||
'playlist_count': 52,
|
||||
}, {
|
||||
@@ -25,45 +25,32 @@ class BaiduVideoIE(InfoExtractor):
|
||||
'title': 're:^奔跑吧兄弟',
|
||||
'description': 'md5:1bf88bad6d850930f542d51547c089b8',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
'playlist_mincount': 12,
|
||||
}]
|
||||
|
||||
def _call_api(self, path, category, playlist_id, note):
|
||||
return self._download_json('http://app.video.baidu.com/%s/?worktype=adnative%s&id=%s' % (
|
||||
path, category, playlist_id), playlist_id, note)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
playlist_id = mobj.group('id')
|
||||
category = category2 = mobj.group('type')
|
||||
category, playlist_id = re.match(self._VALID_URL, url).groups()
|
||||
if category == 'show':
|
||||
category2 = 'tvshow'
|
||||
category = 'tvshow'
|
||||
if category == 'tv':
|
||||
category = 'tvplay'
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
playlist_detail = self._call_api(
|
||||
'xqinfo', category, playlist_id, 'Download playlist JSON metadata')
|
||||
|
||||
playlist_title = self._html_search_regex(
|
||||
r'title\s*:\s*(["\'])(?P<title>[^\']+)\1', webpage,
|
||||
'playlist title', group='title')
|
||||
playlist_description = self._html_search_regex(
|
||||
r'<input[^>]+class="j-data-intro"[^>]+value="([^"]+)"/>', webpage,
|
||||
playlist_id, 'playlist description')
|
||||
playlist_title = playlist_detail['title']
|
||||
playlist_description = unescapeHTML(playlist_detail.get('intro'))
|
||||
|
||||
site = self._html_search_regex(
|
||||
r'filterSite\s*:\s*["\']([^"]*)["\']', webpage,
|
||||
'primary provider site')
|
||||
api_result = self._download_json(
|
||||
'http://v.baidu.com/%s_intro/?dtype=%sPlayUrl&id=%s&site=%s' % (
|
||||
category, category2, playlist_id, site),
|
||||
playlist_id, 'Get playlist links')
|
||||
episodes_detail = self._call_api(
|
||||
'xqsingle', category, playlist_id, 'Download episodes JSON metadata')
|
||||
|
||||
entries = []
|
||||
for episode in api_result[0]['episodes']:
|
||||
episode_id = '%s_%s' % (playlist_id, episode['episode'])
|
||||
|
||||
redirect_page = self._download_webpage(
|
||||
compat_urlparse.urljoin(url, episode['url']), episode_id,
|
||||
note='Download Baidu redirect page')
|
||||
real_url = self._html_search_regex(
|
||||
r'location\.replace\("([^"]+)"\)', redirect_page, 'real URL')
|
||||
|
||||
entries.append(self.url_result(
|
||||
real_url, video_title=episode['single_title']))
|
||||
entries = [self.url_result(
|
||||
episode['url'], video_title=episode['title']
|
||||
) for episode in episodes_detail['videos']]
|
||||
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
@@ -23,7 +23,17 @@ class BBCCoUkIE(InfoExtractor):
|
||||
IE_NAME = 'bbc.co.uk'
|
||||
IE_DESC = 'BBC iPlayer'
|
||||
_ID_REGEX = r'[pb][\da-z]{7}'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:programmes/(?!articles/)|iplayer(?:/[^/]+)?/(?:episode/|playlist/))|music/clips[/#])(?P<id>%s)' % _ID_REGEX
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?bbc\.co\.uk/
|
||||
(?:
|
||||
programmes/(?!articles/)|
|
||||
iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
|
||||
music/clips[/#]|
|
||||
radio/player/
|
||||
)
|
||||
(?P<id>%s)
|
||||
''' % _ID_REGEX
|
||||
|
||||
_MEDIASELECTOR_URLS = [
|
||||
# Provides HQ HLS streams with even better quality than pc mediaset but fails
|
||||
@@ -114,14 +124,14 @@ class BBCCoUkIE(InfoExtractor):
|
||||
},
|
||||
'skip': 'Episode is no longer available on BBC iPlayer Radio',
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/music/clips/p02frcc3',
|
||||
'url': 'http://www.bbc.co.uk/music/clips/p022h44b',
|
||||
'note': 'Audio',
|
||||
'info_dict': {
|
||||
'id': 'p02frcch',
|
||||
'id': 'p022h44j',
|
||||
'ext': 'flv',
|
||||
'title': 'Pete Tong, Past, Present and Future Special, Madeon - After Hours mix',
|
||||
'description': 'French house superstar Madeon takes us out of the club and onto the after party.',
|
||||
'duration': 3507,
|
||||
'title': 'BBC Proms Music Guides, Rachmaninov: Symphonic Dances',
|
||||
'description': "In this Proms Music Guide, Andrew McGregor looks at Rachmaninov's Symphonic Dances.",
|
||||
'duration': 227,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
@@ -172,13 +182,12 @@ class BBCCoUkIE(InfoExtractor):
|
||||
}, {
|
||||
# iptv-all mediaset fails with geolocation however there is no geo restriction
|
||||
# for this programme at all
|
||||
'url': 'http://www.bbc.co.uk/programmes/b06bp7lf',
|
||||
'url': 'http://www.bbc.co.uk/programmes/b06rkn85',
|
||||
'info_dict': {
|
||||
'id': 'b06bp7kf',
|
||||
'id': 'b06rkms3',
|
||||
'ext': 'flv',
|
||||
'title': "Annie Mac's Friday Night, B.Traits sits in for Annie",
|
||||
'description': 'B.Traits sits in for Annie Mac with a Mini-Mix from Disclosure.',
|
||||
'duration': 10800,
|
||||
'title': "Best of the Mini-Mixes 2015: Part 3, Annie Mac's Friday Night - BBC Radio 1",
|
||||
'description': "Annie has part three in the Best of the Mini-Mixes 2015, plus the year's Most Played!",
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
@@ -193,6 +202,9 @@ class BBCCoUkIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/radio/player/p03cchwf',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
@@ -469,7 +481,8 @@ class BBCCoUkIE(InfoExtractor):
|
||||
|
||||
if programme_id:
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
title = self._og_search_title(webpage)
|
||||
title = self._og_search_title(webpage, default=None) or self._html_search_regex(
|
||||
r'<h2[^>]+id="parent-title"[^>]*>(.+?)</h2>', webpage, 'title')
|
||||
description = self._search_regex(
|
||||
r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
|
||||
webpage, 'description', default=None)
|
||||
|
||||
@@ -58,11 +58,12 @@ class CCCIE(InfoExtractor):
|
||||
webpage, 'duration', fatal=False, group='duration'))
|
||||
|
||||
matches = re.finditer(r'''(?xs)
|
||||
<(?:span|div)\s+class='label\s+filetype'>(?P<format>.*?)</(?:span|div)>\s*
|
||||
<(?:span|div)\s+class='label\s+filetype'>(?P<format>[^<]*)</(?:span|div)>\s*
|
||||
<(?:span|div)\s+class='label\s+filetype'>(?P<lang>[^<]*)</(?:span|div)>\s*
|
||||
<a\s+download\s+href='(?P<http_url>[^']+)'>\s*
|
||||
(?:
|
||||
.*?
|
||||
<a\s+href='(?P<torrent_url>[^']+\.torrent)'
|
||||
<a\s+(?:download\s+)?href='(?P<torrent_url>[^']+\.torrent)'
|
||||
)?''', webpage)
|
||||
formats = []
|
||||
for m in matches:
|
||||
@@ -70,12 +71,15 @@ class CCCIE(InfoExtractor):
|
||||
format_id = self._search_regex(
|
||||
r'.*/([a-z0-9_-]+)/[^/]*$',
|
||||
m.group('http_url'), 'format id', default=None)
|
||||
if format_id:
|
||||
format_id = m.group('lang') + '-' + format_id
|
||||
vcodec = 'h264' if 'h264' in format_id else (
|
||||
'none' if format_id in ('mp3', 'opus') else None
|
||||
)
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'format': format,
|
||||
'language': m.group('lang'),
|
||||
'url': m.group('http_url'),
|
||||
'vcodec': vcodec,
|
||||
'preference': preference(format_id),
|
||||
|
||||
@@ -108,8 +108,9 @@ class InfoExtractor(object):
|
||||
-2 or smaller for less than default.
|
||||
< -1000 to hide the format (if there is
|
||||
another one which is strictly better)
|
||||
* language_preference Is this in the correct requested
|
||||
language?
|
||||
* language Language code, e.g. "de" or "en-US".
|
||||
* language_preference Is this in the language mentioned in
|
||||
the URL?
|
||||
10 if it's what the URL is about,
|
||||
-1 for default (don't know),
|
||||
-10 otherwise, other values reserved for now.
|
||||
@@ -200,6 +201,26 @@ class InfoExtractor(object):
|
||||
end_time: Time in seconds where the reproduction should end, as
|
||||
specified in the URL.
|
||||
|
||||
The following fields should only be used when the video belongs to some logical
|
||||
chapter or section:
|
||||
|
||||
chapter: Name or title of the chapter the video belongs to.
|
||||
chapter_number: Number of the chapter the video belongs to, as an integer.
|
||||
chapter_id: Id of the chapter the video belongs to, as a unicode string.
|
||||
|
||||
The following fields should only be used when the video is an episode of some
|
||||
series or programme:
|
||||
|
||||
series: Title of the series or programme the video episode belongs to.
|
||||
season: Title of the season the video episode belongs to.
|
||||
season_number: Number of the season the video episode belongs to, as an integer.
|
||||
season_id: Id of the season the video episode belongs to, as a unicode string.
|
||||
episode: Title of the video episode. Unlike mandatory video title field,
|
||||
this field should denote the exact title of the video episode
|
||||
without any kind of decoration.
|
||||
episode_number: Number of the video episode within a season, as an integer.
|
||||
episode_id: Id of the video episode, as a unicode string.
|
||||
|
||||
Unless mentioned otherwise, the fields should be Unicode strings.
|
||||
|
||||
Unless mentioned otherwise, None is equivalent to absence of information.
|
||||
|
||||
@@ -329,8 +329,10 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
streamdata_req, video_id,
|
||||
note='Downloading media info for %s' % video_format)
|
||||
stream_info = streamdata.find('./{default}preload/stream_info')
|
||||
video_url = stream_info.find('./host').text
|
||||
video_play_path = stream_info.find('./file').text
|
||||
video_url = xpath_text(stream_info, './host')
|
||||
video_play_path = xpath_text(stream_info, './file')
|
||||
if not video_url or not video_play_path:
|
||||
continue
|
||||
metadata = stream_info.find('./metadata')
|
||||
format_info = {
|
||||
'format': video_format,
|
||||
|
||||
@@ -12,6 +12,7 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
@@ -66,13 +67,15 @@ class DramaFeverBaseIE(AMPIE):
|
||||
class DramaFeverIE(DramaFeverBaseIE):
|
||||
IE_NAME = 'dramafever'
|
||||
_VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/',
|
||||
'info_dict': {
|
||||
'id': '4512.1',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cooking with Shin 4512.1',
|
||||
'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0',
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'timestamp': 1404336058,
|
||||
'upload_date': '20140702',
|
||||
@@ -82,7 +85,25 @@ class DramaFeverIE(DramaFeverBaseIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.dramafever.com/drama/4826/4/Mnet_Asian_Music_Awards_2015/?ap=1',
|
||||
'info_dict': {
|
||||
'id': '4826.4',
|
||||
'ext': 'mp4',
|
||||
'title': 'Mnet Asian Music Awards 2015 4826.4',
|
||||
'description': 'md5:3ff2ee8fedaef86e076791c909cf2e91',
|
||||
'episode': 'Mnet Asian Music Awards 2015 - Part 3',
|
||||
'episode_number': 4,
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'timestamp': 1450213200,
|
||||
'upload_date': '20151215',
|
||||
'duration': 5602,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url).replace('/', '.')
|
||||
@@ -105,13 +126,22 @@ class DramaFeverIE(DramaFeverBaseIE):
|
||||
video_id, 'Downloading episode info JSON', fatal=False)
|
||||
if episode_info:
|
||||
value = episode_info.get('value')
|
||||
if value:
|
||||
subfile = value[0].get('subfile') or value[0].get('new_subfile')
|
||||
if subfile and subfile != 'http://www.dramafever.com/st/':
|
||||
info.setdefault('subtitles', {}).setdefault('English', []).append({
|
||||
'ext': 'srt',
|
||||
'url': subfile,
|
||||
})
|
||||
if isinstance(value, list):
|
||||
for v in value:
|
||||
if v.get('type') == 'Episode':
|
||||
subfile = v.get('subfile') or v.get('new_subfile')
|
||||
if subfile and subfile != 'http://www.dramafever.com/st/':
|
||||
info.setdefault('subtitles', {}).setdefault('English', []).append({
|
||||
'ext': 'srt',
|
||||
'url': subfile,
|
||||
})
|
||||
episode_number = int_or_none(v.get('number'))
|
||||
episode_fallback = 'Episode'
|
||||
if episode_number:
|
||||
episode_fallback += ' %d' % episode_number
|
||||
info['episode'] = v.get('title') or episode_fallback
|
||||
info['episode_number'] = episode_number
|
||||
break
|
||||
|
||||
return info
|
||||
|
||||
|
||||
@@ -2,14 +2,10 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unified_strdate,
|
||||
)
|
||||
from .zdf import ZDFIE
|
||||
|
||||
|
||||
class DreiSatIE(InfoExtractor):
|
||||
class DreiSatIE(ZDFIE):
|
||||
IE_NAME = '3sat'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
|
||||
_TESTS = [
|
||||
@@ -35,53 +31,4 @@ class DreiSatIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
|
||||
details_doc = self._download_xml(details_url, video_id, 'Downloading video details')
|
||||
|
||||
status_code = details_doc.find('./status/statuscode')
|
||||
if status_code is not None and status_code.text != 'ok':
|
||||
code = status_code.text
|
||||
if code == 'notVisibleAnymore':
|
||||
message = 'Video %s is not available' % video_id
|
||||
else:
|
||||
message = '%s returned error: %s' % (self.IE_NAME, code)
|
||||
raise ExtractorError(message, expected=True)
|
||||
|
||||
thumbnail_els = details_doc.findall('.//teaserimage')
|
||||
thumbnails = [{
|
||||
'width': int(te.attrib['key'].partition('x')[0]),
|
||||
'height': int(te.attrib['key'].partition('x')[2]),
|
||||
'url': te.text,
|
||||
} for te in thumbnail_els]
|
||||
|
||||
information_el = details_doc.find('.//information')
|
||||
video_title = information_el.find('./title').text
|
||||
video_description = information_el.find('./detail').text
|
||||
|
||||
details_el = details_doc.find('.//details')
|
||||
video_uploader = details_el.find('./channel').text
|
||||
upload_date = unified_strdate(details_el.find('./airtime').text)
|
||||
|
||||
format_els = details_doc.findall('.//formitaet')
|
||||
formats = [{
|
||||
'format_id': fe.attrib['basetype'],
|
||||
'width': int(fe.find('./width').text),
|
||||
'height': int(fe.find('./height').text),
|
||||
'url': fe.find('./url').text,
|
||||
'filesize': int(fe.find('./filesize').text),
|
||||
'video_bitrate': int(fe.find('./videoBitrate').text),
|
||||
} for fe in format_els
|
||||
if not fe.find('./url').text.startswith('http://www.metafilegenerator.de/')]
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'_type': 'video',
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'formats': formats,
|
||||
'description': video_description,
|
||||
'thumbnails': thumbnails,
|
||||
'thumbnail': thumbnails[-1]['url'],
|
||||
'uploader': video_uploader,
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
return self.extract_from_xml_url(video_id, details_url)
|
||||
|
||||
@@ -7,6 +7,7 @@ import json
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
@@ -27,7 +28,7 @@ class IviIE(InfoExtractor):
|
||||
'title': 'Иван Васильевич меняет профессию',
|
||||
'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
|
||||
'duration': 5498,
|
||||
'thumbnail': 'http://thumbs.ivi.ru/f20.vcp.digitalaccess.ru/contents/d/1/c3c885163a082c29bceeb7b5a267a6.jpg',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'skip': 'Only works from Russia',
|
||||
},
|
||||
@@ -38,33 +39,25 @@ class IviIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '9549',
|
||||
'ext': 'mp4',
|
||||
'title': 'Двое из ларца - Серия 1',
|
||||
'title': 'Двое из ларца - Дело Гольдберга (1 часть)',
|
||||
'series': 'Двое из ларца',
|
||||
'season': 'Сезон 1',
|
||||
'season_number': 1,
|
||||
'episode': 'Дело Гольдберга (1 часть)',
|
||||
'episode_number': 1,
|
||||
'duration': 2655,
|
||||
'thumbnail': 'http://thumbs.ivi.ru/f15.vcp.digitalaccess.ru/contents/8/4/0068dc0677041f3336b7c2baad8fc0.jpg',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'skip': 'Only works from Russia',
|
||||
}
|
||||
]
|
||||
|
||||
# Sorted by quality
|
||||
_known_formats = ['MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi', 'MP4-SHQ']
|
||||
|
||||
# Sorted by size
|
||||
_known_thumbnails = ['Thumb-120x90', 'Thumb-160', 'Thumb-640x480']
|
||||
|
||||
def _extract_description(self, html):
|
||||
m = re.search(r'<meta name="description" content="(?P<description>[^"]+)"/>', html)
|
||||
return m.group('description') if m is not None else None
|
||||
|
||||
def _extract_comment_count(self, html):
|
||||
m = re.search('(?s)<a href="#" id="view-comments" class="action-button dim gradient">\s*Комментарии:\s*(?P<commentcount>\d+)\s*</a>', html)
|
||||
return int(m.group('commentcount')) if m is not None else 0
|
||||
_KNOWN_FORMATS = ['MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi', 'MP4-SHQ']
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
api_url = 'http://api.digitalaccess.ru/api/json/'
|
||||
|
||||
data = {
|
||||
'method': 'da.content.get',
|
||||
'params': [
|
||||
@@ -76,11 +69,10 @@ class IviIE(InfoExtractor):
|
||||
]
|
||||
}
|
||||
|
||||
request = sanitized_Request(api_url, json.dumps(data))
|
||||
|
||||
video_json_page = self._download_webpage(
|
||||
request = sanitized_Request(
|
||||
'http://api.digitalaccess.ru/api/json/', json.dumps(data))
|
||||
video_json = self._download_json(
|
||||
request, video_id, 'Downloading video JSON')
|
||||
video_json = json.loads(video_json_page)
|
||||
|
||||
if 'error' in video_json:
|
||||
error = video_json['error']
|
||||
@@ -95,35 +87,51 @@ class IviIE(InfoExtractor):
|
||||
formats = [{
|
||||
'url': x['url'],
|
||||
'format_id': x['content_format'],
|
||||
'preference': self._known_formats.index(x['content_format']),
|
||||
} for x in result['files'] if x['content_format'] in self._known_formats]
|
||||
'preference': self._KNOWN_FORMATS.index(x['content_format']),
|
||||
} for x in result['files'] if x['content_format'] in self._KNOWN_FORMATS]
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
if not formats:
|
||||
raise ExtractorError('No media links available for %s' % video_id)
|
||||
|
||||
duration = result['duration']
|
||||
compilation = result['compilation']
|
||||
title = result['title']
|
||||
|
||||
duration = int_or_none(result.get('duration'))
|
||||
compilation = result.get('compilation')
|
||||
episode = title if compilation else None
|
||||
|
||||
title = '%s - %s' % (compilation, title) if compilation is not None else title
|
||||
|
||||
previews = result['preview']
|
||||
previews.sort(key=lambda fmt: self._known_thumbnails.index(fmt['content_format']))
|
||||
thumbnail = previews[-1]['url'] if len(previews) > 0 else None
|
||||
thumbnails = [{
|
||||
'url': preview['url'],
|
||||
'id': preview.get('content_format'),
|
||||
} for preview in result.get('preview', []) if preview.get('url')]
|
||||
|
||||
video_page = self._download_webpage(url, video_id, 'Downloading video page')
|
||||
description = self._extract_description(video_page)
|
||||
comment_count = self._extract_comment_count(video_page)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
season = self._search_regex(
|
||||
r'<li[^>]+class="season active"[^>]*><a[^>]+>([^<]+)',
|
||||
webpage, 'season', default=None)
|
||||
season_number = int_or_none(self._search_regex(
|
||||
r'<li[^>]+class="season active"[^>]*><a[^>]+data-season(?:-index)?="(\d+)"',
|
||||
webpage, 'season number', default=None))
|
||||
|
||||
episode_number = int_or_none(self._search_regex(
|
||||
r'<meta[^>]+itemprop="episode"[^>]*>\s*<meta[^>]+itemprop="episodeNumber"[^>]+content="(\d+)',
|
||||
webpage, 'episode number', default=None))
|
||||
|
||||
description = self._og_search_description(webpage, default=None) or self._html_search_meta(
|
||||
'description', webpage, 'description', default=None)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'series': compilation,
|
||||
'season': season,
|
||||
'season_number': season_number,
|
||||
'episode': episode,
|
||||
'episode_number': episode_number,
|
||||
'thumbnails': thumbnails,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'comment_count': comment_count,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -149,8 +157,11 @@ class IviCompilationIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _extract_entries(self, html, compilation_id):
|
||||
return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi')
|
||||
for serie in re.findall(r'<strong><a href="/watch/%s/(\d+)">(?:[^<]+)</a></strong>' % compilation_id, html)]
|
||||
return [
|
||||
self.url_result(
|
||||
'http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), IviIE.ie_key())
|
||||
for serie in re.findall(
|
||||
r'<a href="/watch/%s/(\d+)"[^>]+data-id="\1"' % compilation_id, html)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -158,7 +169,8 @@ class IviCompilationIE(InfoExtractor):
|
||||
season_id = mobj.group('seasonid')
|
||||
|
||||
if season_id is not None: # Season link
|
||||
season_page = self._download_webpage(url, compilation_id, 'Downloading season %s web page' % season_id)
|
||||
season_page = self._download_webpage(
|
||||
url, compilation_id, 'Downloading season %s web page' % season_id)
|
||||
playlist_id = '%s/season%s' % (compilation_id, season_id)
|
||||
playlist_title = self._html_search_meta('title', season_page, 'title')
|
||||
entries = self._extract_entries(season_page, compilation_id)
|
||||
@@ -166,8 +178,9 @@ class IviCompilationIE(InfoExtractor):
|
||||
compilation_page = self._download_webpage(url, compilation_id, 'Downloading compilation web page')
|
||||
playlist_id = compilation_id
|
||||
playlist_title = self._html_search_meta('title', compilation_page, 'title')
|
||||
seasons = re.findall(r'<a href="/watch/%s/season(\d+)">[^<]+</a>' % compilation_id, compilation_page)
|
||||
if len(seasons) == 0: # No seasons in this compilation
|
||||
seasons = re.findall(
|
||||
r'<a href="/watch/%s/season(\d+)' % compilation_id, compilation_page)
|
||||
if not seasons: # No seasons in this compilation
|
||||
entries = self._extract_entries(compilation_page, compilation_id)
|
||||
else:
|
||||
entries = []
|
||||
|
||||
83
youtube_dl/extractor/ivideon.py
Normal file
83
youtube_dl/extractor/ivideon.py
Normal file
@@ -0,0 +1,83 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import qualities
|
||||
|
||||
|
||||
class IvideonIE(InfoExtractor):
    """Extractor for live camera streams published on Ivideon TV."""

    IE_NAME = 'ivideon'
    IE_DESC = 'Ivideon TV'
    _VALID_URL = r'https?://(?:www\.)?ivideon\.com/tv/(?:[^/]+/)*camera/(?P<id>\d+-[\da-f]+)/(?P<camera_id>\d+)'
    _TESTS = [{
        'url': 'https://www.ivideon.com/tv/camera/100-916ca13b5c4ad9f564266424a026386d/0/',
        'info_dict': {
            'id': '100-916ca13b5c4ad9f564266424a026386d',
            'ext': 'flv',
            'title': 're:^Касса [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': 'Основное предназначение - запись действий кассиров. Плюс общий вид.',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        }
    }, {
        'url': 'https://www.ivideon.com/tv/camera/100-c4ee4cb9ede885cf62dfbe93d7b53783/589824/?lang=ru',
        'only_matching': True,
    }, {
        'url': 'https://www.ivideon.com/tv/map/22.917923/-31.816406/16/camera/100-e7bc16c7d4b5bbd633fd5350b66dfa9a/0',
        'only_matching': True,
    }]

    # Format identifiers; one format is emitted per entry and the tuple is
    # handed to qualities() to rank them.
    _QUALITIES = ('low', 'mid', 'hi')

    def _real_extract(self, url):
        """Return the info dict for the camera addressed by *url*.

        The server and camera IDs are taken from *url*. The camera page is
        downloaded with ``fatal=False`` and is only used to look up the
        camera name and description; when it is unavailable the server ID
        is used as the title instead.
        """
        mobj = re.match(self._VALID_URL, url)
        server_id, camera_id = mobj.group('id'), mobj.group('camera_id')
        camera_name, description = None, None
        # Normalise map/deep links to the canonical camera page URL.
        camera_url = compat_urlparse.urljoin(
            url, '/tv/camera/%s/%s/' % (server_id, camera_id))

        webpage = self._download_webpage(camera_url, server_id, fatal=False)
        if webpage:
            config_string = self._search_regex(
                r'var\s+config\s*=\s*({.+?});', webpage, 'config', default=None)
            if config_string:
                # Parsing is requested as non-fatal, so a malformed blob must
                # not be dereferenced as if it were a dict; fall back to an
                # empty config and let the HTML fallbacks below take over.
                config = self._parse_json(
                    config_string, server_id, fatal=False) or {}
                camera_info = (
                    config.get('ivTvAppOptions') or {}).get('currentCameraInfo')
                if camera_info:
                    camera_name = camera_info.get('camera_name')
                    # 'misc' may be absent or null; treat both the same way.
                    description = (
                        camera_info.get('misc') or {}).get('description')
            if not camera_name:
                camera_name = self._html_search_meta(
                    'name', webpage, 'camera name', default=None) or self._search_regex(
                    r'<h1[^>]+class="b-video-title"[^>]*>([^<]+)', webpage, 'camera name', default=None)

        quality = qualities(self._QUALITIES)

        # The value returned by quality() is used both as the 'q' request
        # parameter and as the format's 'quality' field.
        formats = [{
            'url': 'https://streaming.ivideon.com/flv/live?%s' % compat_urllib_parse.urlencode({
                'server': server_id,
                'camera': camera_id,
                'sessionId': 'demo',
                'q': quality(format_id),
            }),
            'format_id': format_id,
            'ext': 'flv',
            'quality': quality(format_id),
        } for format_id in self._QUALITIES]
        self._sort_formats(formats)

        return {
            'id': server_id,
            'title': self._live_title(camera_name or server_id),
            'description': description,
            'is_live': True,
            'formats': formats,
        }
|
||||
@@ -71,6 +71,7 @@ class NowTVBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class NowTVIE(NowTVBaseIE):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/(?:(?:list/[^/]+|jahr/\d{4}/\d{1,2})/)?(?P<id>[^/]+)/(?:player|preview)'
|
||||
|
||||
_TESTS = [{
|
||||
|
||||
82
youtube_dl/extractor/npr.py
Normal file
82
youtube_dl/extractor/npr.py
Normal file
@@ -0,0 +1,82 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class NprIE(InfoExtractor):
    """Extractor for NPR media player pages; yields a playlist of audio items."""

    _VALID_URL = r'http://(?:www\.)?npr\.org/player/v2/mediaPlayer\.html\?.*\bid=(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.npr.org/player/v2/mediaPlayer.html?id=449974205',
        'info_dict': {
            'id': '449974205',
            'title': 'New Music From Beach House, Chairlift, CMJ Discoveries And More'
        },
        'playlist_count': 7,
    }, {
        'url': 'http://www.npr.org/player/v2/mediaPlayer.html?action=1&t=1&islist=false&id=446928052&m=446929930&live=1',
        'info_dict': {
            'id': '446928052',
            'title': "Songs We Love: Tigran Hamasyan, 'Your Mercy is Boundless'"
        },
        'playlist': [{
            'md5': '12fa60cb2d3ed932f53609d4aeceabf1',
            'info_dict': {
                'id': '446929930',
                'ext': 'mp3',
                'title': 'Your Mercy is Boundless (Bazum en Qo gtutyunqd)',
                'duration': 402,
            },
        }],
    }]

    def _real_extract(self, url):
        """Query the NPR API for the story in *url* and return its audio playlist."""
        playlist_id = self._match_id(url)

        api_query = compat_urllib_parse.urlencode({
            'id': playlist_id,
            'fields': 'titles,audio,show',
            'format': 'json',
            'apiKey': 'MDAzMzQ2MjAyMDEyMzk4MTU1MDg3ZmM3MQ010',
        })
        response = self._download_json(
            'http://api.npr.org/query?%s' % api_query, playlist_id)

        # Only the first story of the response is used.
        story = response['list']['story'][0]

        # Format keys that are turned into downloadable formats; the same
        # tuple is given to qualities() to rank them.
        KNOWN_FORMATS = ('threegp', 'mp4', 'mp3')
        rank = qualities(KNOWN_FORMATS)

        entries = []
        for track in story.get('audio', []):
            track_title = track.get('title', {}).get('$text')
            track_duration = int_or_none(track.get('duration', {}).get('$text'))

            track_formats = []
            for fmt_key, fmt_value in track.get('format', {}).items():
                if not fmt_value:
                    continue
                # A format may be given as a list; only its first item is used.
                fmt_node = fmt_value[0] if isinstance(fmt_value, list) else fmt_value
                media_url = fmt_node.get('$text')
                if media_url and fmt_key in KNOWN_FORMATS:
                    track_formats.append({
                        'url': media_url,
                        'format_id': fmt_key,
                        'ext': fmt_node.get('type'),
                        'quality': rank(fmt_key),
                    })
            self._sort_formats(track_formats)

            entries.append({
                'id': track['id'],
                'title': track_title,
                'duration': track_duration,
                'formats': track_formats,
            })

        return self.playlist_result(
            entries, playlist_id, story.get('title', {}).get('$text'))
|
||||
@@ -11,6 +11,7 @@ from ..utils import (
|
||||
strip_jsonp,
|
||||
unescapeHTML,
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
@@ -177,7 +178,7 @@ class QQMusicSingerIE(QQPlaylistBaseIE):
|
||||
'info_dict': {
|
||||
'id': '001BLpXF2DyJe2',
|
||||
'title': '林俊杰',
|
||||
'description': 'md5:2a222d89ba4455a3af19940c0481bb78',
|
||||
'description': 'md5:870ec08f7d8547c29c93010899103751',
|
||||
},
|
||||
'playlist_count': 12,
|
||||
}
|
||||
@@ -272,7 +273,7 @@ class QQMusicToplistIE(QQPlaylistBaseIE):
|
||||
'url': 'http://y.qq.com/#type=toplist&p=top_3',
|
||||
'info_dict': {
|
||||
'id': 'top_3',
|
||||
'title': 'QQ音乐巅峰榜·欧美',
|
||||
'title': '巅峰榜·欧美',
|
||||
'description': 'QQ音乐巅峰榜·欧美根据用户收听行为自动生成,集结当下最流行的欧美新歌!:更新时间:每周四22点|统'
|
||||
'计周期:一周(上周四至本周三)|统计对象:三个月内发行的欧美歌曲|统计数量:100首|统计算法:根据'
|
||||
'歌曲在一周内的有效播放次数,由高到低取前100名(同一歌手最多允许5首歌曲同时上榜)|有效播放次数:'
|
||||
@@ -315,7 +316,7 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE):
|
||||
IE_DESC = 'QQ音乐 - 歌单'
|
||||
_VALID_URL = r'http://y\.qq\.com/#type=taoge&id=(?P<id>[0-9]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://y.qq.com/#type=taoge&id=3462654915',
|
||||
'info_dict': {
|
||||
'id': '3462654915',
|
||||
@@ -323,7 +324,16 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE):
|
||||
'description': 'md5:d2c9d758a96b9888cf4fe82f603121d4',
|
||||
},
|
||||
'playlist_count': 40,
|
||||
}
|
||||
'skip': 'playlist gone',
|
||||
}, {
|
||||
'url': 'http://y.qq.com/#type=taoge&id=1374105607',
|
||||
'info_dict': {
|
||||
'id': '1374105607',
|
||||
'title': '易入人心的华语民谣',
|
||||
'description': '民谣的歌曲易于传唱、、歌词朗朗伤口、旋律简单温馨。属于那种才入耳孔。却上心头的感觉。没有太多的复杂情绪。简单而直接地表达乐者的情绪,就是这样的简单才易入人心。',
|
||||
},
|
||||
'playlist_count': 20,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
list_id = self._match_id(url)
|
||||
@@ -331,14 +341,21 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE):
|
||||
list_json = self._download_json(
|
||||
'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg?type=1&json=1&utf8=1&onlysong=0&disstid=%s'
|
||||
% list_id, list_id, 'Download list page',
|
||||
transform_source=strip_jsonp)['cdlist'][0]
|
||||
transform_source=strip_jsonp)
|
||||
if not len(list_json.get('cdlist', [])):
|
||||
if list_json.get('code'):
|
||||
raise ExtractorError(
|
||||
'QQ Music said: error %d in fetching playlist info' % list_json['code'],
|
||||
expected=True)
|
||||
raise ExtractorError('Unable to get playlist info')
|
||||
|
||||
cdlist = list_json['cdlist'][0]
|
||||
entries = [
|
||||
self.url_result(
|
||||
'http://y.qq.com/#type=song&mid=' + song['songmid'], 'QQMusic', song['songmid']
|
||||
) for song in list_json['songlist']
|
||||
) for song in cdlist['songlist']
|
||||
]
|
||||
|
||||
list_name = list_json.get('dissname')
|
||||
list_description = clean_html(unescapeHTML(list_json.get('desc')))
|
||||
list_name = cdlist.get('dissname')
|
||||
list_description = clean_html(unescapeHTML(cdlist.get('desc')))
|
||||
return self.playlist_result(entries, list_id, list_name, list_description)
|
||||
|
||||
127
youtube_dl/extractor/revision3.py
Normal file
127
youtube_dl/extractor/revision3.py
Normal file
@@ -0,0 +1,127 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
unescapeHTML,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class Revision3IE(InfoExtractor):
    """Extractor for episodes and shows on revision3.com, testtube.com and animalist.com."""

    _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:revision3|testtube|animalist)\.com)/(?P<id>[^/]+(?:/[^/?#]+)?)'
    _TESTS = [{
        'url': 'http://www.revision3.com/technobuffalo/5-google-predictions-for-2016',
        'md5': 'd94a72d85d0a829766de4deb8daaf7df',
        'info_dict': {
            'id': '73034',
            'display_id': 'technobuffalo/5-google-predictions-for-2016',
            'ext': 'webm',
            'title': '5 Google Predictions for 2016',
            'description': 'Google had a great 2015, but it\'s already time to look ahead. Here are our five predictions for 2016.',
            'upload_date': '20151228',
            'timestamp': 1451325600,
            'duration': 187,
            'uploader': 'TechnoBuffalo',
            'uploader_id': 'technobuffalo',
        }
    }, {
        'url': 'http://testtube.com/brainstuff',
        'info_dict': {
            'id': '251',
            'title': 'BrainStuff',
            'description': 'Whether the topic is popcorn or particle physics, you can count on the HowStuffWorks team to explore-and explain-the everyday science in the world around us on BrainStuff.',
        },
        'playlist_mincount': 93,
    }, {
        'url': 'https://testtube.com/dnews/5-weird-ways-plants-can-eat-animals?utm_source=FB&utm_medium=DNews&utm_campaign=DNewsSocial',
        'info_dict': {
            'id': '60163',
            'display_id': 'dnews/5-weird-ways-plants-can-eat-animals',
            'duration': 275,
            'ext': 'webm',
            'title': '5 Weird Ways Plants Can Eat Animals',
            'description': 'Why have some plants evolved to eat meat?',
            'upload_date': '20150120',
            'timestamp': 1421763300,
            'uploader': 'DNews',
            'uploader_id': 'dnews',
        },
    }]
    # Filled with (domain, path, domain); the path is the display ID,
    # optionally followed by '/<page number>' for show pagination.
    _PAGE_DATA_TEMPLATE = 'http://www.%s/apiProxy/ddn/%s?domain=%s'
    _API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62'

    def _real_extract(self, url):
        """Return a video info dict for an episode URL or a playlist for a show URL.

        The page data JSON decides which one it is: a ``data.type`` of
        ``'episode'`` yields a single video, anything else is handled as a
        show whose episode pages are walked one by one.
        """
        domain, display_id = re.match(self._VALID_URL, url).groups()
        page_info = self._download_json(
            self._PAGE_DATA_TEMPLATE % (domain, display_id, domain), display_id)

        if page_info['data']['type'] == 'episode':
            episode_data = page_info['data']
            video_id = compat_str(episode_data['video']['data']['id'])
            video_data = self._download_json(
                'http://revision3.com/api/getPlaylist.json?api_key=%s&codecs=h264,vp8,theora&video_id=%s' % (self._API_KEY, video_id),
                video_id)['items'][0]

            formats = []
            for vcodec, media in video_data['media'].items():
                for quality_id, quality in media.items():
                    if quality_id == 'hls':
                        # The manifest is fetched non-fatally.
                        # NOTE(review): guard against a falsy (non-list)
                        # result on failure - confirm the helper's contract.
                        formats.extend(self._extract_m3u8_formats(
                            quality['url'], video_id, 'mp4',
                            'm3u8_native', m3u8_id='hls', fatal=False) or [])
                    else:
                        formats.append({
                            'url': quality['url'],
                            'format_id': '%s-%s' % (vcodec, quality_id),
                            'tbr': int_or_none(quality.get('bitrate')),
                            'vcodec': vcodec,
                        })
            self._sort_formats(formats)

            preference = qualities(['mini', 'small', 'medium', 'large'])
            thumbnails = [{
                'url': image_url,
                'id': image_id,
                'preference': preference(image_id)
            } for image_id, image_url in video_data.get('images', {}).items()]

            return {
                'id': video_id,
                'display_id': display_id,
                'title': unescapeHTML(video_data['title']),
                'description': unescapeHTML(video_data.get('summary')),
                'timestamp': parse_iso8601(episode_data.get('publishTime'), ' '),
                'author': episode_data.get('author'),
                'uploader': video_data.get('show', {}).get('name'),
                'uploader_id': video_data.get('show', {}).get('slug'),
                'duration': int_or_none(video_data.get('duration')),
                'thumbnails': thumbnails,
                'formats': formats,
            }
        else:
            show_data = page_info['show']['data']
            episodes_data = page_info['episodes']['data']
            num_episodes = page_info['meta']['totalEpisodes']
            processed_episodes = 0
            entries = []
            page_num = 1
            while True:
                entries.extend([self.url_result(
                    'http://%s/%s/%s' % (domain, display_id, episode['slug'])) for episode in episodes_data])
                processed_episodes += len(episodes_data)
                # Stop when all advertised episodes were collected, and also
                # when a page comes back empty or the running count
                # overshoots the advertised total: with a strict equality
                # test either situation would keep requesting pages forever.
                if not episodes_data or processed_episodes >= num_episodes:
                    break
                page_num += 1
                episodes_data = self._download_json(self._PAGE_DATA_TEMPLATE % (
                    domain, display_id + '/' + compat_str(page_num), domain),
                    display_id)['episodes']['data']

            return self.playlist_result(
                entries, compat_str(show_data['id']),
                show_data.get('name'), show_data.get('summary'))
|
||||
@@ -2,19 +2,22 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
parse_iso8601,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class RteIE(InfoExtractor):
|
||||
IE_NAME = 'rte'
|
||||
IE_DESC = 'Raidió Teilifís Éireann TV'
|
||||
_VALID_URL = r'https?://(?:www\.)?rte\.ie/player/[^/]{2,3}/show/[^/]+/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.rte.ie/player/ie/show/iwitness-862/10478715/',
|
||||
'info_dict': {
|
||||
'id': '10478715',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': 'Watch iWitness online',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': 'iWitness : The spirit of Ireland, one voice and one minute at a time.',
|
||||
@@ -44,13 +47,6 @@ class RteIE(InfoExtractor):
|
||||
# f4m_url = server + relative_url
|
||||
f4m_url = json_string['shows'][0]['media:group'][0]['rte:server'] + json_string['shows'][0]['media:group'][0]['url']
|
||||
f4m_formats = self._extract_f4m_formats(f4m_url, video_id)
|
||||
f4m_formats = [{
|
||||
'format_id': f['format_id'],
|
||||
'url': f['url'],
|
||||
'ext': 'mp4',
|
||||
'width': f['width'],
|
||||
'height': f['height'],
|
||||
} for f in f4m_formats]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -60,3 +56,73 @@ class RteIE(InfoExtractor):
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
}
|
||||
|
||||
|
||||
class RteRadioIE(InfoExtractor):
    """Extractor for recordings played through the RTÉ radio web player."""

    IE_NAME = 'rte:radio'
    IE_DESC = 'Raidió Teilifís Éireann radio'
    # The radio player addresses a recording through a URL fragment of the form
    # #!rii=<channel_id>:<id>:<playable_item_id>:<date>: in which the IDs are
    # integers or empty, the date is DD-MM-YYYY and trailing parts may be cut off.
    # Only <id> is needed here, as it uniquely identifies a single recording.
    _VALID_URL = r'https?://(?:www\.)?rte\.ie/radio/utils/radioplayer/rteradioweb\.html#!rii=(?:[0-9]*)(?:%3A|:)(?P<id>[0-9]+)'

    _TEST = {
        'url': 'http://www.rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=16:10507902:2414:27-12-2015:',
        'info_dict': {
            'id': '10507902',
            'ext': 'mp4',
            'title': 'Gloria',
            'thumbnail': 're:^https?://.*\.jpg$',
            'description': 'md5:9ce124a7fb41559ec68f06387cabddf0',
            'timestamp': 1451203200,
            'upload_date': '20151227',
            'duration': 7230.0,
        },
        'params': {
            'skip_download': 'f4m fails with --test atm'
        }
    }

    def _real_extract(self, url):
        """Fetch the playlist JSON for the recording in *url* and build its info dict."""
        item_id = self._match_id(url)

        playlist = self._download_json(
            'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=' + item_id,
            item_id)

        # String values in this JSON are XML-escaped, hence the unescapeHTML calls.
        show = playlist['shows'][0]
        info = {
            'id': item_id,
            'title': unescapeHTML(show['title']),
            'description': unescapeHTML(show.get('description')),
            'thumbnail': show.get('thumbnail'),
            'duration': float_or_none(show.get('duration'), 1000),
            'timestamp': parse_iso8601(show.get('published')),
        }

        media_group = show['media:group'][0]

        formats = []

        # Plain URL, unless it is an rtmpe: one.
        direct_url = media_group.get('url')
        if direct_url and not direct_url.startswith('rtmpe:'):
            formats.append({'url': media_group['url']})

        # HLS variant: server and path are given separately.
        if media_group.get('hls_server') and media_group.get('hls_url'):
            hls_manifest = media_group['hls_server'] + media_group['hls_url']
            formats.extend(self._extract_m3u8_formats(
                hls_manifest, item_id, 'mp4',
                entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))

        # HDS variant: same server + path layout.
        if media_group.get('hds_server') and media_group.get('hds_url'):
            hds_manifest = media_group['hds_server'] + media_group['hds_url']
            formats.extend(self._extract_f4m_formats(
                hds_manifest, item_id, f4m_id='hds', fatal=False))

        self._sort_formats(formats)

        info['formats'] = formats
        return info
|
||||
|
||||
@@ -75,9 +75,12 @@ class RuutuIE(InfoExtractor):
|
||||
preference = -1 if proto == 'rtmp' else 1
|
||||
label = child.get('label')
|
||||
tbr = int_or_none(child.get('bitrate'))
|
||||
format_id = '%s-%s' % (proto, label if label else tbr) if label or tbr else proto
|
||||
if not self._is_valid_url(video_url, video_id, format_id):
|
||||
continue
|
||||
width, height = [int_or_none(x) for x in child.get('resolution', 'x').split('x')[:2]]
|
||||
formats.append({
|
||||
'format_id': '%s-%s' % (proto, label if label else tbr),
|
||||
'format_id': format_id,
|
||||
'url': video_url,
|
||||
'width': width,
|
||||
'height': height,
|
||||
|
||||
@@ -384,27 +384,24 @@ class SoundcloudUserIE(SoundcloudIE):
|
||||
resource = mobj.group('rsrc') or 'all'
|
||||
base_url = self._BASE_URL_MAP[resource] % user['id']
|
||||
|
||||
next_href = None
|
||||
COMMON_QUERY = {
|
||||
'limit': 50,
|
||||
'client_id': self._CLIENT_ID,
|
||||
'linked_partitioning': '1',
|
||||
}
|
||||
|
||||
query = COMMON_QUERY.copy()
|
||||
query['offset'] = 0
|
||||
|
||||
next_href = base_url + '?' + compat_urllib_parse.urlencode(query)
|
||||
|
||||
entries = []
|
||||
for i in itertools.count():
|
||||
if not next_href:
|
||||
data = compat_urllib_parse.urlencode({
|
||||
'offset': i * 50,
|
||||
'limit': 50,
|
||||
'client_id': self._CLIENT_ID,
|
||||
'linked_partitioning': '1',
|
||||
'representation': 'speedy',
|
||||
})
|
||||
next_href = base_url + '?' + data
|
||||
|
||||
response = self._download_json(
|
||||
next_href, uploader, 'Downloading track page %s' % (i + 1))
|
||||
|
||||
collection = response['collection']
|
||||
|
||||
if not collection:
|
||||
self.to_screen('%s: End page received' % uploader)
|
||||
break
|
||||
|
||||
def resolve_permalink_url(candidates):
|
||||
@@ -419,12 +416,15 @@ class SoundcloudUserIE(SoundcloudIE):
|
||||
if permalink_url:
|
||||
entries.append(self.url_result(permalink_url))
|
||||
|
||||
if 'next_href' in response:
|
||||
next_href = response['next_href']
|
||||
if not next_href:
|
||||
break
|
||||
else:
|
||||
next_href = None
|
||||
next_href = response.get('next_href')
|
||||
if not next_href:
|
||||
break
|
||||
|
||||
parsed_next_href = compat_urlparse.urlparse(response['next_href'])
|
||||
qs = compat_urlparse.parse_qs(parsed_next_href.query)
|
||||
qs.update(COMMON_QUERY)
|
||||
next_href = compat_urlparse.urlunparse(
|
||||
parsed_next_href._replace(query=compat_urllib_parse.urlencode(qs, True)))
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
|
||||
@@ -1,90 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class TestTubeIE(InfoExtractor):
    """Extractor for testtube.com video pages (YouTube embeds or Revision3-hosted items)."""

    _VALID_URL = r'https?://testtube\.com/[^/?#]+/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://testtube.com/dnews/5-weird-ways-plants-can-eat-animals?utm_source=FB&utm_medium=DNews&utm_campaign=DNewsSocial',
        'info_dict': {
            'id': '60163',
            'display_id': '5-weird-ways-plants-can-eat-animals',
            'duration': 275,
            'ext': 'webm',
            'title': '5 Weird Ways Plants Can Eat Animals',
            'description': 'Why have some plants evolved to eat meat?',
            'thumbnail': 're:^https?://.*\.jpg$',
            'uploader': 'DNews',
            'uploader_id': 'dnews',
        },
    }, {
        'url': 'https://testtube.com/iflscience/insane-jet-ski-flipping',
        'info_dict': {
            'id': 'fAGfJ4YjVus',
            'ext': 'mp4',
            'title': 'Flipping Jet-Ski Skills | Outrageous Acts of Science',
            'uploader': 'Science Channel',
            'uploader_id': 'ScienceChannel',
            'upload_date': '20150203',
            'description': 'md5:e61374030015bae1d2e22f096d4769d6',
        }
    }]

    def _real_extract(self, url):
        """Return a YouTube redirect or a full info dict for the page at *url*."""
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        # A page that embeds a YouTube iframe is delegated to the Youtube extractor.
        embed_url = self._html_search_regex(
            r'<iframe[^>]+src="((?:https?:)?//www.youtube.com/embed/[^"]+)"',
            webpage, 'youtube iframe', default=None)
        if embed_url:
            return self.url_result(embed_url, 'Youtube', video_id=display_id)

        video_id = self._search_regex(
            r"player\.loadRevision3Item\('video_id',\s*([0-9]+)\);",
            webpage, 'video ID')

        item = self._download_json(
            'https://testtube.com/api/getPlaylist.json?api_key=ba9c741bce1b9d8e3defcc22193f3651b8867e62&codecs=h264,vp8,theora&video_id=%s' % video_id,
            video_id)['items'][0]

        # One format per (codec, variant) pair listed under 'media'.
        formats = [{
            'format_id': '%s-%s' % (codec, variant),
            'url': variant_data['url'],
            'vcodec': codec,
            'tbr': variant_data.get('bitrate'),
        } for codec, variants in item['media'].items()
            for variant, variant_data in variants.items()]
        self._sort_formats(formats)

        preference = qualities(['mini', 'small', 'medium', 'large'])
        image_map = item.get('images')
        # Thumbnails stay None when the item carries no images.
        thumbnails = [{
            'id': image_id,
            'url': image_url,
            'preference': preference(image_id)
        } for image_id, image_url in image_map.items()] if image_map else None

        return {
            'id': video_id,
            'display_id': display_id,
            'title': item['title'],
            'description': item.get('summary'),
            'thumbnails': thumbnails,
            'uploader': item.get('show', {}).get('name'),
            'uploader_id': item.get('show', {}).get('slug'),
            'duration': int_or_none(item.get('duration')),
            'formats': formats,
        }
|
||||
73
youtube_dl/extractor/twentymin.py
Normal file
73
youtube_dl/extractor/twentymin.py
Normal file
@@ -0,0 +1,73 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import remove_end
|
||||
|
||||
|
||||
class TwentyMinutenIE(InfoExtractor):
    """Extractor for 20min.ch video pages and news articles that carry a video."""

    IE_NAME = '20min'
    _VALID_URL = r'https?://(?:www\.)?20min\.ch/(?:videotv/*\?.*\bvid=(?P<id>\d+)|(?:[^/]+/)*(?P<display_id>[^/#?]+))'
    _TESTS = [{
        # regular video
        'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2',
        'md5': 'b52d6bc6ea6398e6a38f12cfd418149c',
        'info_dict': {
            'id': '469148',
            'ext': 'flv',
            'title': '85 000 Franken für 15 perfekte Minuten',
            'description': 'Was die Besucher vom Silvesterzauber erwarten können. (Video: Alice Grosjean/Murat Temel)',
            'thumbnail': 'http://thumbnails.20min-tv.ch/server063/469148/frame-72-469148.jpg'
        }
    }, {
        # news article with video
        'url': 'http://www.20min.ch/schweiz/news/story/-Wir-muessen-mutig-nach-vorne-schauen--22050469',
        'md5': 'cd4cbb99b94130cff423e967cd275e5e',
        'info_dict': {
            'id': '469408',
            'display_id': '-Wir-muessen-mutig-nach-vorne-schauen--22050469',
            'ext': 'flv',
            'title': '«Wir müssen mutig nach vorne schauen»',
            'description': 'Kein Land sei innovativer als die Schweiz, sagte Johann Schneider-Ammann in seiner Neujahrsansprache. Das Land müsse aber seine Hausaufgaben machen.',
            'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg'
        }
    }, {
        'url': 'http://www.20min.ch/videotv/?cid=44&vid=468738',
        'only_matching': True,
    }, {
        'url': 'http://www.20min.ch/ro/sortir/cinema/story/Grandir-au-bahut--c-est-dur-18927411',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the video referenced by *url*.

        videotv URLs carry the numeric video ID themselves; for article URLs
        the ID is looked up in the downloaded page.
        """
        match = re.match(self._VALID_URL, url)
        video_id = match.group('id')
        display_id = match.group('display_id') or video_id

        webpage = self._download_webpage(url, display_id)

        # Prefer the <h1><span> headline; otherwise clean up the og:title.
        title = self._html_search_regex(
            r'<h1>.*?<span>(.+?)</span></h1>',
            webpage, 'title', default=None)
        if not title:
            og_title = self._og_search_title(webpage)
            without_prefix = re.sub(r'^20 [Mm]inuten.*? -', '', og_title)
            title = remove_end(without_prefix, ' - News')

        if not video_id:
            video_id = self._search_regex(
                r'"file\d?"\s*,\s*\"(\d+)', webpage, 'video id')

        description = self._html_search_meta(
            'description', webpage, 'description')

        return {
            'id': video_id,
            'display_id': display_id,
            'url': 'http://speed.20min-tv.ch/%sm.flv' % video_id,
            'title': title,
            'description': description,
            'thumbnail': self._og_search_thumbnail(webpage),
        }
|
||||
@@ -11,6 +11,7 @@ from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
@@ -19,8 +20,6 @@ class UdemyIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.udemy\.com/(?:[^#]+#/lecture/|lecture/view/?\?lectureId=)(?P<id>\d+)'
|
||||
_LOGIN_URL = 'https://www.udemy.com/join/login-popup/?displayType=ajax&showSkipButton=1'
|
||||
_ORIGIN_URL = 'https://www.udemy.com'
|
||||
_SUCCESSFULLY_ENROLLED = '>You have enrolled in this course!<'
|
||||
_ALREADY_ENROLLED = '>You are already taking this course.<'
|
||||
_NETRC_MACHINE = 'udemy'
|
||||
|
||||
_TESTS = [{
|
||||
@@ -37,15 +36,21 @@ class UdemyIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _enroll_course(self, webpage, course_id):
|
||||
enroll_url = self._search_regex(
|
||||
checkout_url = unescapeHTML(self._search_regex(
|
||||
r'href=(["\'])(?P<url>https?://(?:www\.)?udemy\.com/payment/checkout/.+?)\1',
|
||||
webpage, 'checkout url', group='url', default=None))
|
||||
if checkout_url:
|
||||
raise ExtractorError(
|
||||
'Course %s is not free. You have to pay for it before you can download. '
|
||||
'Use this URL to confirm purchase: %s' % (course_id, checkout_url), expected=True)
|
||||
|
||||
enroll_url = unescapeHTML(self._search_regex(
|
||||
r'href=(["\'])(?P<url>https?://(?:www\.)?udemy\.com/course/subscribe/.+?)\1',
|
||||
webpage, 'enroll url', group='url',
|
||||
default='https://www.udemy.com/course/subscribe/?courseId=%s' % course_id)
|
||||
webpage = self._download_webpage(enroll_url, course_id, 'Enrolling in the course')
|
||||
if self._SUCCESSFULLY_ENROLLED in webpage:
|
||||
self.to_screen('%s: Successfully enrolled in' % course_id)
|
||||
elif self._ALREADY_ENROLLED in webpage:
|
||||
self.to_screen('%s: Already enrolled in' % course_id)
|
||||
webpage, 'enroll url', group='url', default=None))
|
||||
if enroll_url:
|
||||
webpage = self._download_webpage(enroll_url, course_id, 'Enrolling in the course')
|
||||
if '>You have enrolled in' in webpage:
|
||||
self.to_screen('%s: Successfully enrolled in the course' % course_id)
|
||||
|
||||
def _download_lecture(self, course_id, lecture_id):
|
||||
return self._download_json(
|
||||
@@ -244,10 +249,25 @@ class UdemyCourseIE(UdemyIE):
|
||||
'https://www.udemy.com/api-1.1/courses/%s/curriculum' % course_id,
|
||||
course_id, 'Downloading course curriculum')
|
||||
|
||||
entries = [
|
||||
self.url_result(
|
||||
'https://www.udemy.com/%s/#/lecture/%s' % (course_path, asset['id']), 'Udemy')
|
||||
for asset in response if asset.get('assetType') or asset.get('asset_type') == 'Video'
|
||||
]
|
||||
entries = []
|
||||
chapter, chapter_number = None, None
|
||||
for asset in response:
|
||||
asset_type = asset.get('assetType') or asset.get('asset_type')
|
||||
if asset_type == 'Video':
|
||||
asset_id = asset.get('id')
|
||||
if asset_id:
|
||||
entry = {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'https://www.udemy.com/%s/#/lecture/%s' % (course_path, asset['id']),
|
||||
'ie_key': UdemyIE.ie_key(),
|
||||
}
|
||||
if chapter_number:
|
||||
entry['chapter_number'] = chapter_number
|
||||
if chapter:
|
||||
entry['chapter'] = chapter
|
||||
entries.append(entry)
|
||||
elif asset.get('type') == 'chapter':
|
||||
chapter_number = asset.get('index') or asset.get('object_index')
|
||||
chapter = asset.get('title')
|
||||
|
||||
return self.playlist_result(entries, course_id, course_title)
|
||||
|
||||
@@ -23,11 +23,56 @@ class VideomoreIE(InfoExtractor):
|
||||
'ext': 'flv',
|
||||
'title': 'В гостях Алексей Чумаков и Юлия Ковальчук',
|
||||
'description': 'В гостях – лучшие романтические комедии года, «Выживший» Иньярриту и «Стив Джобс» Дэнни Бойла.',
|
||||
'series': 'Кино в деталях',
|
||||
'episode': 'В гостях Алексей Чумаков и Юлия Ковальчук',
|
||||
'episode_number': None,
|
||||
'season': 'Сезон 2015',
|
||||
'season_number': 5,
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'duration': 2910,
|
||||
'age_limit': 16,
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://videomore.ru/embed/259974',
|
||||
'info_dict': {
|
||||
'id': '259974',
|
||||
'ext': 'flv',
|
||||
'title': '80 серия',
|
||||
'description': '«Медведей» ждет решающий матч. Макеев выясняет отношения со Стрельцовым. Парни узнают подробности прошлого Макеева.',
|
||||
'series': 'Молодежка',
|
||||
'episode': '80 серия',
|
||||
'episode_number': 40,
|
||||
'season': '2 сезон',
|
||||
'season_number': 2,
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'duration': 2809,
|
||||
'age_limit': 16,
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://videomore.ru/molodezhka/sezon_promo/341073',
|
||||
'info_dict': {
|
||||
'id': '341073',
|
||||
'ext': 'flv',
|
||||
'title': 'Команда проиграла из-за Бакина?',
|
||||
'description': 'Молодежка 3 сезон скоро',
|
||||
'series': 'Молодежка',
|
||||
'episode': 'Команда проиграла из-за Бакина?',
|
||||
'episode_number': None,
|
||||
'season': 'Промо',
|
||||
'season_number': 99,
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'duration': 29,
|
||||
'age_limit': 16,
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://videomore.ru/elki_3?track_id=364623',
|
||||
'only_matching': True,
|
||||
@@ -81,10 +126,21 @@ class VideomoreIE(InfoExtractor):
|
||||
'url': thumbnail,
|
||||
} for thumbnail in data.get('big_thumbnail_urls', [])]
|
||||
|
||||
series = data.get('project_title')
|
||||
episode = data.get('title')
|
||||
episode_number = int_or_none(data.get('episode_of_season') or None)
|
||||
season = data.get('season_title')
|
||||
season_number = int_or_none(data.get('season_pos') or None)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'series': series,
|
||||
'episode': episode,
|
||||
'episode_number': episode_number,
|
||||
'season': season,
|
||||
'season_number': season_number,
|
||||
'thumbnails': thumbnails,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
|
||||
@@ -11,6 +11,7 @@ from ..compat import (
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
encode_dict,
|
||||
ExtractorError,
|
||||
InAdvancePagedList,
|
||||
@@ -208,6 +209,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'url': 'https://vimeo.com/groups/travelhd/videos/22439234',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# source file returns 403: Forbidden
|
||||
'url': 'https://vimeo.com/7809605',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
@@ -217,7 +223,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
|
||||
if mobj:
|
||||
player_url = unescapeHTML(mobj.group('url'))
|
||||
surl = smuggle_url(player_url, {'Referer': url})
|
||||
surl = smuggle_url(player_url, {'http_headers': {'Referer': url}})
|
||||
return surl
|
||||
# Look for embedded (swf embed) Vimeo player
|
||||
mobj = re.search(
|
||||
@@ -262,11 +268,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
self._login()
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, data = unsmuggle_url(url)
|
||||
url, data = unsmuggle_url(url, {})
|
||||
headers = std_headers
|
||||
if data is not None:
|
||||
if 'http_headers' in data:
|
||||
headers = headers.copy()
|
||||
headers.update(data)
|
||||
headers.update(data['http_headers'])
|
||||
if 'Referer' not in headers:
|
||||
headers['Referer'] = url
|
||||
|
||||
@@ -342,7 +348,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option')
|
||||
|
||||
if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None:
|
||||
if data and '_video_password_verified' in data:
|
||||
if '_video_password_verified' in data:
|
||||
raise ExtractorError('video password verification failed!')
|
||||
self._verify_video_password(url, video_id, webpage)
|
||||
return self._real_extract(
|
||||
@@ -354,6 +360,13 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
if config.get('view') == 4:
|
||||
config = self._verify_player_video_password(url, video_id)
|
||||
|
||||
if '>You rented this title.<' in webpage:
|
||||
feature_id = config.get('video', {}).get('vod', {}).get('feature_id')
|
||||
if feature_id and not data.get('force_feature_id', False):
|
||||
return self.url_result(smuggle_url(
|
||||
'https://player.vimeo.com/player/%s' % feature_id,
|
||||
{'force_feature_id': True}), 'Vimeo')
|
||||
|
||||
# Extract title
|
||||
video_title = config["video"]["title"]
|
||||
|
||||
@@ -412,16 +425,21 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
download_data = self._download_json(download_request, video_id, fatal=False)
|
||||
if download_data:
|
||||
source_file = download_data.get('source_file')
|
||||
if source_file and not source_file.get('is_cold') and not source_file.get('is_defrosting'):
|
||||
formats.append({
|
||||
'url': source_file['download_url'],
|
||||
'ext': source_file['extension'].lower(),
|
||||
'width': int_or_none(source_file.get('width')),
|
||||
'height': int_or_none(source_file.get('height')),
|
||||
'filesize': parse_filesize(source_file.get('size')),
|
||||
'format_id': source_file.get('public_name', 'Original'),
|
||||
'preference': 1,
|
||||
})
|
||||
if isinstance(source_file, dict):
|
||||
download_url = source_file.get('download_url')
|
||||
if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'):
|
||||
source_name = source_file.get('public_name', 'Original')
|
||||
if self._is_valid_url(download_url, video_id, '%s video' % source_name):
|
||||
ext = source_file.get('extension', determine_ext(download_url)).lower(),
|
||||
formats.append({
|
||||
'url': download_url,
|
||||
'ext': ext,
|
||||
'width': int_or_none(source_file.get('width')),
|
||||
'height': int_or_none(source_file.get('height')),
|
||||
'filesize': parse_filesize(source_file.get('size')),
|
||||
'format_id': source_name,
|
||||
'preference': 1,
|
||||
})
|
||||
config_files = config['video'].get('files') or config['request'].get('files', {})
|
||||
for f in config_files.get('progressive', []):
|
||||
video_url = f.get('url')
|
||||
|
||||
@@ -8,7 +8,7 @@ from ..utils import float_or_none
|
||||
|
||||
|
||||
class VRTIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:deredactie|sporza|cobra)\.be/cm/(?:[^/]+/)+(?P<id>[^/]+)/*'
|
||||
_VALID_URL = r'https?://(?:deredactie|sporza|cobra(?:\.canvas)?)\.be/cm/(?:[^/]+/)+(?P<id>[^/]+)/*'
|
||||
_TESTS = [
|
||||
# deredactie.be
|
||||
{
|
||||
@@ -52,6 +52,10 @@ class VRTIE(InfoExtractor):
|
||||
'duration': 661,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://cobra.canvas.be/cm/cobra/videozone/rubriek/film-videozone/1.2377055',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -69,11 +73,11 @@ class VRTIE(InfoExtractor):
|
||||
if mobj:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
'%s/%s' % (mobj.group('server'), mobj.group('path')),
|
||||
video_id, 'mp4'))
|
||||
video_id, 'mp4', m3u8_id='hls'))
|
||||
mobj = re.search(r'data-video-src="(?P<src>[^"]+)"', webpage)
|
||||
if mobj:
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
'%s/manifest.f4m' % mobj.group('src'), video_id))
|
||||
'%s/manifest.f4m' % mobj.group('src'), video_id, f4m_id='hds'))
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
|
||||
@@ -4,10 +4,10 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
str_to_int,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ class XHamsterIE(InfoExtractor):
|
||||
'title': 'FemaleAgent Shy beauty takes the bait',
|
||||
'upload_date': '20121014',
|
||||
'uploader': 'Ruseful2011',
|
||||
'duration': 893,
|
||||
'duration': 893.52,
|
||||
'age_limit': 18,
|
||||
}
|
||||
},
|
||||
@@ -34,7 +34,7 @@ class XHamsterIE(InfoExtractor):
|
||||
'title': 'Britney Spears Sexy Booty',
|
||||
'upload_date': '20130914',
|
||||
'uploader': 'jojo747400',
|
||||
'duration': 200,
|
||||
'duration': 200.48,
|
||||
'age_limit': 18,
|
||||
}
|
||||
},
|
||||
@@ -64,20 +64,21 @@ class XHamsterIE(InfoExtractor):
|
||||
webpage = self._download_webpage(mrss_url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
[r'<title>(?P<title>.+?)(?:, (?:[^,]+? )?Porn: xHamster| - xHamster\.com)</title>',
|
||||
r'<h1>([^<]+)</h1>'], webpage, 'title')
|
||||
[r'<h1[^>]*>([^<]+)</h1>',
|
||||
r'<meta[^>]+itemprop=".*?caption.*?"[^>]+content="(.+?)"',
|
||||
r'<title[^>]*>(.+?)(?:,\s*[^,]*?\s*Porn\s*[^,]*?:\s*xHamster[^<]*| - xHamster\.com)</title>'],
|
||||
webpage, 'title')
|
||||
|
||||
# Only a few videos have a description
|
||||
mobj = re.search(r'<span>Description: </span>([^<]+)', webpage)
|
||||
description = mobj.group(1) if mobj else None
|
||||
|
||||
upload_date = self._html_search_regex(r'hint=\'(\d{4}-\d{2}-\d{2}) \d{2}:\d{2}:\d{2} [A-Z]{3,4}\'',
|
||||
webpage, 'upload date', fatal=False)
|
||||
if upload_date:
|
||||
upload_date = unified_strdate(upload_date)
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'hint=["\'](\d{4}-\d{2}-\d{2}) \d{2}:\d{2}:\d{2} [A-Z]{3,4}',
|
||||
webpage, 'upload date', fatal=False))
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
r"<a href='[^']+xhamster\.com/user/[^>]+>(?P<uploader>[^<]+)",
|
||||
r'<span[^>]+itemprop=["\']author[^>]+><a[^>]+href=["\'].+?xhamster\.com/user/[^>]+>(?P<uploader>.+?)</a>',
|
||||
webpage, 'uploader', default='anonymous')
|
||||
|
||||
thumbnail = self._search_regex(
|
||||
@@ -85,12 +86,13 @@ class XHamsterIE(InfoExtractor):
|
||||
r'''<video[^>]+poster=(?P<q>["'])(?P<thumbnail>.+?)(?P=q)[^>]*>'''],
|
||||
webpage, 'thumbnail', fatal=False, group='thumbnail')
|
||||
|
||||
duration = parse_duration(self._html_search_regex(r'<span>Runtime:</span> (\d+:\d+)</div>',
|
||||
webpage, 'duration', fatal=False))
|
||||
duration = float_or_none(self._search_regex(
|
||||
r'(["\'])duration\1\s*:\s*(["\'])(?P<duration>.+?)\2',
|
||||
webpage, 'duration', fatal=False, group='duration'))
|
||||
|
||||
view_count = self._html_search_regex(r'<span>Views:</span> ([^<]+)</div>', webpage, 'view count', fatal=False)
|
||||
if view_count:
|
||||
view_count = str_to_int(view_count)
|
||||
view_count = int_or_none(self._search_regex(
|
||||
r'content=["\']User(?:View|Play)s:(\d+)',
|
||||
webpage, 'view count', fatal=False))
|
||||
|
||||
mobj = re.search(r"hint='(?P<likecount>\d+) Likes / (?P<dislikecount>\d+) Dislikes'", webpage)
|
||||
(like_count, dislike_count) = (mobj.group('likecount'), mobj.group('dislikecount')) if mobj else (None, None)
|
||||
|
||||
@@ -155,7 +155,16 @@ class YahooIE(InfoExtractor):
|
||||
'description': 'md5:8fc39608213295748e1e289807838c97',
|
||||
'duration': 1646,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# it uses an alias to get the video_id
|
||||
'url': 'https://www.yahoo.com/movies/the-stars-of-daddys-home-have-very-different-212843197.html',
|
||||
'info_dict': {
|
||||
'id': '40eda9c8-8e5f-3552-8745-830f67d0c737',
|
||||
'ext': 'mp4',
|
||||
'title': 'Will Ferrell & Mark Wahlberg Are Pro-Spanking',
|
||||
'description': 'While they play feuding fathers in \'Daddy\'s Home,\' star Will Ferrell & Mark Wahlberg share their true feelings on parenthood.',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -199,13 +208,22 @@ class YahooIE(InfoExtractor):
|
||||
r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
|
||||
default=None)
|
||||
if items_json is None:
|
||||
CONTENT_ID_REGEXES = [
|
||||
r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"',
|
||||
r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"',
|
||||
r'"first_videoid"\s*:\s*"([^"]+)"',
|
||||
r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id),
|
||||
]
|
||||
video_id = self._search_regex(CONTENT_ID_REGEXES, webpage, 'content ID')
|
||||
alias = self._search_regex(
|
||||
r'"aliases":{"video":"(.*?)"', webpage, 'alias', default=None)
|
||||
if alias is not None:
|
||||
alias_info = self._download_json(
|
||||
'https://www.yahoo.com/_td/api/resource/VideoService.videos;video_aliases=["%s"]' % alias,
|
||||
display_id, 'Downloading alias info')
|
||||
video_id = alias_info[0]['id']
|
||||
else:
|
||||
CONTENT_ID_REGEXES = [
|
||||
r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"',
|
||||
r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"',
|
||||
r'"first_videoid"\s*:\s*"([^"]+)"',
|
||||
r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id),
|
||||
]
|
||||
video_id = self._search_regex(
|
||||
CONTENT_ID_REGEXES, webpage, 'content ID')
|
||||
else:
|
||||
items = json.loads(items_json)
|
||||
info = items['mediaItems']['query']['results']['mediaObj'][0]
|
||||
|
||||
@@ -1235,10 +1235,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
video_description = re.sub(r'''(?x)
|
||||
<a\s+
|
||||
(?:[a-zA-Z-]+="[^"]+"\s+)*?
|
||||
title="([^"]+)"\s+
|
||||
(?:title|href)="([^"]+)"\s+
|
||||
(?:[a-zA-Z-]+="[^"]+"\s+)*?
|
||||
class="yt-uix-redirect-link"\s*>
|
||||
[^<]+
|
||||
class="(?:yt-uix-redirect-link|yt-uix-sessionlink[^"]*)".*?>
|
||||
[^<]+\.{3}\s*
|
||||
</a>
|
||||
''', r'\1', video_description)
|
||||
video_description = clean_html(video_description)
|
||||
|
||||
@@ -13,6 +13,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
qualities,
|
||||
float_or_none,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
@@ -59,7 +60,6 @@ class ZDFIE(InfoExtractor):
|
||||
'ext': 'flv',
|
||||
'format_id': '%s-%d' % (proto, bitrate),
|
||||
'tbr': bitrate,
|
||||
'protocol': proto,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
@@ -70,6 +70,15 @@ class ZDFIE(InfoExtractor):
|
||||
note='Downloading video info',
|
||||
errnote='Failed to download video info')
|
||||
|
||||
status_code = doc.find('./status/statuscode')
|
||||
if status_code is not None and status_code.text != 'ok':
|
||||
code = status_code.text
|
||||
if code == 'notVisibleAnymore':
|
||||
message = 'Video %s is not available' % video_id
|
||||
else:
|
||||
message = '%s returned error: %s' % (self.IE_NAME, code)
|
||||
raise ExtractorError(message, expected=True)
|
||||
|
||||
title = doc.find('.//information/title').text
|
||||
description = xpath_text(doc, './/information/detail', 'description')
|
||||
duration = int_or_none(xpath_text(doc, './/details/lengthSec', 'duration'))
|
||||
@@ -129,10 +138,10 @@ class ZDFIE(InfoExtractor):
|
||||
video_url, video_id, fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url, video_id, f4m_id='hds', fatal=False))
|
||||
video_url, video_id, f4m_id=format_id, fatal=False))
|
||||
else:
|
||||
proto = format_m.group('proto').lower()
|
||||
|
||||
|
||||
@@ -70,6 +70,21 @@ ENGLISH_MONTH_NAMES = [
|
||||
'January', 'February', 'March', 'April', 'May', 'June',
|
||||
'July', 'August', 'September', 'October', 'November', 'December']
|
||||
|
||||
KNOWN_EXTENSIONS = (
|
||||
'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
|
||||
'flv', 'f4v', 'f4a', 'f4b',
|
||||
'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
|
||||
'mkv', 'mka', 'mk3d',
|
||||
'avi', 'divx',
|
||||
'mov',
|
||||
'asf', 'wmv', 'wma',
|
||||
'3gp', '3g2',
|
||||
'mp3',
|
||||
'flac',
|
||||
'ape',
|
||||
'wav',
|
||||
'f4f', 'f4m', 'm3u8', 'smil')
|
||||
|
||||
|
||||
def preferredencoding():
|
||||
"""Get preferred encoding.
|
||||
@@ -942,20 +957,8 @@ def determine_ext(url, default_ext='unknown_video'):
|
||||
guess = url.partition('?')[0].rpartition('.')[2]
|
||||
if re.match(r'^[A-Za-z0-9]+$', guess):
|
||||
return guess
|
||||
elif guess.rstrip('/') in (
|
||||
'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
|
||||
'flv', 'f4v', 'f4a', 'f4b',
|
||||
'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
|
||||
'mkv', 'mka', 'mk3d',
|
||||
'avi', 'divx',
|
||||
'mov',
|
||||
'asf', 'wmv', 'wma',
|
||||
'3gp', '3g2',
|
||||
'mp3',
|
||||
'flac',
|
||||
'ape',
|
||||
'wav',
|
||||
'f4f', 'f4m', 'm3u8', 'smil'):
|
||||
# Try to extract ext from URLs like http://example.com/foo/bar.mp4/?download
|
||||
elif guess.rstrip('/') in KNOWN_EXTENSIONS:
|
||||
return guess.rstrip('/')
|
||||
else:
|
||||
return default_ext
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2016.01.01'
|
||||
__version__ = '2016.01.09'
|
||||
|
||||
Reference in New Issue
Block a user