Compare commits

...

37 Commits

Author SHA1 Message Date
Philipp Hagemeister
73c566695f release 2013.11.17 2013-11-17 22:14:13 +01:00
Philipp Hagemeister
63b7b7224a [MTVIE] Try with RTMP URL if download fails
This fixes youtube-dl http://www.southpark.de/clips/155251/cartman-vs-the-dog-whisperer
2013-11-17 22:11:40 +01:00
Philipp Hagemeister
ce80c8b8ee Merge pull request #1784 from rzhxeo/southpark
Add support for southpark.de
2013-11-17 12:15:13 -08:00
Philipp Hagemeister
749febf4d1 Allow --console-title when --quiet is given (Fixes #1783) 2013-11-17 21:12:50 +01:00
Philipp Hagemeister
bdde425cbe Save and restore console title (Fixes #1782) 2013-11-17 21:10:11 +01:00
rzhxeo
746f491f82 Add support for southpark.de 2013-11-17 17:54:47 +01:00
rzhxeo
1672647ade [SouthParkStudiosIE] Move from _TEST to _TESTS 2013-11-17 17:43:58 +01:00
rzhxeo
90b6bbc38c [SouthParkStudiosIE] Also detect urls without http:// or www 2013-11-17 17:42:24 +01:00
Philipp Hagemeister
ce02ed60f2 Remove * imports 2013-11-17 16:47:52 +01:00
Philipp Hagemeister
1e5b9a95fd Move console_title to YoutubeDL 2013-11-17 11:39:52 +01:00
Philipp Hagemeister
1d699755e0 [youtube] Add view_count (Fixes #1781) 2013-11-17 11:06:16 +01:00
Philipp Hagemeister
ddf49c6344 [arte] remove two typos 2013-11-17 11:05:49 +01:00
Philipp Hagemeister
d1c252048b [redtube] Do not test md5, seems to vary 2013-11-16 10:30:09 +01:00
Philipp Hagemeister
eab2724138 [gamekings] Do not test md5 sum, precise file changes regularly 2013-11-16 02:32:23 +01:00
Philipp Hagemeister
21ea3e06c9 [gamekings] remove unnecessary import 2013-11-16 02:31:02 +01:00
Philipp Hagemeister
52d703d3d1 [tvp] Skip tests 2013-11-16 02:09:30 +01:00
Philipp Hagemeister
ce152341a1 [bambuser] Do not test for MD5, seems to be flaky 2013-11-16 01:59:28 +01:00
Philipp Hagemeister
f058e34011 [dailymotion] Fix playlists 2013-11-16 01:56:23 +01:00
Philipp Hagemeister
b5349e8721 Fix indentation of (best) and (worst) in --list-formats 2013-11-16 01:39:45 +01:00
Philipp Hagemeister
7150858d49 [spiegel] Implement format selection 2013-11-16 01:33:12 +01:00
Philipp Hagemeister
91c7271aab Add automatic generation of format note based on bitrate and codecs 2013-11-16 01:08:43 +01:00
Philipp Hagemeister
aa13b2dffd release 2013.11.15.1 2013-11-15 14:35:00 +01:00
Philipp Hagemeister
fc2ef392be [ted] Fix playlists (Fixes #1770) 2013-11-15 14:33:51 +01:00
Philipp Hagemeister
463a908705 [ted] simplify 2013-11-15 14:06:38 +01:00
Jaime Marquínez Ferrándiz
d24ffe1cfa [rtlnow] Remove the test for nitro
The videos expire.
2013-11-15 12:57:59 +01:00
Jaime Marquínez Ferrándiz
78fb87b283 Don't accept '>' inside the content attribute in OpenGraph regexes 2013-11-15 12:54:13 +01:00
Jaime Marquínez Ferrándiz
ab2d524780 Improve the OpenGraph regex
* Do not accept '>' between the property and content attributes.
* Recognize the properties if the content attribute is before the property attribute using two regexes (fixes the extraction of the description for SlideshareIE).
2013-11-15 12:24:54 +01:00
Jaime Marquínez Ferrándiz
85d61685f1 [tvp] Update the title and the description of the test video 2013-11-15 12:10:22 +01:00
Jaime Marquínez Ferrándiz
b9643eed7c [youtube:channel] Fix the extraction of autogenerated channels
The ajax pages are empty, now it looks directly in the channel's /videos page
2013-11-15 11:51:45 +01:00
Jaime Marquínez Ferrándiz
feee2ecfa9 Pass the 'download' argument to 'process_video_result' (fixes #1769) 2013-11-15 11:04:26 +01:00
Philipp Hagemeister
a25a5cfeec release 2013.11.15 2013-11-15 01:47:15 +01:00
Philipp Hagemeister
0e145dd541 Merge branch 'master' of github.com:rg3/youtube-dl 2013-11-15 01:46:50 +01:00
Philipp Hagemeister
9f9be844fc [youtube] Fix protocol-independent URLs (Fixes #1768) 2013-11-15 01:45:39 +01:00
Jaime Marquínez Ferrándiz
e3b9ab5e18 [soundlcoud] Set the correct extension for the tracks (fixes #1766)
Some tracks are not in mp3 format, they can be wav files.
2013-11-14 19:45:39 +01:00
Jaime Marquínez Ferrándiz
c66d2baa9c [livestream] Add an extractor for the original version of livestream (closes #1764)
The two versions use different systems.
2013-11-14 13:16:32 +01:00
Jaime Marquínez Ferrándiz
ca715127a2 Don't assume the 'subtitlesformat' is set in the params dict (fixes #1750) 2013-11-13 17:14:10 +01:00
Jaime Marquínez Ferrándiz
ea7a7af1d4 [gamekings] Fix the test video checksum 2013-11-13 17:13:06 +01:00
22 changed files with 331 additions and 163 deletions

View File

@@ -5,9 +5,6 @@ import subprocess
import sys
import time
if os.name == 'nt':
import ctypes
from .utils import (
compat_urllib_error,
compat_urllib_request,
@@ -151,16 +148,8 @@ class FileDownloader(object):
def to_stderr(self, message):
self.ydl.to_screen(message)
def to_cons_title(self, message):
"""Set console/terminal window title to message."""
if not self.params.get('consoletitle', False):
return
if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
# c_wchar_p() might not be necessary if `message` is
# already of type unicode()
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
elif 'TERM' in os.environ:
self.to_screen('\033]0;%s\007' % message, skip_eol=True)
def to_console_title(self, message):
self.ydl.to_console_title(message)
def trouble(self, *args, **kargs):
self.ydl.trouble(*args, **kargs)
@@ -249,7 +238,7 @@ class FileDownloader(object):
else:
self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' %
(clear_line, percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
self.to_console_title(u'youtube-dl - %s of %s at %s ETA %s' %
(percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
def report_resuming_byte(self, resume_len):

View File

@@ -13,7 +13,34 @@ import sys
import time
import traceback
from .utils import *
if os.name == 'nt':
import ctypes
from .utils import (
compat_http_client,
compat_print,
compat_str,
compat_urllib_error,
compat_urllib_request,
ContentTooShortError,
date_from_str,
DateRange,
determine_ext,
DownloadError,
encodeFilename,
ExtractorError,
locked_file,
MaxDownloadsReached,
PostProcessingError,
preferredencoding,
SameFileError,
sanitize_filename,
subtitles_filename,
takewhile_inclusive,
UnavailableVideoError,
write_json_file,
write_string,
)
from .extractor import get_info_extractor, gen_extractors
from .FileDownloader import FileDownloader
@@ -176,6 +203,35 @@ class YoutubeDL(object):
output = output.encode(preferredencoding())
sys.stderr.write(output)
def to_console_title(self, message):
if not self.params.get('consoletitle', False):
return
if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
# c_wchar_p() might not be necessary if `message` is
# already of type unicode()
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
elif 'TERM' in os.environ:
write_string(u'\033]0;%s\007' % message, self._screen_file)
def save_console_title(self):
if not self.params.get('consoletitle', False):
return
if 'TERM' in os.environ:
write_string(u'\033[22t', self._screen_file)
def restore_console_title(self):
if not self.params.get('consoletitle', False):
return
if 'TERM' in os.environ:
write_string(u'\033[23t', self._screen_file)
def __enter__(self):
self.save_console_title()
return self
def __exit__(self, *args):
self.restore_console_title()
def fixed_template(self):
"""Checks if the output template is fixed."""
return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
@@ -254,7 +310,7 @@ class YoutubeDL(object):
"""Report file has already been fully downloaded."""
try:
self.to_screen(u'[download] %s has already been downloaded' % file_name)
except (UnicodeEncodeError) as err:
except UnicodeEncodeError:
self.to_screen(u'[download] The file has already been downloaded')
def increment_downloads(self):
@@ -385,7 +441,7 @@ class YoutubeDL(object):
result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
if result_type == 'video':
self.add_extra_info(ie_result, extra_info)
return self.process_video_result(ie_result)
return self.process_video_result(ie_result, download=download)
elif result_type == 'url':
# We have to add extra_info to the results because it may be
# contained in a playlist
@@ -640,7 +696,7 @@ class YoutubeDL(object):
# subtitles download errors are already managed as troubles in relevant IE
# that way it will silently go on when used with unsupporting IE
subtitles = info_dict['subtitles']
sub_format = self.params.get('subtitlesformat')
sub_format = self.params.get('subtitlesformat', 'srt')
for sub_lang in subtitles.keys():
sub = subtitles[sub_lang]
if sub is None:
@@ -781,20 +837,42 @@ class YoutubeDL(object):
return res
def list_formats(self, info_dict):
def format_note(fdict):
if fdict.get('format_note') is not None:
return fdict['format_note']
res = u''
if fdict.get('vcodec') is not None:
res += u'%-5s' % fdict['vcodec']
elif fdict.get('vbr') is not None:
res += u'video'
if fdict.get('vbr') is not None:
res += u'@%4dk' % fdict['vbr']
if fdict.get('acodec') is not None:
if res:
res += u', '
res += u'%-5s' % fdict['acodec']
elif fdict.get('abr') is not None:
if res:
res += u', '
res += 'audio'
if fdict.get('abr') is not None:
res += u'@%3dk' % fdict['abr']
return res
def line(format):
return (u'%-20s%-10s%-12s%s' % (
format['format_id'],
format['ext'],
self.format_resolution(format),
format.get('format_note', ''),
format_note(format),
)
)
formats = info_dict.get('formats', [info_dict])
formats_s = list(map(line, formats))
if len(formats) > 1:
formats_s[0] += (' ' if formats[0].get('format_note') else '') + '(worst)'
formats_s[-1] += (' ' if formats[-1].get('format_note') else '') + '(best)'
formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
header_line = line({
'format_id': u'format code', 'ext': u'extension',

View File

@@ -603,8 +603,7 @@ def _real_main(argv=None):
u' file! Use "%%(ext)s" instead of %r' %
determine_ext(outtmpl, u''))
# YoutubeDL
ydl = YoutubeDL({
ydl_opts = {
'usenetrc': opts.usenetrc,
'username': opts.username,
'password': opts.password,
@@ -667,61 +666,63 @@ def _real_main(argv=None):
'youtube_print_sig_code': opts.youtube_print_sig_code,
'age_limit': opts.age_limit,
'download_archive': opts.download_archive,
})
}
if opts.verbose:
write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
try:
sp = subprocess.Popen(
['git', 'rev-parse', '--short', 'HEAD'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
cwd=os.path.dirname(os.path.abspath(__file__)))
out, err = sp.communicate()
out = out.decode().strip()
if re.match('[0-9a-f]+', out):
write_string(u'[debug] Git HEAD: ' + out + u'\n')
except:
with YoutubeDL(ydl_opts) as ydl:
if opts.verbose:
write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
try:
sys.exc_clear()
sp = subprocess.Popen(
['git', 'rev-parse', '--short', 'HEAD'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
cwd=os.path.dirname(os.path.abspath(__file__)))
out, err = sp.communicate()
out = out.decode().strip()
if re.match('[0-9a-f]+', out):
write_string(u'[debug] Git HEAD: ' + out + u'\n')
except:
pass
write_string(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
try:
sys.exc_clear()
except:
pass
write_string(u'[debug] Python version %s - %s' %
(platform.python_version(), platform_name()) + u'\n')
proxy_map = {}
for handler in opener.handlers:
if hasattr(handler, 'proxies'):
proxy_map.update(handler.proxies)
write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
proxy_map = {}
for handler in opener.handlers:
if hasattr(handler, 'proxies'):
proxy_map.update(handler.proxies)
write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
ydl.add_default_info_extractors()
ydl.add_default_info_extractors()
# PostProcessors
# Add the metadata pp first, the other pps will copy it
if opts.addmetadata:
ydl.add_post_processor(FFmpegMetadataPP())
if opts.extractaudio:
ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
if opts.recodevideo:
ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
if opts.embedsubtitles:
ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
# PostProcessors
# Add the metadata pp first, the other pps will copy it
if opts.addmetadata:
ydl.add_post_processor(FFmpegMetadataPP())
if opts.extractaudio:
ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
if opts.recodevideo:
ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
if opts.embedsubtitles:
ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
# Update version
if opts.update_self:
update_self(ydl.to_screen, opts.verbose)
# Update version
if opts.update_self:
update_self(ydl.to_screen, opts.verbose)
# Maybe do nothing
if len(all_urls) < 1:
if not opts.update_self:
parser.error(u'you must provide at least one URL')
else:
sys.exit()
# Maybe do nothing
if len(all_urls) < 1:
if not opts.update_self:
parser.error(u'you must provide at least one URL')
else:
sys.exit()
try:
retcode = ydl.download(all_urls)
except MaxDownloadsReached:
ydl.to_screen(u'--max-download limit reached, aborting.')
retcode = 101
try:
retcode = ydl.download(all_urls)
except MaxDownloadsReached:
ydl.to_screen(u'--max-download limit reached, aborting.')
retcode = 101
# Dump cookie jar if requested
if opts.cookiefile is not None:

View File

@@ -80,7 +80,7 @@ from .keezmovies import KeezMoviesIE
from .kickstarter import KickStarterIE
from .keek import KeekIE
from .liveleak import LiveLeakIE
from .livestream import LivestreamIE
from .livestream import LivestreamIE, LivestreamOriginalIE
from .metacafe import MetacafeIE
from .metacritic import MetacriticIE
from .mit import TechTVMITIE, MITIE
@@ -116,7 +116,10 @@ from .slashdot import SlashdotIE
from .slideshare import SlideshareIE
from .sohu import SohuIE
from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
from .southparkstudios import SouthParkStudiosIE
from .southparkstudios import (
SouthParkStudiosIE,
SouthparkDeIE,
)
from .space import SpaceIE
from .spankwire import SpankwireIE
from .spiegel import SpiegelIE

View File

@@ -69,7 +69,7 @@ class ArteTvIE(InfoExtractor):
lang = mobj.group('lang')
return self._extract_liveweb(url, name, lang)
if re.search(self._LIVE_URL, video_id) is not None:
if re.search(self._LIVE_URL, url) is not None:
raise ExtractorError(u'Arte live streams are not yet supported, sorry')
# self.extractLiveStream(url)
# return
@@ -115,7 +115,7 @@ class ArteTvIE(InfoExtractor):
event_doc = config_doc.find('event')
url_node = event_doc.find('video').find('urlHd')
if url_node is None:
url_node = video_doc.find('urlSd')
url_node = event_doc.find('urlSd')
return {'id': video_id,
'title': event_doc.find('name%s' % lang.capitalize()).text,

View File

@@ -15,7 +15,8 @@ class BambuserIE(InfoExtractor):
_TEST = {
u'url': u'http://bambuser.com/v/4050584',
u'md5': u'fba8f7693e48fd4e8641b3fd5539a641',
# MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388
#u'md5': u'fba8f7693e48fd4e8641b3fd5539a641',
u'info_dict': {
u'id': u'4050584',
u'ext': u'flv',

View File

@@ -71,6 +71,10 @@ class InfoExtractor(object):
("3D" or "DASH video")
* width Width of the video, if known
* height Height of the video, if known
* abr Average audio bitrate in KBit/s
* acodec Name of the audio codec in use
* vbr Average video bitrate in KBit/s
* vcodec Name of the video codec in use
webpage_url: The url to the video webpage, if given to youtube-dl it
should allow to get the same result again. (It will be set
by YoutubeDL if it's missing)
@@ -315,13 +319,19 @@ class InfoExtractor(object):
# Helper functions for extracting OpenGraph info
@staticmethod
def _og_regex(prop):
return r'<meta.+?property=[\'"]og:%s[\'"].+?content=(?:"(.+?)"|\'(.+?)\')' % re.escape(prop)
def _og_regexes(prop):
content_re = r'content=(?:"([^>]+?)"|\'(.+?)\')'
property_re = r'property=[\'"]og:%s[\'"]' % re.escape(prop)
template = r'<meta[^>]+?%s[^>]+?%s'
return [
template % (property_re, content_re),
template % (content_re, property_re),
]
def _og_search_property(self, prop, html, name=None, **kargs):
if name is None:
name = 'OpenGraph %s' % prop
escaped = self._search_regex(self._og_regex(prop), html, name, flags=re.DOTALL, **kargs)
escaped = self._search_regex(self._og_regexes(prop), html, name, flags=re.DOTALL, **kargs)
if escaped is None:
return None
return unescapeHTML(escaped)
@@ -336,8 +346,8 @@ class InfoExtractor(object):
return self._og_search_property('title', html, **kargs)
def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
regexes = [self._og_regex('video')]
if secure: regexes.insert(0, self._og_regex('video:secure_url'))
regexes = self._og_regexes('video')
if secure: regexes = self._og_regexes('video:secure_url') + regexes
return self._html_search_regex(regexes, html, name, **kargs)
def _rta_search(self, html):

View File

@@ -186,7 +186,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
webpage = self._download_webpage(request,
id, u'Downloading page %s' % pagenum)
playlist_el = get_element_by_attribute(u'class', u'video_list', webpage)
playlist_el = get_element_by_attribute(u'class', u'row video_list', webpage)
video_ids.extend(re.findall(r'data-id="(.+?)"', playlist_el))
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:

View File

@@ -1,9 +1,6 @@
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
)
class GamekingsIE(InfoExtractor):
@@ -11,7 +8,8 @@ class GamekingsIE(InfoExtractor):
_TEST = {
u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/",
u'file': u'20130811.mp4',
u'md5': u'17f6088f7d0149ff2b46f2714bdb1954',
# MD5 is flaky, seems to change regularly
#u'md5': u'2f32b1f7b80fdc5cb616efb4f387f8a3',
u'info_dict': {
u"title": u"Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review",
u"description": u"Melle en Steven hebben voor de review een week in de rechtbank doorbracht met Phoenix Wright: Ace Attorney - Dual Destinies.",

View File

@@ -1,16 +1,19 @@
import re
import json
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse_urlparse,
compat_urlparse,
get_meta_content,
xpath_with_ns,
ExtractorError,
)
class LivestreamIE(InfoExtractor):
IE_NAME = u'livestream'
_VALID_URL = r'http://new.livestream.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
_TEST = {
u'url': u'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
@@ -54,3 +57,44 @@ class LivestreamIE(InfoExtractor):
info = json.loads(self._download_webpage(api_url, video_id,
u'Downloading video info'))
return self._extract_video_info(info)
# The original version of Livestream uses a different system
class LivestreamOriginalIE(InfoExtractor):
IE_NAME = u'livestream:original'
_VALID_URL = r'https?://www\.livestream\.com/(?P<user>[^/]+)/video\?.*?clipId=(?P<id>.*?)(&|$)'
_TEST = {
u'url': u'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
u'info_dict': {
u'id': u'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
u'ext': u'flv',
u'title': u'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital',
},
u'params': {
# rtmp
u'skip_download': True,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
user = mobj.group('user')
api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
api_response = self._download_webpage(api_url, video_id)
info = xml.etree.ElementTree.fromstring(api_response.encode('utf-8'))
item = info.find('channel').find('item')
ns = {'media': 'http://search.yahoo.com/mrss'}
thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url']
# Remove the extension and number from the path (like 1.jpg)
path = self._search_regex(r'(user-files/.+)_.*?\.jpg$', thumbnail_url, u'path')
return {
'id': video_id,
'title': item.find('title').text,
'url': 'rtmp://extondemand.livestream.com/ondemand',
'play_path': 'mp4:trans/dv15/mogulus-{0}.mp4'.format(path),
'ext': 'flv',
'thumbnail': thumbnail_url,
}

View File

@@ -48,7 +48,7 @@ class MTVIE(InfoExtractor):
def _transform_rtmp_url(rtmp_video_url):
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url)
if not m:
raise ExtractorError(u'Cannot transform RTMP url')
return rtmp_video_url
base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
return base + m.group('finalid')

View File

@@ -8,7 +8,9 @@ class RedTubeIE(InfoExtractor):
_TEST = {
u'url': u'http://www.redtube.com/66418',
u'file': u'66418.mp4',
u'md5': u'7b8c22b5e7098a3e1c09709df1126d2d',
# md5 varies from time to time, as in
# https://travis-ci.org/rg3/youtube-dl/jobs/14052463#L295
#u'md5': u'7b8c22b5e7098a3e1c09709df1126d2d',
u'info_dict': {
u"title": u"Sucked on a toilet",
u"age_limit": 18,

View File

@@ -62,18 +62,6 @@ class RTLnowIE(InfoExtractor):
u'skip_download': True,
},
},
{
u'url': u'http://www.rtlnitronow.de/recht-ordnung/stadtpolizei-frankfurt-gerichtsvollzieher-leipzig.php?film_id=129679&player=1&season=1',
u'file': u'129679.flv',
u'info_dict': {
u'upload_date': u'20131016',
u'title': u'Recht & Ordnung - Stadtpolizei Frankfurt/ Gerichtsvollzieher...',
u'description': u'Stadtpolizei Frankfurt/ Gerichtsvollzieher Leipzig',
},
u'params': {
u'skip_download': True,
},
},
{
u'url': u'http://www.n-tvnow.de/top-gear/episode-1-2013-01-01-00-00-00.php?film_id=124903&player=1&season=10',
u'file': u'124903.flv',

View File

@@ -87,7 +87,7 @@ class SoundcloudIE(InfoExtractor):
'uploader': info['user']['username'],
'upload_date': unified_strdate(info['created_at']),
'title': info['title'],
'ext': u'mp3',
'ext': info.get('original_format', u'mp3'),
'description': info['description'],
'thumbnail': thumbnail,
}

View File

@@ -5,21 +5,19 @@ from .mtv import MTVIE, _media_xml_tag
class SouthParkStudiosIE(MTVIE):
IE_NAME = u'southparkstudios.com'
_VALID_URL = r'https?://www\.southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$)'
_VALID_URL = r'(https?://)?(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
_TEST = {
# Overwrite MTVIE properties we don't want
_TESTS = [{
u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
u'info_dict': {
u'title': u'Bat Daded',
u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.',
},
}
# Overwrite MTVIE properties we don't want
_TESTS = []
}]
def _get_thumbnail_url(self, uri, itemdoc):
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
@@ -31,8 +29,23 @@ class SouthParkStudiosIE(MTVIE):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
url = u'http://www.' + mobj.group(u'url')
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
mgid = self._search_regex(r'swfobject.embedSWF\(".*?(mgid:.*?)"',
webpage, u'mgid')
return self._get_videos_info(mgid)
class SouthparkDeIE(SouthParkStudiosIE):
IE_NAME = u'southpark.de'
_VALID_URL = r'(https?://)?(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
_FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'
_TESTS = [{
u'url': u'http://www.southpark.de/clips/uygssh/the-government-wont-respect-my-privacy#tab=featured',
u'file': u'85487c96-b3b9-4e39-9127-ad88583d9bf2.mp4',
u'info_dict': {
u'title': u'The Government Won\'t Respect My Privacy',
u'description': u'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
},
}]

View File

@@ -2,18 +2,27 @@ import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import determine_ext
class SpiegelIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
_TEST = {
_TESTS = [{
u'url': u'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
u'file': u'1259285.mp4',
u'md5': u'2c2754212136f35fb4b19767d242f66e',
u'info_dict': {
u"title": u"Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv"
}
}
},
{
u'url': u'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
u'file': u'1309159.mp4',
u'md5': u'f2cdf638d7aa47654e251e1aee360af1',
u'info_dict': {
u'title': u'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers'
}
}]
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
@@ -21,25 +30,38 @@ class SpiegelIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
video_title = self._html_search_regex(r'<div class="module-title">(.*?)</div>',
webpage, u'title')
video_title = self._html_search_regex(
r'<div class="module-title">(.*?)</div>', webpage, u'title')
xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
xml_code = self._download_webpage(xml_url, video_id,
note=u'Downloading XML', errnote=u'Failed to download XML')
xml_code = self._download_webpage(
xml_url, video_id,
note=u'Downloading XML', errnote=u'Failed to download XML')
idoc = xml.etree.ElementTree.fromstring(xml_code)
last_type = idoc[-1]
filename = last_type.findall('./filename')[0].text
duration = float(last_type.findall('./duration')[0].text)
video_url = 'http://video2.spiegel.de/flash/' + filename
video_ext = filename.rpartition('.')[2]
formats = [
{
'format_id': n.tag.rpartition('type')[2],
'url': u'http://video2.spiegel.de/flash/' + n.find('./filename').text,
'width': int(n.find('./width').text),
'height': int(n.find('./height').text),
'abr': int(n.find('./audiobitrate').text),
'vbr': int(n.find('./videobitrate').text),
'vcodec': n.find('./codec').text,
'acodec': 'MP4A',
}
for n in list(idoc)
# Blacklist type 6, it's extremely LQ and not available on the same server
if n.tag.startswith('type') and n.tag != 'type6'
]
formats.sort(key=lambda f: f['vbr'])
duration = float(idoc[0].findall('./duration')[0].text)
info = {
'id': video_id,
'url': video_url,
'ext': video_ext,
'title': video_title,
'duration': duration,
'formats': formats,
}
return [info]
return info

View File

@@ -43,26 +43,25 @@ class TEDIE(SubtitlesInfoExtractor):
self.to_screen(u'Getting info of playlist %s: "%s"' % (playlist_id,name))
return [self._playlist_videos_info(url,name,playlist_id)]
def _playlist_videos_info(self,url,name,playlist_id=0):
def _playlist_videos_info(self, url, name, playlist_id):
'''Returns the videos of the playlist'''
video_RE=r'''
<li\ id="talk_(\d+)"([.\s]*?)data-id="(?P<video_id>\d+)"
([.\s]*?)data-playlist_item_id="(\d+)"
([.\s]*?)data-mediaslug="(?P<mediaSlug>.+?)"
'''
video_name_RE=r'<p\ class="talk-title"><a href="(?P<talk_url>/talks/(.+).html)">(?P<fullname>.+?)</a></p>'
webpage=self._download_webpage(url, playlist_id, 'Downloading playlist webpage')
m_videos=re.finditer(video_RE,webpage,re.VERBOSE)
m_names=re.finditer(video_name_RE,webpage)
webpage = self._download_webpage(
url, playlist_id, u'Downloading playlist webpage')
matches = re.finditer(
r'<p\s+class="talk-title[^"]*"><a\s+href="(?P<talk_url>/talks/[^"]+\.html)">[^<]*</a></p>',
webpage)
playlist_title = self._html_search_regex(r'div class="headline">\s*?<h1>\s*?<span>(.*?)</span>',
webpage, 'playlist title')
playlist_entries = []
for m_video, m_name in zip(m_videos,m_names):
talk_url='http://www.ted.com%s' % m_name.group('talk_url')
playlist_entries.append(self.url_result(talk_url, 'TED'))
return self.playlist_result(playlist_entries, playlist_id = playlist_id, playlist_title = playlist_title)
playlist_entries = [
self.url_result(u'http://www.ted.com' + m.group('talk_url'), 'TED')
for m in matches
]
return self.playlist_result(
playlist_entries, playlist_id=playlist_id, playlist_title=playlist_title)
def _talk_info(self, url, video_id=0):
"""Return the video for the talk in the url"""
@@ -85,7 +84,7 @@ class TEDIE(SubtitlesInfoExtractor):
'ext': 'mp4',
'url': stream['file'],
'format': stream['id']
} for stream in info['htmlStreams']]
} for stream in info['htmlStreams']]
video_id = info['id']
@@ -95,7 +94,7 @@ class TEDIE(SubtitlesInfoExtractor):
self._list_available_subtitles(video_id, webpage)
return
info = {
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
@@ -104,11 +103,6 @@ class TEDIE(SubtitlesInfoExtractor):
'formats': formats,
}
# TODO: Remove when #980 has been merged
info.update(info['formats'][-1])
return info
def _get_available_subtitles(self, video_id, webpage):
try:
options = self._search_regex(r'(?:<select name="subtitles_language_select" id="subtitles_language_select">)(.*?)(?:</select>)', webpage, 'subtitles_language_select', flags=re.DOTALL)

View File

@@ -13,9 +13,10 @@ class TvpIE(InfoExtractor):
u'md5': u'148408967a6a468953c0a75cbdaf0d7a',
u'file': u'12878238.wmv',
u'info_dict': {
u'title': u'31.10.2013',
u'description': u'31.10.2013',
u'title': u'31.10.2013 - Odcinek 2',
u'description': u'31.10.2013 - Odcinek 2',
},
u'skip': u'Download has to use same server IP as extraction. Therefore, a good (load-balancing) DNS resolver will make the download fail.'
}
def _real_extract(self, url):

View File

@@ -78,12 +78,13 @@ class VevoIE(InfoExtractor):
continue
format_url = self._SMIL_BASE_URL + m.group('path')
format_note = ('%(vcodec)s@%(vbr)4sk, %(acodec)s@%(abr)3sk' %
m.groupdict())
formats.append({
'url': format_url,
'format_id': u'SMIL_' + m.group('cbr'),
'format_note': format_note,
'vcodec': m.group('vcodec'),
'acodec': m.group('acodec'),
'vbr': int(m.group('vbr')),
'abr': int(m.group('abr')),
'ext': m.group('ext'),
'width': int(m.group('width')),
'height': int(m.group('height')),

View File

@@ -1019,6 +1019,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
"""Turn the encrypted s field into a working signature"""
if player_url is not None:
if player_url.startswith(u'//'):
player_url = u'https:' + player_url
try:
player_id = (player_url, len(s))
if player_id not in self._player_cache:
@@ -1098,7 +1100,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
params = compat_urllib_parse.urlencode({
'lang': lang,
'v': video_id,
'fmt': self._downloader.params.get('subtitlesformat'),
'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
'name': l[0].encode('utf-8'),
})
url = u'http://www.youtube.com/api/timedtext?' + params
@@ -1111,7 +1113,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
def _get_available_automatic_caption(self, video_id, webpage):
"""We need the webpage for getting the captions url, pass it as an
argument to speed up the process."""
sub_format = self._downloader.params.get('subtitlesformat')
sub_format = self._downloader.params.get('subtitlesformat', 'srt')
self.to_screen(u'%s: Looking for automatic captions' % video_id)
mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
err_msg = u'Couldn\'t find automatic captions for %s' % video_id
@@ -1299,6 +1301,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
else:
raise ExtractorError(u'"token" parameter not in video info for unknown reason')
if 'view_count' in video_info:
view_count = int(video_info['view_count'][0])
else:
view_count = None
# Check for "rental" videos
if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
raise ExtractorError(u'"rental" videos not supported')
@@ -1487,6 +1494,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'age_limit': 18 if age_gate else 0,
'annotations': video_annotations,
'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
'view_count': view_count,
})
return results
@@ -1592,20 +1600,31 @@ class YoutubeChannelIE(InfoExtractor):
# Download channel page
channel_id = mobj.group(1)
video_ids = []
url = 'https://www.youtube.com/channel/%s/videos' % channel_id
channel_page = self._download_webpage(url, channel_id)
if re.search(r'channel-header-autogenerated-label', channel_page) is not None:
autogenerated = True
else:
autogenerated = False
# Download all channel pages using the json-based channel_ajax query
for pagenum in itertools.count(1):
url = self._MORE_PAGES_URL % (pagenum, channel_id)
page = self._download_webpage(url, channel_id,
u'Downloading page #%s' % pagenum)
page = json.loads(page)
ids_in_page = self.extract_videos_from_page(page['content_html'])
video_ids.extend(ids_in_page)
if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
break
if autogenerated:
# The videos are contained in a single page
# the ajax pages can't be used, they are empty
video_ids = self.extract_videos_from_page(channel_page)
else:
# Download all channel pages using the json-based channel_ajax query
for pagenum in itertools.count(1):
url = self._MORE_PAGES_URL % (pagenum, channel_id)
page = self._download_webpage(url, channel_id,
u'Downloading page #%s' % pagenum)
page = json.loads(page)
ids_in_page = self.extract_videos_from_page(page['content_html'])
video_ids.extend(ids_in_page)
if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
break
self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

View File

@@ -2,11 +2,15 @@ import io
import json
import traceback
import hashlib
import os
import subprocess
import sys
from zipimport import zipimporter
from .utils import *
from .utils import (
compat_str,
compat_urllib_request,
)
from .version import __version__
def rsa_verify(message, signature, key):

View File

@@ -1,2 +1,2 @@
__version__ = '2013.11.13'
__version__ = '2013.11.17'