Compare commits

..

24 Commits

Author SHA1 Message Date
Philipp Hagemeister
835a22ef3f release 2014.11.23.1 2014-11-23 10:51:16 +01:00
Philipp Hagemeister
7d4111ed14 Provide guidance when called with a YouTube ID starting with a dash.
Reported at https://news.ycombinator.com/item?id=8648121
2014-11-23 10:51:09 +01:00
Philipp Hagemeister
d37cab2a9d Credit @WillSewell for vk:user (#4233) 2014-11-23 10:12:35 +01:00
Philipp Hagemeister
d16abf434a [vk] Some PEP8 love 2014-11-23 10:11:52 +01:00
Philipp Hagemeister
a8363f3ab7 [vk] Clarify test 2014-11-23 10:11:04 +01:00
Philipp Hagemeister
010cd3a3ee Merge remote-tracking branch 'WillSewell/vk-playlists' 2014-11-23 10:09:45 +01:00
Philipp Hagemeister
b9042def9d release 2014.11.23 2014-11-23 09:59:42 +01:00
Philipp Hagemeister
aa79ac0c82 [youtube] Support controversy videos (Fixes #4275) 2014-11-23 09:59:02 +01:00
Philipp Hagemeister
88125905cf Credit @nulloz for telebruxelles (#4270) 2014-11-23 09:49:23 +01:00
Philipp Hagemeister
dd60be2bf9 [telebruxelles] Simplify (#4270) 2014-11-23 09:44:42 +01:00
Philipp Hagemeister
119b3caa46 Merge remote-tracking branch 'nulloz/telebruxelles' 2014-11-23 09:38:18 +01:00
Naglis Jonaitis
49f0da7ae1 [rtlxl] Use unencrypted m3u8 streams (#4115) 2014-11-22 21:06:45 +02:00
nulloz
2cead7e7bc telebruxelles Add new extractor 2014-11-22 13:34:29 +01:00
Will Sewell
9262867e86 [vk.com] Added newline at the end of the file. 2014-11-21 23:25:05 +00:00
Will Sewell
b9272e8f8f [vk.com] Removed redundant log message -- this information is already being logged. 2014-11-21 23:22:52 +00:00
Will Sewell
021a0db8f7 [vk.com] Simplified the page_id acquisition by using the id matched in the URL earlier on. 2014-11-21 23:22:44 +00:00
Will Sewell
e1e8b6897b [vk.com] Updated the extract_videos_from_page function with a much simpler 1-liner. 2014-11-21 23:16:12 +00:00
Will Sewell
53d1cd1f77 [vk.com] Updated the _VALID_URL regex for the playlist IE. Removed optional m, and named the id group. 2014-11-21 23:03:31 +00:00
Will Sewell
cad985ab4d [vk.com] Updated the description to include vk.com. 2014-11-21 23:00:43 +00:00
Will Sewell
c52331f30c [vk.com] Updated a test video that has been removed, and added a comment for others to update two other test videos that are also now removed. 2014-11-21 23:00:33 +00:00
Will Sewell
42e1ff8665 [vk.com] Added upload_date variable to the test cases that still work. 2014-11-21 23:00:17 +00:00
Will Sewell
02a12f9fe6 [vk] date_added is now extracted from the video page. 2014-11-18 20:19:56 +00:00
Will Sewell
6fcd6e0e21 [vk] Updated the regex for matching user video pages. It now matches optional URL parameters too. 2014-11-18 19:34:12 +00:00
Will Sewell
469d4c8968 [vk] Added a new information extractor for pages that are a list of a user\'s videos on vk.com. It works in a same way to playlist style pages for the YT information extractors. 2014-11-17 17:53:34 -05:00
12 changed files with 180 additions and 32 deletions

View File

@@ -82,3 +82,5 @@ Xavier Beynon
Gabriel Schubiner
xantares
Jan Matějka
Mauroy Sébastien
William Sewell

View File

@@ -47,6 +47,7 @@ from youtube_dl.utils import (
js_to_json,
get_filesystem_encoding,
intlist_to_bytes,
args_to_str,
)
@@ -361,5 +362,11 @@ class TestUtil(unittest.TestCase):
intlist_to_bytes([0, 1, 127, 128, 255]),
b'\x00\x01\x7f\x80\xff')
def test_args_to_str(self):
self.assertEqual(
args_to_str(['foo', 'ba/r', '-baz', '2 be', '']),
'foo ba/r -baz \'2 be\' \'\''
)
if __name__ == '__main__':
unittest.main()

View File

@@ -29,6 +29,7 @@ from .compat import (
compat_str,
compat_urllib_error,
compat_urllib_request,
shlex_quote,
)
from .utils import (
escape_url,
@@ -60,6 +61,7 @@ from .utils import (
write_string,
YoutubeDLHandler,
prepend_extension,
args_to_str,
)
from .cache import Cache
from .extractor import get_info_extractor, gen_extractors
@@ -253,6 +255,22 @@ class YoutubeDL(object):
self.print_debug_header()
self.add_default_info_extractors()
def warn_if_short_id(self, argv):
# short YouTube ID starting with dash?
idxs = [
i for i, a in enumerate(argv)
if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
if idxs:
correct_argv = (
['youtube-dl'] +
[a for i, a in enumerate(argv) if i not in idxs] +
['--'] + [argv[i] for i in idxs]
)
self.report_warning(
'Long argument string detected. '
'Use -- to separate parameters and URLs, like this:\n%s\n' %
args_to_str(correct_argv))
def add_info_extractor(self, ie):
"""Add an InfoExtractor object to the end of the list."""
self._ies.append(ie)
@@ -1410,3 +1428,4 @@ class YoutubeDL(object):
if encoding is None:
encoding = preferredencoding()
return encoding

View File

@@ -334,11 +334,12 @@ def _real_main(argv=None):
# Maybe do nothing
if (len(all_urls) < 1) and (opts.load_info_filename is None):
if not (opts.update_self or opts.rm_cachedir):
parser.error('you must provide at least one URL')
else:
if opts.update_self or opts.rm_cachedir:
sys.exit()
ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
parser.error('you must provide at least one URL')
try:
if opts.load_info_filename is not None:
retcode = ydl.download_with_info_file(opts.load_info_filename)

View File

@@ -3,6 +3,7 @@ from __future__ import unicode_literals
import getpass
import optparse
import os
import re
import subprocess
import sys
@@ -174,7 +175,10 @@ try:
from shlex import quote as shlex_quote
except ImportError: # Python < 3.3
def shlex_quote(s):
return "'" + s.replace("'", "'\"'\"'") + "'"
if re.match(r'^[-_\w./]+$', s):
return s
else:
return "'" + s.replace("'", "'\"'\"'") + "'"
def compat_ord(c):

View File

@@ -380,6 +380,7 @@ from .teachingchannel import TeachingChannelIE
from .teamcoco import TeamcocoIE
from .techtalks import TechTalksIE
from .ted import TEDIE
from .telebruxelles import TeleBruxellesIE
from .telecinco import TelecincoIE
from .telemb import TeleMBIE
from .tenplay import TenPlayIE
@@ -453,7 +454,10 @@ from .vine import (
VineUserIE,
)
from .viki import VikiIE
from .vk import VKIE
from .vk import (
VKIE,
VKUserVideosIE,
)
from .vodlocker import VodlockerIE
from .vporn import VpornIE
from .vrt import VRTIE

View File

@@ -28,9 +28,8 @@ class RtlXlIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
uuid = mobj.group('uuid')
# Use m3u8 streams (see https://github.com/rg3/youtube-dl/issues/4118)
info = self._download_json(
'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/d=pc/fmt=adaptive/' % uuid,
'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
uuid)
material = info['material'][0]
@@ -39,12 +38,13 @@ class RtlXlIE(InfoExtractor):
progname = info['abstracts'][0]['name']
subtitle = material['title'] or info['episodes'][0]['name']
videopath = material['videopath']
# Use unencrypted m3u8 streams (See https://github.com/rg3/youtube-dl/issues/4118)
videopath = material['videopath'].replace('.f4m', '.m3u8')
m3u8_url = 'http://manifest.us.rtl.nl' + videopath
formats = self._extract_m3u8_formats(m3u8_url, uuid, ext='mp4')
video_urlpart = videopath.split('/adaptive/')[1][:-4]
video_urlpart = videopath.split('/flash/')[1][:-4]
PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4'
formats.extend([

View File

@@ -0,0 +1,60 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class TeleBruxellesIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?telebruxelles\.be/(news|sport|dernier-jt)/?(?P<id>[^/#?]+)'
_TESTS = [{
'url': 'http://www.telebruxelles.be/news/auditions-devant-parlement-francken-galant-tres-attendus/',
'md5': '59439e568c9ee42fb77588b2096b214f',
'info_dict': {
'id': '11942',
'display_id': 'auditions-devant-parlement-francken-galant-tres-attendus',
'ext': 'flv',
'title': 'Parlement : Francken et Galant répondent aux interpellations de lopposition',
'description': 're:Les auditions des ministres se poursuivent*'
},
'params': {
'skip_download': 'requires rtmpdump'
},
}, {
'url': 'http://www.telebruxelles.be/sport/basket-brussels-bat-mons-80-74/',
'md5': '181d3fbdcf20b909309e5aef5c6c6047',
'info_dict': {
'id': '10091',
'display_id': 'basket-brussels-bat-mons-80-74',
'ext': 'flv',
'title': 'Basket : le Brussels bat Mons 80-74',
'description': 're:^Ils l\u2019on fait ! En basket, le B*',
},
'params': {
'skip_download': 'requires rtmpdump'
},
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
article_id = self._html_search_regex(
r"<article id=\"post-(\d+)\"", webpage, 'article ID')
title = self._html_search_regex(
r'<h1 class=\"entry-title\">(.*?)</h1>', webpage, 'title')
description = self._og_search_description(webpage)
rtmp_url = self._html_search_regex(
r"file: \"(rtmp://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}/vod/mp4:\" \+ \"\w+\" \+ \".mp4)\"",
webpage, 'RTMP url')
rtmp_url = rtmp_url.replace("\" + \"", "")
return {
'id': article_id,
'display_id': display_id,
'title': title,
'description': description,
'url': rtmp_url,
'ext': 'flv',
'rtmp_live': True # if rtmpdump is not called with "--live" argument, the download is blocked and can be completed
}

View File

@@ -11,12 +11,13 @@ from ..utils import (
compat_urllib_parse,
compat_str,
unescapeHTML,
)
unified_strdate,
orderedSet)
class VKIE(InfoExtractor):
IE_NAME = 'vk.com'
_VALID_URL = r'https?://(?:m\.)?vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:.+?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))'
_VALID_URL = r'https?://(?:m\.)?vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:.+?\?.*?z=)?video(?P<videoid>[^s].*?)(?:\?|%2F|$))'
_NETRC_MACHINE = 'vk'
_TESTS = [
@@ -29,17 +30,19 @@ class VKIE(InfoExtractor):
'title': 'ProtivoGunz - Хуёвая песня',
'uploader': 're:Noize MC.*',
'duration': 195,
'upload_date': '20120212',
},
},
{
'url': 'http://vk.com/video4643923_163339118',
'md5': 'f79bccb5cd182b1f43502ca5685b2b36',
'url': 'http://vk.com/video205387401_165548505',
'md5': '6c0aeb2e90396ba97035b9cbde548700',
'info_dict': {
'id': '163339118',
'id': '165548505',
'ext': 'mp4',
'uploader': 'Elya Iskhakova',
'title': 'Dream Theater - Hollow Years Live at Budokan 720*',
'duration': 558,
'uploader': 'Tom Cruise',
'title': 'No name',
'duration': 9,
'upload_date': '20130721'
}
},
{
@@ -52,9 +55,12 @@ class VKIE(InfoExtractor):
'uploader': 'Vladimir Gavrin',
'title': 'Lin Dan',
'duration': 101,
'upload_date': '20120730',
}
},
{
# VIDEO NOW REMOVED
# please update if you find a video whose URL follows the same pattern
'url': 'http://vk.com/video-8871596_164049491',
'md5': 'a590bcaf3d543576c9bd162812387666',
'note': 'Only available for registered users',
@@ -64,18 +70,7 @@ class VKIE(InfoExtractor):
'uploader': 'Триллеры',
'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]',
'duration': 8352,
},
'skip': 'Requires vk account credentials',
},
{
'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
'md5': 'd82c22e449f036282d1d3f7f4d276869',
'info_dict': {
'id': '166094326',
'ext': 'mp4',
'uploader': 'Киномания - лучшее из мира кино',
'title': 'Запах женщины (1992)',
'duration': 9392,
'upload_date': '20121218'
},
'skip': 'Requires vk account credentials',
},
@@ -88,6 +83,7 @@ class VKIE(InfoExtractor):
'uploader': 'Киномания - лучшее из мира кино',
'title': ' ',
'duration': 7291,
'upload_date': '20140328',
},
'skip': 'Requires vk account credentials',
},
@@ -100,9 +96,15 @@ class VKIE(InfoExtractor):
'ext': 'mp4',
'title': 'Книга Илая',
'duration': 6771,
'upload_date': '20140626',
},
'skip': 'Only works from Russia',
},
{
# removed video, just testing that we match the pattern
'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
'only_matching': True,
},
]
def _login(self):
@@ -169,6 +171,13 @@ class VKIE(InfoExtractor):
data_json = self._search_regex(r'var vars = ({.*?});', info_page, 'vars')
data = json.loads(data_json)
# Extract upload date
upload_date = None
mobj = re.search(r'id="mv_date_wrap".*?Added ([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page)
if mobj is not None:
x = mobj.group(1) + ' ' + mobj.group(2)
upload_date = unified_strdate(mobj.group(1) + ' ' + mobj.group(2))
formats = [{
'format_id': k,
'url': v,
@@ -183,5 +192,28 @@ class VKIE(InfoExtractor):
'title': unescapeHTML(data['md_title']),
'thumbnail': data.get('jpg'),
'uploader': data.get('md_author'),
'duration': data.get('duration')
'duration': data.get('duration'),
'upload_date': upload_date,
}
class VKUserVideosIE(InfoExtractor):
IE_NAME = 'vk.com:user-videos'
IE_DESC = 'vk.com:All of a user\'s videos'
_VALID_URL = r'https?://vk\.com/videos(?P<id>[0-9]+)(?:m\?.*)?'
_TEMPLATE_URL = 'https://vk.com/videos'
_TEST = {
'url': 'http://vk.com/videos205387401',
'playlist_mincount': 4,
}
def _real_extract(self, url):
page_id = self._match_id(url)
page = self._download_webpage(url, page_id)
video_ids = orderedSet(
m.group(1) for m in re.finditer(r'href="/video([0-9_]+)"', page))
url_entries = [
self.url_result(
'http://vk.com/video' + video_id, 'VK', video_id=video_id)
for video_id in video_ids]
return self.playlist_result(url_entries, page_id)

View File

@@ -406,6 +406,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'format': '141',
},
},
# Controversy video
{
'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
'info_dict': {
'id': 'T4XJQO3qol8',
'ext': 'mp4',
'upload_date': '20100909',
'uploader': 'The Amazing Atheist',
'uploader_id': 'TheAmazingAtheist',
'title': 'Burning Everyone\'s Koran',
'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
}
}
]
def __init__(self, *args, **kwargs):
@@ -666,7 +679,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
video_id = self.extract_id(url)
# Get video webpage
url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
pref_cookies = [
c for c in self._downloader.cookiejar
if c.domain == '.youtube.com' and c.name == 'PREF']

View File

@@ -41,6 +41,7 @@ from .compat import (
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urlparse,
shlex_quote,
)
@@ -1433,3 +1434,8 @@ def ytdl_is_updateable():
from zipimport import zipimporter
return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
def args_to_str(args):
# Get a short string representation for a subprocess command
return ' '.join(shlex_quote(a) for a in args)

View File

@@ -1,2 +1,2 @@
__version__ = '2014.11.21.1'
__version__ = '2014.11.23.1'