Compare commits
60 Commits
2014.11.04
...
2014.11.13
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c3d582985f | ||
|
|
4c0924bb24 | ||
|
|
3fa5bb3802 | ||
|
|
c47ec62b83 | ||
|
|
e4bdb37ec6 | ||
|
|
3e6e4999ca | ||
|
|
0e15e725a0 | ||
|
|
437f68d868 | ||
|
|
d91d124081 | ||
|
|
2d42905b68 | ||
|
|
cbe71cb41d | ||
|
|
894dd8682e | ||
|
|
9e05d039e0 | ||
|
|
bbd5f2de5e | ||
|
|
73689dafbf | ||
|
|
4b50ba0989 | ||
|
|
5ccaddf5b1 | ||
|
|
0b201a3134 | ||
|
|
ffe38646ca | ||
|
|
b703ab4d7f | ||
|
|
c6afed48ff | ||
|
|
732c848c14 | ||
|
|
9d2a4dae90 | ||
|
|
7009a9047a | ||
|
|
498942f187 | ||
|
|
28465df1ff | ||
|
|
ef89dba58f | ||
|
|
13ba3a6461 | ||
|
|
8f6ec4bbe6 | ||
|
|
c295490830 | ||
|
|
eb4cb42a02 | ||
|
|
7a8cbc72b2 | ||
|
|
2774852c2f | ||
|
|
bbcc21efd1 | ||
|
|
60526d6bcb | ||
|
|
1d4df56d09 | ||
|
|
a1cf99d03a | ||
|
|
3c6af203cc | ||
|
|
1a92e086a7 | ||
|
|
519c73f267 | ||
|
|
a6dae6c09c | ||
|
|
f866e474f3 | ||
|
|
8bb9b97c97 | ||
|
|
d6fdc38682 | ||
|
|
c2b61af548 | ||
|
|
2fdbf27ad8 | ||
|
|
29ed169cd6 | ||
|
|
b868c972d1 | ||
|
|
9908e03528 | ||
|
|
1fe8fb8c20 | ||
|
|
5d63b0aa93 | ||
|
|
4164f0117e | ||
|
|
37aab27808 | ||
|
|
6110bbbfdd | ||
|
|
cde9b380e6 | ||
|
|
dab647a7b6 | ||
|
|
a316a83d2b | ||
|
|
81b22aee8b | ||
|
|
982a58d049 | ||
|
|
ccdd0ffb80 |
3
AUTHORS
3
AUTHORS
@@ -79,4 +79,5 @@ Dennis Scheiba
|
||||
Damon Timm
|
||||
winwon
|
||||
Xavier Beynon
|
||||
Gabriel Schubiner
|
||||
Gabriel Schubiner
|
||||
xantares
|
||||
|
||||
31
README.md
31
README.md
@@ -131,17 +131,19 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
%(upload_date)s for the upload date
|
||||
(YYYYMMDD), %(extractor)s for the provider
|
||||
(youtube, metacafe, etc), %(id)s for the
|
||||
video id, %(playlist)s for the playlist the
|
||||
video id, %(playlist_title)s,
|
||||
%(playlist_id)s, or %(playlist)s (=title if
|
||||
present, ID otherwise) for the playlist the
|
||||
video is in, %(playlist_index)s for the
|
||||
position in the playlist and %% for a
|
||||
literal percent. %(height)s and %(width)s
|
||||
for the width and height of the video
|
||||
format. %(resolution)s for a textual
|
||||
position in the playlist. %(height)s and
|
||||
%(width)s for the width and height of the
|
||||
video format. %(resolution)s for a textual
|
||||
description of the resolution of the video
|
||||
format. Use - to output to stdout. Can also
|
||||
be used to download to a different
|
||||
directory, for example with -o '/my/downloa
|
||||
ds/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
|
||||
format. %% for a literal percent. Use - to
|
||||
output to stdout. Can also be used to
|
||||
download to a different directory, for
|
||||
example with -o '/my/downloads/%(uploader)s
|
||||
/%(title)s-%(id)s.%(ext)s' .
|
||||
--autonumber-size NUMBER Specifies the number of digits in
|
||||
%(autonumber)s when it is present in output
|
||||
filename template or --auto-number option
|
||||
@@ -239,8 +241,13 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
"worst", "worstvideo" and "worstaudio". By
|
||||
default, youtube-dl will pick the best
|
||||
quality. Use commas to download multiple
|
||||
audio formats, such as -f
|
||||
136/137/mp4/bestvideo,140/m4a/bestaudio
|
||||
audio formats, such as -f
|
||||
136/137/mp4/bestvideo,140/m4a/bestaudio.
|
||||
You can merge the video and audio of two
|
||||
formats into a single file using -f <video-
|
||||
format>+<audio-format> (requires ffmpeg or
|
||||
avconv), for example -f
|
||||
bestvideo+bestaudio.
|
||||
--all-formats download all available video formats
|
||||
--prefer-free-formats prefer free video formats unless a specific
|
||||
one is requested
|
||||
@@ -500,7 +507,7 @@ If you want to add support for a new site, you can follow this quick list (assum
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will be then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
||||
7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want.
|
||||
8. If you can, check the code with [pyflakes](https://pypi.python.org/pypi/pyflakes) (a good idea) and [pep8](https://pypi.python.org/pypi/pep8) (optional, ignore E501).
|
||||
9. When the tests pass, [add](https://www.kernel.org/pub/software/scm/git/docs/git-add.html) the new files and [commit](https://www.kernel.org/pub/software/scm/git/docs/git-commit.html) them and [push](https://www.kernel.org/pub/software/scm/git/docs/git-push.html) the result, like this:
|
||||
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
||||
|
||||
$ git add youtube_dl/extractor/__init__.py
|
||||
$ git add youtube_dl/extractor/yourextractor.py
|
||||
|
||||
@@ -145,7 +145,8 @@ def expect_info_dict(self, expected_dict, got_dict):
|
||||
info_dict_str = ''.join(
|
||||
' %s: %s,\n' % (_repr(k), _repr(v))
|
||||
for k, v in test_info_dict.items())
|
||||
write_string('\n"info_dict": {\n' + info_dict_str + '}\n', out=sys.stderr)
|
||||
write_string(
|
||||
'\n\'info_dict\': {\n' + info_dict_str + '}\n', out=sys.stderr)
|
||||
self.assertFalse(
|
||||
missing_keys,
|
||||
'Missing keys in test definition: %s' % (
|
||||
|
||||
@@ -16,6 +16,7 @@ import json
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from youtube_dl.utils import (
|
||||
clean_html,
|
||||
DateRange,
|
||||
encodeFilename,
|
||||
find_xpath_attr,
|
||||
@@ -45,6 +46,7 @@ from youtube_dl.utils import (
|
||||
escape_url,
|
||||
js_to_json,
|
||||
get_filesystem_encoding,
|
||||
intlist_to_bytes,
|
||||
)
|
||||
|
||||
|
||||
@@ -345,5 +347,14 @@ class TestUtil(unittest.TestCase):
|
||||
on = js_to_json('{"abc": true}')
|
||||
self.assertEqual(json.loads(on), {'abc': True})
|
||||
|
||||
def test_clean_html(self):
|
||||
self.assertEqual(clean_html('a:\nb'), 'a: b')
|
||||
self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
|
||||
|
||||
def test_intlist_to_bytes(self):
|
||||
self.assertEqual(
|
||||
intlist_to_bytes([0, 1, 127, 128, 255]),
|
||||
b'\x00\x01\x7f\x80\xff')
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
@@ -14,7 +14,7 @@ import re
|
||||
import string
|
||||
|
||||
from youtube_dl.extractor import YoutubeIE
|
||||
from youtube_dl.utils import compat_str, compat_urlretrieve
|
||||
from youtube_dl.compat import compat_str, compat_urlretrieve
|
||||
|
||||
_TESTS = [
|
||||
(
|
||||
|
||||
@@ -658,6 +658,8 @@ class YoutubeDL(object):
|
||||
extra = {
|
||||
'n_entries': n_entries,
|
||||
'playlist': playlist,
|
||||
'playlist_id': ie_result.get('id'),
|
||||
'playlist_title': ie_result.get('title'),
|
||||
'playlist_index': i + playliststart,
|
||||
'extractor': ie_result['extractor'],
|
||||
'webpage_url': ie_result['webpage_url'],
|
||||
@@ -836,6 +838,13 @@ class YoutubeDL(object):
|
||||
formats_info = (self.select_format(format_1, formats),
|
||||
self.select_format(format_2, formats))
|
||||
if all(formats_info):
|
||||
# The first format must contain the video and the
|
||||
# second the audio
|
||||
if formats_info[0].get('vcodec') == 'none':
|
||||
self.report_error('The first format must '
|
||||
'contain the video, try using '
|
||||
'"-f %s+%s"' % (format_2, format_1))
|
||||
return
|
||||
selected_format = {
|
||||
'requested_formats': formats_info,
|
||||
'format': rf,
|
||||
@@ -1297,11 +1306,13 @@ class YoutubeDL(object):
|
||||
self.report_warning(
|
||||
'Your Python is broken! Update to a newer and supported version')
|
||||
|
||||
stdout_encoding = getattr(
|
||||
sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
|
||||
encoding_str = (
|
||||
'[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
|
||||
locale.getpreferredencoding(),
|
||||
sys.getfilesystemencoding(),
|
||||
sys.stdout.encoding,
|
||||
stdout_encoding,
|
||||
self.get_encoding()))
|
||||
write_string(encoding_str, encoding=None)
|
||||
|
||||
|
||||
@@ -141,6 +141,7 @@ from .generic import GenericIE
|
||||
from .glide import GlideIE
|
||||
from .globo import GloboIE
|
||||
from .godtube import GodTubeIE
|
||||
from .goldenmoustache import GoldenMoustacheIE
|
||||
from .golem import GolemIE
|
||||
from .googleplus import GooglePlusIE
|
||||
from .googlesearch import GoogleSearchIE
|
||||
@@ -323,6 +324,7 @@ from .sbs import SBSIE
|
||||
from .scivee import SciVeeIE
|
||||
from .screencast import ScreencastIE
|
||||
from .servingsys import ServingSysIE
|
||||
from .sexu import SexuIE
|
||||
from .sexykarma import SexyKarmaIE
|
||||
from .shared import SharedIE
|
||||
from .sharesix import ShareSixIE
|
||||
@@ -421,6 +423,7 @@ from .vesti import VestiIE
|
||||
from .vevo import VevoIE
|
||||
from .vgtv import VGTVIE
|
||||
from .vh1 import VH1IE
|
||||
from .vice import ViceIE
|
||||
from .viddler import ViddlerIE
|
||||
from .videobam import VideoBamIE
|
||||
from .videodetective import VideoDetectiveIE
|
||||
|
||||
@@ -11,13 +11,13 @@ class ABCIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.abc\.net\.au/news/[^/]+/[^/]+/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.abc.net.au/news/2014-07-25/bringing-asylum-seekers-to-australia-would-give/5624716',
|
||||
'md5': 'dad6f8ad011a70d9ddf887ce6d5d0742',
|
||||
'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334',
|
||||
'md5': 'cb3dd03b18455a661071ee1e28344d9f',
|
||||
'info_dict': {
|
||||
'id': '5624716',
|
||||
'id': '5868334',
|
||||
'ext': 'mp4',
|
||||
'title': 'Bringing asylum seekers to Australia would give them right to asylum claims: professor',
|
||||
'description': 'md5:ba36fa5e27e5c9251fd929d339aea4af',
|
||||
'title': 'Australia to help staff Ebola treatment centre in Sierra Leone',
|
||||
'description': 'md5:809ad29c67a05f54eb41f2a105693a67',
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ class AllocineIE(InfoExtractor):
|
||||
'id': '19546517',
|
||||
'ext': 'mp4',
|
||||
'title': 'Astérix - Le Domaine des Dieux Teaser VF',
|
||||
'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
|
||||
'description': 'md5:abcd09ce503c6560512c14ebfdb720d2',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
|
||||
@@ -110,20 +110,25 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
||||
'playlist': [
|
||||
{
|
||||
'file': '1353101989.mp3',
|
||||
'md5': '39bc1eded3476e927c724321ddf116cf',
|
||||
'info_dict': {
|
||||
'id': '1353101989',
|
||||
'ext': 'mp3',
|
||||
'title': 'Intro',
|
||||
}
|
||||
},
|
||||
{
|
||||
'file': '38097443.mp3',
|
||||
'md5': '1a2c32e2691474643e912cc6cd4bffaa',
|
||||
'info_dict': {
|
||||
'id': '38097443',
|
||||
'ext': 'mp3',
|
||||
'title': 'Kero One - Keep It Alive (Blazo remix)',
|
||||
}
|
||||
},
|
||||
],
|
||||
'info_dict': {
|
||||
'title': 'Jazz Format Mixtape vol.1',
|
||||
},
|
||||
'params': {
|
||||
'playlistend': 2
|
||||
},
|
||||
|
||||
@@ -14,6 +14,7 @@ from ..utils import (
|
||||
compat_str,
|
||||
compat_urllib_request,
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
@@ -263,10 +264,16 @@ class BrightcoveIE(InfoExtractor):
|
||||
if not url:
|
||||
continue
|
||||
if rend['remote']:
|
||||
# This type of renditions are served through akamaihd.net,
|
||||
# but they don't use f4m manifests
|
||||
url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB'
|
||||
ext = 'flv'
|
||||
url_comp = compat_urllib_parse_urlparse(url)
|
||||
if url_comp.path.endswith('.m3u8'):
|
||||
formats.extend(
|
||||
self._extract_m3u8_formats(url, info['id'], 'mp4'))
|
||||
continue
|
||||
elif 'akamaihd.net' in url_comp.netloc:
|
||||
# This type of renditions are served through
|
||||
# akamaihd.net, but they don't use f4m manifests
|
||||
url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB'
|
||||
ext = 'flv'
|
||||
else:
|
||||
ext = determine_ext(url)
|
||||
size = rend.get('size')
|
||||
|
||||
@@ -10,12 +10,12 @@ from ..utils import ExtractorError
|
||||
class BYUtvIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.byutv.org/watch/44e80f7b-e3ba-43ba-8c51-b1fd96c94a79/granite-flats-talking',
|
||||
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
|
||||
'info_dict': {
|
||||
'id': 'granite-flats-talking',
|
||||
'id': 'studio-c-season-5-episode-5',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:4e9a7ce60f209a33eca0ac65b4918e1c',
|
||||
'title': 'Talking',
|
||||
'description': 'md5:5438d33774b6bdc662f9485a340401cc',
|
||||
'title': 'Season 5 Episode 5',
|
||||
'thumbnail': 're:^https?://.*promo.*'
|
||||
},
|
||||
'params': {
|
||||
|
||||
@@ -27,7 +27,7 @@ class Channel9IE(InfoExtractor):
|
||||
'title': 'Developer Kick-Off Session: Stuff We Love',
|
||||
'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
|
||||
'duration': 4576,
|
||||
'thumbnail': 'http://media.ch9.ms/ch9/9d51/03902f2d-fc97-4d3c-b195-0bfe15a19d51/KOS002_220.jpg',
|
||||
'thumbnail': 'http://video.ch9.ms/ch9/9d51/03902f2d-fc97-4d3c-b195-0bfe15a19d51/KOS002_220.jpg',
|
||||
'session_code': 'KOS002',
|
||||
'session_day': 'Day 1',
|
||||
'session_room': 'Arena 1A',
|
||||
@@ -43,7 +43,7 @@ class Channel9IE(InfoExtractor):
|
||||
'title': 'Self-service BI with Power BI - nuclear testing',
|
||||
'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
|
||||
'duration': 1540,
|
||||
'thumbnail': 'http://media.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg',
|
||||
'thumbnail': 'http://video.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg',
|
||||
'authors': [ 'Mike Wilmot' ],
|
||||
},
|
||||
}
|
||||
@@ -94,7 +94,7 @@ class Channel9IE(InfoExtractor):
|
||||
|
||||
def _extract_title(self, html):
|
||||
title = self._html_search_meta('title', html, 'title')
|
||||
if title is None:
|
||||
if title is None:
|
||||
title = self._og_search_title(html)
|
||||
TITLE_SUFFIX = ' (Channel 9)'
|
||||
if title is not None and title.endswith(TITLE_SUFFIX):
|
||||
@@ -115,7 +115,7 @@ class Channel9IE(InfoExtractor):
|
||||
return self._html_search_meta('description', html, 'description')
|
||||
|
||||
def _extract_duration(self, html):
|
||||
m = re.search(r'data-video_duration="(?P<hours>\d{2}):(?P<minutes>\d{2}):(?P<seconds>\d{2})"', html)
|
||||
m = re.search(r'"length": *"(?P<hours>\d{2}):(?P<minutes>\d{2}):(?P<seconds>\d{2})"', html)
|
||||
return ((int(m.group('hours')) * 60 * 60) + (int(m.group('minutes')) * 60) + int(m.group('seconds'))) if m else None
|
||||
|
||||
def _extract_slides(self, html):
|
||||
@@ -167,7 +167,7 @@ class Channel9IE(InfoExtractor):
|
||||
return re.findall(r'<a href="/Events/Speakers/[^"]+">([^<]+)</a>', html)
|
||||
|
||||
def _extract_content(self, html, content_path):
|
||||
# Look for downloadable content
|
||||
# Look for downloadable content
|
||||
formats = self._formats_from_html(html)
|
||||
slides = self._extract_slides(html)
|
||||
zip_ = self._extract_zip(html)
|
||||
@@ -258,16 +258,17 @@ class Channel9IE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, content_path, 'Downloading web page')
|
||||
|
||||
page_type_m = re.search(r'<meta name="Search.PageType" content="(?P<pagetype>[^"]+)"/>', webpage)
|
||||
if page_type_m is None:
|
||||
raise ExtractorError('Search.PageType not found, don\'t know how to process this page', expected=True)
|
||||
page_type_m = re.search(r'<meta name="WT.entryid" content="(?P<pagetype>[^:]+)[^"]+"/>', webpage)
|
||||
if page_type_m is not None:
|
||||
page_type = page_type_m.group('pagetype')
|
||||
if page_type == 'Entry': # Any 'item'-like page, may contain downloadable content
|
||||
return self._extract_entry_item(webpage, content_path)
|
||||
elif page_type == 'Session': # Event session page, may contain downloadable content
|
||||
return self._extract_session(webpage, content_path)
|
||||
elif page_type == 'Event':
|
||||
return self._extract_list(content_path)
|
||||
else:
|
||||
raise ExtractorError('Unexpected WT.entryid %s' % page_type, expected=True)
|
||||
|
||||
page_type = page_type_m.group('pagetype')
|
||||
if page_type == 'List': # List page, may contain list of 'item'-like objects
|
||||
else: # Assuming list
|
||||
return self._extract_list(content_path)
|
||||
elif page_type == 'Entry.Item': # Any 'item'-like page, may contain downloadable content
|
||||
return self._extract_entry_item(webpage, content_path)
|
||||
elif page_type == 'Session': # Event session page, may contain downloadable content
|
||||
return self._extract_session(webpage, content_path)
|
||||
else:
|
||||
raise ExtractorError('Unexpected Search.PageType %s' % page_type, expected=True)
|
||||
@@ -42,11 +42,12 @@ class CinemassacreIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
|
||||
mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
|
||||
mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<full_video_id>(?:Cinemassacre-)?(?P<video_id>.+?)))"', webpage)
|
||||
if not mobj:
|
||||
raise ExtractorError('Can\'t extract embed url and video id')
|
||||
playerdata_url = mobj.group('embed_url')
|
||||
video_id = mobj.group('video_id')
|
||||
full_video_id = mobj.group('full_video_id')
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'<title>(?P<title>.+?)\|', webpage, 'title')
|
||||
@@ -60,37 +61,52 @@ class CinemassacreIE(InfoExtractor):
|
||||
vidurl = self._search_regex(
|
||||
r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/')
|
||||
|
||||
videolist_url = self._search_regex(
|
||||
r"file\s*:\s*'(http.+?/jwplayer\.smil)'", playerdata, 'jwplayer.smil')
|
||||
videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML')
|
||||
videolist_url = None
|
||||
|
||||
formats = []
|
||||
baseurl = vidurl[:vidurl.rfind('/')+1]
|
||||
for video in videolist.findall('.//video'):
|
||||
src = video.get('src')
|
||||
if not src:
|
||||
continue
|
||||
file_ = src.partition(':')[-1]
|
||||
width = int_or_none(video.get('width'))
|
||||
height = int_or_none(video.get('height'))
|
||||
bitrate = int_or_none(video.get('system-bitrate'))
|
||||
format = {
|
||||
'url': baseurl + file_,
|
||||
'format_id': src.rpartition('.')[0].rpartition('_')[-1],
|
||||
}
|
||||
if width or height:
|
||||
format.update({
|
||||
'tbr': bitrate // 1000 if bitrate else None,
|
||||
'width': width,
|
||||
'height': height,
|
||||
})
|
||||
else:
|
||||
format.update({
|
||||
'abr': bitrate // 1000 if bitrate else None,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
formats.append(format)
|
||||
self._sort_formats(formats)
|
||||
mobj = re.search(r"'videoserver'\s*:\s*'(?P<videoserver>[^']+)'", playerdata)
|
||||
if mobj:
|
||||
videoserver = mobj.group('videoserver')
|
||||
mobj = re.search(r'\'vidid\'\s*:\s*"(?P<vidid>[^\']+)"', playerdata)
|
||||
vidid = mobj.group('vidid') if mobj else full_video_id
|
||||
videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid)
|
||||
else:
|
||||
mobj = re.search(r"file\s*:\s*'(?P<smil>http.+?/jwplayer\.smil)'", playerdata)
|
||||
if mobj:
|
||||
videolist_url = mobj.group('smil')
|
||||
|
||||
if videolist_url:
|
||||
videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML')
|
||||
formats = []
|
||||
baseurl = vidurl[:vidurl.rfind('/')+1]
|
||||
for video in videolist.findall('.//video'):
|
||||
src = video.get('src')
|
||||
if not src:
|
||||
continue
|
||||
file_ = src.partition(':')[-1]
|
||||
width = int_or_none(video.get('width'))
|
||||
height = int_or_none(video.get('height'))
|
||||
bitrate = int_or_none(video.get('system-bitrate'))
|
||||
format = {
|
||||
'url': baseurl + file_,
|
||||
'format_id': src.rpartition('.')[0].rpartition('_')[-1],
|
||||
}
|
||||
if width or height:
|
||||
format.update({
|
||||
'tbr': bitrate // 1000 if bitrate else None,
|
||||
'width': width,
|
||||
'height': height,
|
||||
})
|
||||
else:
|
||||
format.update({
|
||||
'abr': bitrate // 1000 if bitrate else None,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
formats.append(format)
|
||||
self._sort_formats(formats)
|
||||
else:
|
||||
formats = [{
|
||||
'url': vidurl,
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -94,7 +94,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
|
||||
# It may just embed a vevo video:
|
||||
m_vevo = re.search(
|
||||
r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?videoId=(?P<id>[\w]*)',
|
||||
r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?video=(?P<id>[\w]*)',
|
||||
webpage)
|
||||
if m_vevo is not None:
|
||||
vevo_id = m_vevo.group('id')
|
||||
|
||||
@@ -21,7 +21,6 @@ class FunnyOrDieIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.funnyordie.com/embed/e402820827',
|
||||
'md5': '29f4c5e5a61ca39dfd7e8348a75d0aad',
|
||||
'info_dict': {
|
||||
'id': 'e402820827',
|
||||
'ext': 'mp4',
|
||||
|
||||
@@ -101,17 +101,19 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
},
|
||||
{
|
||||
# https://github.com/rg3/youtube-dl/issues/3541
|
||||
'add_ie': ['Brightcove'],
|
||||
'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
|
||||
'info_dict': {
|
||||
'id': '3866516442001',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Leer mij vrouwen kennen: Aflevering 1',
|
||||
'description': 'Leer mij vrouwen kennen: Aflevering 1',
|
||||
'uploader': 'SBS Broadcasting',
|
||||
},
|
||||
'skip': 'Restricted to Netherlands',
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'skip_download': True, # m3u8 download
|
||||
},
|
||||
},
|
||||
# Direct link to a video
|
||||
@@ -574,6 +576,7 @@ class GenericIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': os.path.splitext(url_basename(url))[0],
|
||||
'direct': True,
|
||||
'formats': [{
|
||||
'format_id': m.group('format_id'),
|
||||
'url': url,
|
||||
|
||||
48
youtube_dl/extractor/goldenmoustache.py
Normal file
48
youtube_dl/extractor/goldenmoustache.py
Normal file
@@ -0,0 +1,48 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class GoldenMoustacheIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?goldenmoustache\.com/(?P<display_id>[\w-]+)-(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.goldenmoustache.com/suricate-le-poker-3700/',
|
||||
'md5': '0f904432fa07da5054d6c8beb5efb51a',
|
||||
'info_dict': {
|
||||
'id': '3700',
|
||||
'ext': 'mp4',
|
||||
'title': 'Suricate - Le Poker',
|
||||
'description': 'md5:3d1f242f44f8c8cb0a106f1fd08e5dc9',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'view_count': int,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'data-src-type="mp4" data-src="([^"]+)"', webpage, 'video URL')
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.*?) - Golden Moustache</title>', webpage, 'title')
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
view_count = int_or_none(self._html_search_regex(
|
||||
r'<strong>([0-9]+)</strong>\s*VUES</span>',
|
||||
webpage, 'view count', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'view_count': view_count,
|
||||
}
|
||||
@@ -1,15 +1,11 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urlparse,
|
||||
str_to_int,
|
||||
ExtractorError,
|
||||
)
|
||||
import json
|
||||
|
||||
|
||||
class GoshgayIE(InfoExtractor):
|
||||
@@ -27,36 +23,27 @@ class GoshgayIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._search_regex(r'class="video-title"><h1>(.+?)<', webpage, 'title')
|
||||
title = self._og_search_title(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
family_friendly = self._html_search_meta(
|
||||
'isFamilyFriendly', webpage, default='false')
|
||||
config_url = self._search_regex(
|
||||
r"'config'\s*:\s*'([^']+)'", webpage, 'config URL')
|
||||
|
||||
player_config = self._search_regex(
|
||||
r'(?s)jwplayer\("player"\)\.setup\(({.+?})\)', webpage, 'config settings')
|
||||
player_vars = json.loads(player_config.replace("'", '"'))
|
||||
width = str_to_int(player_vars.get('width'))
|
||||
height = str_to_int(player_vars.get('height'))
|
||||
config_uri = player_vars.get('config')
|
||||
config = self._download_xml(
|
||||
config_url, video_id, 'Downloading player config XML')
|
||||
|
||||
if config_uri is None:
|
||||
raise ExtractorError('Missing config URI')
|
||||
node = self._download_xml(config_uri, video_id, 'Downloading player config XML',
|
||||
errnote='Unable to download XML')
|
||||
if node is None:
|
||||
if config is None:
|
||||
raise ExtractorError('Missing config XML')
|
||||
if node.tag != 'config':
|
||||
if config.tag != 'config':
|
||||
raise ExtractorError('Missing config attribute')
|
||||
fns = node.findall('file')
|
||||
imgs = node.findall('image')
|
||||
if len(fns) != 1:
|
||||
fns = config.findall('file')
|
||||
if len(fns) < 1:
|
||||
raise ExtractorError('Missing media URI')
|
||||
video_url = fns[0].text
|
||||
if len(imgs) < 1:
|
||||
thumbnail = None
|
||||
else:
|
||||
thumbnail = imgs[0].text
|
||||
|
||||
url_comp = compat_urlparse.urlparse(url)
|
||||
ref = "%s://%s%s" % (url_comp[0], url_comp[1], url_comp[2])
|
||||
@@ -65,9 +52,7 @@ class GoshgayIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'width': width,
|
||||
'height': height,
|
||||
'thumbnail': thumbnail,
|
||||
'http_referer': ref,
|
||||
'age_limit': 18,
|
||||
'age_limit': 0 if family_friendly == 'true' else 18,
|
||||
}
|
||||
|
||||
@@ -8,12 +8,13 @@ import re
|
||||
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError, compat_urllib_request, compat_html_parser
|
||||
|
||||
from ..utils import (
|
||||
from ..compat import (
|
||||
compat_html_parser,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class GroovesharkHtmlParser(compat_html_parser.HTMLParser):
|
||||
|
||||
@@ -5,11 +5,11 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
get_element_by_id,
|
||||
parse_iso8601,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
get_element_by_id,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
@@ -30,7 +30,7 @@ class IzleseneIE(InfoExtractor):
|
||||
'description': 'md5:253753e2655dde93f59f74b572454f6d',
|
||||
'thumbnail': 're:^http://.*\.jpg',
|
||||
'uploader_id': 'pelikzzle',
|
||||
'timestamp': 1404298698,
|
||||
'timestamp': 1404302298,
|
||||
'upload_date': '20140702',
|
||||
'duration': 95.395,
|
||||
'age_limit': 0,
|
||||
@@ -46,7 +46,7 @@ class IzleseneIE(InfoExtractor):
|
||||
'description': 'Tarkan Dortmund 2006 Konseri',
|
||||
'thumbnail': 're:^http://.*\.jpg',
|
||||
'uploader_id': 'parlayankiz',
|
||||
'timestamp': 1163318593,
|
||||
'timestamp': 1163322193,
|
||||
'upload_date': '20061112',
|
||||
'duration': 253.666,
|
||||
'age_limit': 0,
|
||||
@@ -55,10 +55,9 @@ class IzleseneIE(InfoExtractor):
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
url = 'http://www.izlesene.com/video/%s' % video_id
|
||||
video_id = self._match_id(url)
|
||||
|
||||
url = 'http://www.izlesene.com/video/%s' % video_id
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
|
||||
@@ -33,7 +33,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url)
|
||||
if not m:
|
||||
return rtmp_video_url
|
||||
base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
|
||||
base = 'http://viacommtvstrmfs.fplive.net/'
|
||||
return base + m.group('finalid')
|
||||
|
||||
def _get_feed_url(self, uri):
|
||||
|
||||
@@ -13,9 +13,10 @@ class MySpassIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.myspass\.de/.*'
|
||||
_TEST = {
|
||||
'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
|
||||
'file': '11741.mp4',
|
||||
'md5': '0b49f4844a068f8b33f4b7c88405862b',
|
||||
'info_dict': {
|
||||
'id': '11741',
|
||||
'ext': 'mp4',
|
||||
"description": "Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?",
|
||||
"title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2",
|
||||
},
|
||||
|
||||
@@ -67,7 +67,7 @@ class NDRIE(InfoExtractor):
|
||||
|
||||
thumbnail = None
|
||||
|
||||
video_url = re.search(r'''3: \{src:'(?P<video>.+?)\.hi\.mp4', type:"video/mp4"},''', page)
|
||||
video_url = re.search(r'''3: \{src:'(?P<video>.+?)\.(lo|hi|hq)\.mp4', type:"video/mp4"},''', page)
|
||||
if video_url:
|
||||
thumbnails = re.findall(r'''\d+: \{src: "([^"]+)"(?: \|\| '[^']+')?, quality: '([^']+)'}''', page)
|
||||
if thumbnails:
|
||||
|
||||
@@ -12,6 +12,7 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
parse_duration,
|
||||
int_or_none,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
@@ -108,6 +109,9 @@ class NiconicoIE(InfoExtractor):
|
||||
flv_info_request, video_id,
|
||||
note='Downloading flv info', errnote='Unable to download flv info')
|
||||
|
||||
if 'deleted=' in flv_info_webpage:
|
||||
raise ExtractorError('The video has been deleted.',
|
||||
expected=True)
|
||||
video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
|
||||
|
||||
# Start extracting information
|
||||
@@ -171,7 +175,8 @@ class NiconicoPlaylistIE(InfoExtractor):
|
||||
entries = [{
|
||||
'_type': 'url',
|
||||
'ie_key': NiconicoIE.ie_key(),
|
||||
'url': 'http://www.nicovideo.jp/watch/%s' % entry['item_id'],
|
||||
'url': ('http://www.nicovideo.jp/watch/%s' %
|
||||
entry['item_data']['video_id']),
|
||||
} for entry in entries]
|
||||
|
||||
return {
|
||||
|
||||
61
youtube_dl/extractor/sexu.py
Normal file
61
youtube_dl/extractor/sexu.py
Normal file
@@ -0,0 +1,61 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class SexuIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?sexu\.com/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://sexu.com/961791/',
|
||||
'md5': 'ff615aca9691053c94f8f10d96cd7884',
|
||||
'info_dict': {
|
||||
'id': '961791',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:4d05a19a5fc049a63dbbaf05fb71d91b',
|
||||
'description': 'md5:c5ed8625eb386855d5a7967bd7b77a54',
|
||||
'categories': list, # NSFW
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
quality_arr = self._search_regex(
|
||||
r'sources:\s*\[([^\]]+)\]', webpage, 'forrmat string')
|
||||
formats = [{
|
||||
'url': fmt[0].replace('\\', ''),
|
||||
'format_id': fmt[1],
|
||||
'height': int(fmt[1][:3]),
|
||||
} for fmt in re.findall(r'"file":"([^"]+)","label":"([^"]+)"', quality_arr)]
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>([^<]+)\s*-\s*Sexu\.Com</title>', webpage, 'title')
|
||||
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, 'description')
|
||||
|
||||
thumbnail = self._html_search_regex(
|
||||
r'image:\s*"([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
|
||||
categories_str = self._html_search_meta(
|
||||
'keywords', webpage, 'categories')
|
||||
categories = (
|
||||
None if categories_str is None
|
||||
else categories_str.split(','))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'categories': categories,
|
||||
'formats': formats,
|
||||
'age_limit': 18,
|
||||
}
|
||||
@@ -4,11 +4,11 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import compat_urlparse
|
||||
from ..compat import compat_urlparse
|
||||
|
||||
|
||||
class SpiegelIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
|
||||
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<id>[0-9]+)(?:-embed)?(?:\.html)?(?:#.*)?$'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
|
||||
'md5': '2c2754212136f35fb4b19767d242f66e',
|
||||
@@ -29,16 +29,24 @@ class SpiegelIE(InfoExtractor):
|
||||
'description': 'md5:c2322b65e58f385a820c10fa03b2d088',
|
||||
'duration': 983,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-embed.html',
|
||||
'md5': 'd8eeca6bfc8f1cd6f490eb1f44695d51',
|
||||
'info_dict': {
|
||||
'id': '1519126',
|
||||
'ext': 'mp4',
|
||||
'description': 'SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. Hier kommen seine Antworten auf die besten sechs Fragen.',
|
||||
'title': 'Fragen an Astronaut Alexander Gerst: "Bekommen Sie die Tageszeiten mit?"',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = re.match(self._VALID_URL, url)
|
||||
video_id = m.group('videoID')
|
||||
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<div class="module-title">(.*?)</div>', webpage, 'title')
|
||||
title = re.sub(r'\s+', ' ', self._html_search_regex(
|
||||
r'(?s)<(?:h1|div) class="module-title"[^>]*>(.*?)</(?:h1|div)>',
|
||||
webpage, 'title'))
|
||||
description = self._html_search_meta('description', webpage, 'description')
|
||||
|
||||
base_url = self._search_regex(
|
||||
@@ -79,7 +87,7 @@ class SpiegelArticleIE(InfoExtractor):
|
||||
_VALID_URL = 'https?://www\.spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html'
|
||||
IE_NAME = 'Spiegel:Article'
|
||||
IE_DESC = 'Articles on spiegel.de'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html',
|
||||
'info_dict': {
|
||||
'id': '1516455',
|
||||
@@ -87,20 +95,34 @@ class SpiegelArticleIE(InfoExtractor):
|
||||
'title': 'Faszination Badminton: Nennt es bloß nicht Federball',
|
||||
'description': 're:^Patrick Kämnitz gehört.{100,}',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.spiegel.de/wissenschaft/weltall/astronaut-alexander-gerst-antwortet-spiegel-online-lesern-a-989876.html',
|
||||
'info_dict': {
|
||||
|
||||
},
|
||||
'playlist_count': 6,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = re.match(self._VALID_URL, url)
|
||||
video_id = m.group('id')
|
||||
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# Single video on top of the page
|
||||
video_link = self._search_regex(
|
||||
r'<a href="([^"]+)" onclick="return spOpenVideo\(this,', webpage,
|
||||
'video page URL')
|
||||
video_url = compat_urlparse.urljoin(
|
||||
self.http_scheme() + '//spiegel.de/', video_link)
|
||||
'video page URL', default=None)
|
||||
if video_link:
|
||||
video_url = compat_urlparse.urljoin(
|
||||
self.http_scheme() + '//spiegel.de/', video_link)
|
||||
return self.url_result(video_url)
|
||||
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': video_url,
|
||||
}
|
||||
# Multiple embedded videos
|
||||
embeds = re.findall(
|
||||
r'<div class="vid_holder[0-9]+.*?</div>\s*.*?url\s*=\s*"([^"]+)"',
|
||||
webpage)
|
||||
entries = [
|
||||
self.url_result(compat_urlparse.urljoin(
|
||||
self.http_scheme() + '//spiegel.de/', embed_path))
|
||||
for embed_path in embeds
|
||||
]
|
||||
return self.playlist_result(entries)
|
||||
|
||||
@@ -13,7 +13,7 @@ from ..utils import (
|
||||
|
||||
class StreamcloudIE(InfoExtractor):
|
||||
IE_NAME = 'streamcloud.eu'
|
||||
_VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)/(?P<fname>[^#?]*)\.html'
|
||||
_VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)(?:/(?P<fname>[^#?]*)\.html)?'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
|
||||
@@ -27,8 +27,8 @@ class StreamcloudIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
url = 'http://streamcloud.eu/%s' % video_id
|
||||
|
||||
orig_webpage = self._download_webpage(url, video_id)
|
||||
|
||||
|
||||
@@ -80,7 +80,7 @@ class SWRMediathekIE(InfoExtractor):
|
||||
|
||||
if media_type == 'Video':
|
||||
fmt.update({
|
||||
'format_note': ['144p', '288p', '544p'][quality-1],
|
||||
'format_note': ['144p', '288p', '544p', '720p'][quality-1],
|
||||
'vcodec': codec,
|
||||
})
|
||||
elif media_type == 'Audio':
|
||||
@@ -101,4 +101,4 @@ class SWRMediathekIE(InfoExtractor):
|
||||
'uploader': attr['channel_title'],
|
||||
'uploader_id': attr['channel_idkey'],
|
||||
'formats': formats,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -50,6 +50,7 @@ class TapelyIE(InfoExtractor):
|
||||
request = compat_urllib_request.Request(playlist_url)
|
||||
request.add_header('X-Requested-With', 'XMLHttpRequest')
|
||||
request.add_header('Accept', 'application/json')
|
||||
request.add_header('Referer', url)
|
||||
|
||||
playlist = self._download_json(request, display_id)
|
||||
|
||||
|
||||
@@ -38,6 +38,7 @@ class TEDIE(SubtitlesInfoExtractor):
|
||||
'actively fooling us.'),
|
||||
'uploader': 'Dan Dennett',
|
||||
'width': 854,
|
||||
'duration': 1308,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
|
||||
@@ -57,6 +58,7 @@ class TEDIE(SubtitlesInfoExtractor):
|
||||
'title': 'Be passionate. Be courageous. Be your best.',
|
||||
'uploader': 'Gabby Giffords and Mark Kelly',
|
||||
'description': 'md5:5174aed4d0f16021b704120360f72b92',
|
||||
'duration': 1128,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ted.com/playlists/who_are_the_hackers',
|
||||
@@ -178,6 +180,7 @@ class TEDIE(SubtitlesInfoExtractor):
|
||||
'description': self._og_search_description(webpage),
|
||||
'subtitles': video_subtitles,
|
||||
'formats': formats,
|
||||
'duration': talk_info.get('duration'),
|
||||
}
|
||||
|
||||
def _get_available_subtitles(self, video_id, talk_info):
|
||||
|
||||
@@ -4,9 +4,9 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
compat_str,
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
)
|
||||
@@ -176,8 +176,7 @@ class TVPlayIE(InfoExtractor):
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video = self._download_json(
|
||||
'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON')
|
||||
@@ -208,6 +207,10 @@ class TVPlayIE(InfoExtractor):
|
||||
'app': m.group('app'),
|
||||
'play_path': m.group('playpath'),
|
||||
})
|
||||
elif video_url.endswith('.f4m'):
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', video_id))
|
||||
continue
|
||||
else:
|
||||
fmt.update({
|
||||
'url': video_url,
|
||||
|
||||
38
youtube_dl/extractor/vice.py
Normal file
38
youtube_dl/extractor/vice.py
Normal file
@@ -0,0 +1,38 @@
|
||||
from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class ViceIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.vice\.com/.*?/(?P<name>.+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
|
||||
'info_dict': {
|
||||
'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
|
||||
'ext': 'mp4',
|
||||
'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
|
||||
},
|
||||
'params': {
|
||||
# Requires ffmpeg (m3u8 manifest)
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
name = mobj.group('name')
|
||||
webpage = self._download_webpage(url, name)
|
||||
try:
|
||||
embed_code = self._search_regex(
|
||||
r'embedCode=([^&\'"]+)', webpage,
|
||||
'ooyala embed code')
|
||||
ooyala_url = OoyalaIE._url_for_embed_code(embed_code)
|
||||
print(ooyala_url)
|
||||
except ExtractorError:
|
||||
raise ExtractorError('The page doesn\'t contain a video', expected=True)
|
||||
return self.url_result(ooyala_url, ie='Ooyala')
|
||||
|
||||
@@ -27,15 +27,15 @@ class WrzutaIE(InfoExtractor):
|
||||
'description': 'md5:7fb5ef3c21c5893375fda51d9b15d9cd',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://w729.wrzuta.pl/audio/9oXJqdcndqv/david_guetta_amp_showtek_ft._vassy_-_bad',
|
||||
'md5': '1e546a18e1c22ac6e9adce17b8961ff5',
|
||||
'url': 'http://jolka85.wrzuta.pl/audio/063jOPX5ue2/liber_natalia_szroeder_-_teraz_ty',
|
||||
'md5': 'bc78077859bea7bcfe4295d7d7fc9025',
|
||||
'info_dict': {
|
||||
'id': '9oXJqdcndqv',
|
||||
'id': '063jOPX5ue2',
|
||||
'ext': 'ogg',
|
||||
'title': 'David Guetta & Showtek ft. Vassy - Bad',
|
||||
'duration': 270,
|
||||
'uploader_id': 'w729',
|
||||
'description': 'md5:4628f01c666bbaaecefa83476cfa794a',
|
||||
'title': 'Liber & Natalia Szroeder - Teraz Ty',
|
||||
'duration': 203,
|
||||
'uploader_id': 'jolka85',
|
||||
'description': 'md5:2d2b6340f9188c8c4cd891580e481096',
|
||||
},
|
||||
}]
|
||||
|
||||
@@ -49,16 +49,17 @@ class WrzutaIE(InfoExtractor):
|
||||
|
||||
quality = qualities(['SD', 'MQ', 'HQ', 'HD'])
|
||||
|
||||
audio_table = {'flv': 'mp3', 'webm': 'ogg'}
|
||||
audio_table = {'flv': 'mp3', 'webm': 'ogg', '???': 'mp3'}
|
||||
|
||||
embedpage = self._download_json('http://www.wrzuta.pl/npp/embed/%s/%s' % (uploader, video_id), video_id)
|
||||
|
||||
formats = []
|
||||
for media in embedpage['url']:
|
||||
fmt = media['type'].split('@')[0]
|
||||
if typ == 'audio':
|
||||
ext = audio_table[media['type'].split('@')[0]]
|
||||
ext = audio_table.get(fmt, fmt)
|
||||
else:
|
||||
ext = media['type'].split('@')[0]
|
||||
ext = fmt
|
||||
|
||||
formats.append({
|
||||
'format_id': '%s_%s' % (ext, media['quality'].lower()),
|
||||
|
||||
@@ -9,40 +9,30 @@ from ..utils import (
|
||||
|
||||
|
||||
class YouJizzIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+)\.html$'
|
||||
_VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/[^/#?]+-(?P<id>[0-9]+)\.html(?:$|[?#])'
|
||||
_TEST = {
|
||||
'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
|
||||
'file': '2189178.flv',
|
||||
'md5': '07e15fa469ba384c7693fd246905547c',
|
||||
'info_dict': {
|
||||
'id': '2189178',
|
||||
'ext': 'flv',
|
||||
"title": "Zeichentrick 1",
|
||||
"age_limit": 18,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
video_id = mobj.group('videoid')
|
||||
|
||||
# Get webpage content
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
age_limit = self._rta_search(webpage)
|
||||
video_title = self._html_search_regex(
|
||||
r'<title>\s*(.*)\s*</title>', webpage, 'title')
|
||||
|
||||
# Get the video title
|
||||
video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>',
|
||||
webpage, 'title').strip()
|
||||
|
||||
# Get the embed page
|
||||
result = re.search(r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', webpage)
|
||||
if result is None:
|
||||
raise ExtractorError('ERROR: unable to extract embed page')
|
||||
|
||||
embed_page_url = result.group(0).strip()
|
||||
video_id = result.group('videoid')
|
||||
|
||||
webpage = self._download_webpage(embed_page_url, video_id)
|
||||
embed_page_url = self._search_regex(
|
||||
r'(https?://www.youjizz.com/videos/embed/[0-9]+)',
|
||||
webpage, 'embed page')
|
||||
webpage = self._download_webpage(
|
||||
embed_page_url, video_id, note='downloading embed page')
|
||||
|
||||
# Get the video URL
|
||||
m_playlist = re.search(r'so.addVariable\("playlist", ?"(?P<playlist>.+?)"\);', webpage)
|
||||
|
||||
@@ -510,7 +510,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
|
||||
def _parse_sig_js(self, jscode):
|
||||
funcname = self._search_regex(
|
||||
r'signature=([$a-zA-Z]+)', jscode,
|
||||
r'\.sig\|\|([a-zA-Z0-9]+)\(', jscode,
|
||||
'Initial JS player signature function name')
|
||||
|
||||
jsi = JSInterpreter(jscode)
|
||||
@@ -1043,6 +1043,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
|
||||
'info_dict': {
|
||||
'title': 'ytdl test PL',
|
||||
'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
|
||||
@@ -261,7 +261,16 @@ def parseOpts(overrideArguments=None):
|
||||
video_format.add_option(
|
||||
'-f', '--format',
|
||||
action='store', dest='format', metavar='FORMAT', default=None,
|
||||
help='video format code, specify the order of preference using slashes: -f 22/17/18 . -f mp4 , -f m4a and -f flv are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality. Use commas to download multiple audio formats, such as -f 136/137/mp4/bestvideo,140/m4a/bestaudio')
|
||||
help='video format code, specify the order of preference using'
|
||||
' slashes: -f 22/17/18 . -f mp4 , -f m4a and -f flv are also'
|
||||
' supported. You can also use the special names "best",'
|
||||
' "bestvideo", "bestaudio", "worst", "worstvideo" and'
|
||||
' "worstaudio". By default, youtube-dl will pick the best quality.'
|
||||
' Use commas to download multiple audio formats, such as'
|
||||
' -f 136/137/mp4/bestvideo,140/m4a/bestaudio.'
|
||||
' You can merge the video and audio of two formats into a single'
|
||||
' file using -f <video-format>+<audio-format> (requires ffmpeg or'
|
||||
' avconv), for example -f bestvideo+bestaudio.')
|
||||
video_format.add_option(
|
||||
'--all-formats',
|
||||
action='store_const', dest='format', const='all',
|
||||
@@ -481,10 +490,12 @@ def parseOpts(overrideArguments=None):
|
||||
'%(format_id)s for the unique id of the format (like Youtube\'s itags: "137"), '
|
||||
'%(upload_date)s for the upload date (YYYYMMDD), '
|
||||
'%(extractor)s for the provider (youtube, metacafe, etc), '
|
||||
'%(id)s for the video id, %(playlist)s for the playlist the video is in, '
|
||||
'%(playlist_index)s for the position in the playlist and %% for a literal percent. '
|
||||
'%(id)s for the video id, '
|
||||
'%(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in, '
|
||||
'%(playlist_index)s for the position in the playlist. '
|
||||
'%(height)s and %(width)s for the width and height of the video format. '
|
||||
'%(resolution)s for a textual description of the resolution of the video format. '
|
||||
'%% for a literal percent. '
|
||||
'Use - to output to stdout. Can also be used to download to a different directory, '
|
||||
'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
|
||||
filesystem.add_option(
|
||||
|
||||
@@ -34,7 +34,6 @@ from .compat import (
|
||||
compat_chr,
|
||||
compat_getenv,
|
||||
compat_html_entities,
|
||||
compat_html_parser,
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
@@ -844,10 +843,7 @@ def bytes_to_intlist(bs):
|
||||
def intlist_to_bytes(xs):
|
||||
if not xs:
|
||||
return b''
|
||||
if isinstance(chr(0), bytes): # Python 2
|
||||
return ''.join([chr(x) for x in xs])
|
||||
else:
|
||||
return bytes(xs)
|
||||
return struct.pack('%dB' % len(xs), *xs)
|
||||
|
||||
|
||||
# Cross-platform file locking
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2014.11.04'
|
||||
__version__ = '2014.11.13.1'
|
||||
|
||||
Reference in New Issue
Block a user