Compare commits: youtube-dl 2015.01.02...2015.01.10 (151 commits)

Commit SHA1s in this range:

6ef9f88299, f71fdb0acc, c24dfef63c, 6271f1cad9, fb4b030aaf, ff21a8e0ee, 904fffffeb, 51897bb77c,
bd1a281ede, 45598f1578, d02115f837, 34c781a24d, 1302394603, dd622d7c4e, d120e9013f, b8da6b9fc6,
4baea47c42, 176cf9e0c3, 7b6faddfc8, f90ad27375, 230b2287dd, 754c838903, aa2fd59857, 9932a65370,
5e4166478d, b0e87c3110, ff0813313a, c0bdf32a3c, 92b065dc53, 9298d4e3df, 740a7fcbc8, 5fbf25a681,
db6e625005, 811cacdc2c, ce08a86462, 11497d5bba, 0217c78377, bd6b25ce0e, d51a853d5c, 9ed99402f5,
ec3a6a3137, 796858a53f, 5b78caca94, bec2248141, 211503c39f, adb1307b9a, 99673f04bc, e9a537774d,
367f539769, 398133cf55, 52fc3ba405, fdd6e18b75, 58a84b8cb6, c5d666d374, 5d8993b06a, c758bf9fd7,
900813a328, 2bad0e5d20, ccc5842bc9, fd86c2026d, e4a8eae701, 75e51819d0, 8ee341500d, 0590062925,
799d88d3d8, 760aea9a96, d6a31b1766, 0b54a5b10a, 6309cb9b41, 27a82a1b93, ecd1936695, 76b3c61012,
0df2dea73b, f8bb576c4f, ee61f6f3e2, f14f2a6d79, 2c322cc5d6, 3b8f3a1504, 8f9529cd05, f4bca0b348,
6291438073, 18c3c15391, dda620e88c, d7cc31b63e, 5e3e1c82d8, aa80652f47, 9d247bbd2d, 93e40a7b2f,
03ff2cc1c4, a285b6377b, cd791a5ea0, 87830900a9, dfc9d9f50a, 75311a7e16, 628bc4d1e7, a4c3f48639,
bdf80aa542, adf3c58ad3, caf90bfaa5, 2f985f4bb4, 67c2bcdf4c, 1d2d0e3ff2, 9fda6ee39f, bc3e582fe4,
bc1fc5ddbc, 63948fc62c, f4858a7103, 26886e6140, 7a1818c99b, 2ccd1b10e5, 788fa208c8, 8848314c08,
c11125f9ed, 95ceeec722, b68ff25917, 3e3327ea17, b158bb8693, 2bf098eda4, 382e05fa56, 19b05d886e,
e65566a9cc, baa3c3f0f6, f4f339529c, 7d02fae85b, 6e46c3f1fd, c7e675940c, d26b1317ed, a221f22969,
817f786fbb, 62420c73cb, 2522a0b7da, 46d32a12c9, c491418526, c067545c17, 823a155293, 324b2c78fa,
d34f98289b, 644096b15c, 15cebcc363, faa4ea68c0, 476eae0c2a, 8399267671, 5b9aefef77, defaf19f5d,
754f0008ec, 2415951ead, 995ad69c54, 225e4b9633, 6ce2c6783b, 29f400b97d, 7b61ac3ddf
.gitignore (vendored): 2 changed lines
@@ -31,3 +31,5 @@ updates_key.pem
|
||||
test/testdata
|
||||
.tox
|
||||
youtube-dl.zsh
|
||||
.idea
|
||||
.idea/*
|
@@ -9,7 +9,6 @@ notifications:
|
||||
email:
|
||||
- filippo.valsorda@gmail.com
|
||||
- phihag@phihag.de
|
||||
- jaime.marquinez.ferrandiz+travis@gmail.com
|
||||
- yasoob.khld@gmail.com
|
||||
# irc:
|
||||
# channels:
|
||||
|
AUTHORS: 3 changed lines
@@ -98,3 +98,6 @@ Will Glynn
|
||||
Max Reimann
|
||||
Cédric Luthi
|
||||
Thijs Vermeir
|
||||
Joel Leclerc
|
||||
Christopher Krooss
|
||||
Ondřej Caletka
|
||||
|
@@ -44,7 +44,7 @@ In particular, every site support request issue should only pertain to services
|
||||
|
||||
### Is anyone going to need the feature?
|
||||
|
||||
Only post features that you (or an incapicated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
|
||||
Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
|
||||
|
||||
### Is your question about youtube-dl?
|
||||
|
||||
|
Makefile: 4 changed lines
@@ -46,7 +46,7 @@ test:
|
||||
ot: offlinetest
|
||||
|
||||
offlinetest: codetest
|
||||
nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations
|
||||
nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations --exclude test_youtube_lists
|
||||
|
||||
tar: youtube-dl.tar.gz
|
||||
|
||||
@@ -63,7 +63,7 @@ youtube-dl: youtube_dl/*.py youtube_dl/*/*.py
|
||||
chmod a+x youtube-dl
|
||||
|
||||
README.md: youtube_dl/*.py youtube_dl/*/*.py
|
||||
COLUMNS=80 python -m youtube_dl --help | python devscripts/make_readme.py
|
||||
COLUMNS=80 python youtube_dl/__main__.py --help | python devscripts/make_readme.py
|
||||
|
||||
CONTRIBUTING.md: README.md
|
||||
python devscripts/make_contributing.py README.md CONTRIBUTING.md
|
||||
|
README.md: 38 changed lines
@@ -219,6 +219,8 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
for each command-line argument. If the URL
|
||||
refers to a playlist, dump the whole
|
||||
playlist information in a single line.
|
||||
--print-json Be quiet and print the video information as
|
||||
JSON (video is still being downloaded).
|
||||
--newline output progress bar as new lines
|
||||
--no-progress do not print progress bar
|
||||
--console-title display progress in console titlebar
|
||||
@@ -248,14 +250,15 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
|
||||
## Video Format Options:
|
||||
-f, --format FORMAT video format code, specify the order of
|
||||
preference using slashes: -f 22/17/18 . -f
|
||||
mp4 , -f m4a and -f flv are also
|
||||
supported. You can also use the special
|
||||
names "best", "bestvideo", "bestaudio",
|
||||
"worst", "worstvideo" and "worstaudio". By
|
||||
default, youtube-dl will pick the best
|
||||
quality. Use commas to download multiple
|
||||
audio formats, such as -f
|
||||
preference using slashes, as in -f 22/17/18
|
||||
. Instead of format codes, you can select
|
||||
by extension for the extensions aac, m4a,
|
||||
mp3, mp4, ogg, wav, webm. You can also use
|
||||
the special names "best", "bestvideo",
|
||||
"bestaudio", "worst". By default, youtube-
|
||||
dl will pick the best quality. Use commas
|
||||
to download multiple audio formats, such as
|
||||
-f
|
||||
136/137/mp4/bestvideo,140/m4a/bestaudio.
|
||||
You can merge the video and audio of two
|
||||
formats into a single file using -f <video-
|
||||
@@ -269,6 +272,10 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
-F, --list-formats list all available formats
|
||||
--youtube-skip-dash-manifest Do not download the DASH manifest on
|
||||
YouTube videos
|
||||
--merge-output-format FORMAT If a merge is required (e.g.
|
||||
bestvideo+bestaudio), output to given
|
||||
container format. One of mkv, mp4, ogg,
|
||||
webm, flv.Ignored if no merge is required
|
||||
|
||||
## Subtitle Options:
|
||||
--write-sub write subtitle file
|
||||
@@ -315,6 +322,11 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--add-metadata write metadata to the video file
|
||||
--xattrs write metadata to the video file's xattrs
|
||||
(using dublin core and xdg standards)
|
||||
--fixup POLICY (experimental) Automatically correct known
|
||||
faults of the file. One of never (do
|
||||
nothing), warn (only emit a warning),
|
||||
detect_or_warn(check whether we can do
|
||||
anything about it, warn otherwise
|
||||
--prefer-avconv Prefer avconv over ffmpeg for running the
|
||||
postprocessors (default)
|
||||
--prefer-ffmpeg Prefer ffmpeg over avconv for running the
|
||||
@@ -326,7 +338,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
|
||||
# CONFIGURATION
|
||||
|
||||
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<Yourname>\youtube-dl.conf`.
|
||||
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<user name>\youtube-dl.conf`.
|
||||
|
||||
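A minimal sketch of such a configuration file (for example `~/.config/youtube-dl/config`, the path named in the paragraph above), using only the two flags the paragraph mentions. This is an illustration, not part of the diff; options are written just as they would be typed on the command line:

```
--extract-audio
--no-mtime
```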
# OUTPUT TEMPLATE
|
||||
|
||||
@@ -449,6 +461,14 @@ Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unz
|
||||
|
||||
To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29).
|
||||
|
||||
### On Windows, how should I set up ffmpeg and youtube-dl? Where should I put the exe files?
|
||||
|
||||
If you put youtube-dl and ffmpeg in the same directory that you're running the command from, it will work, but that's rather cumbersome.
|
||||
|
||||
To make a different directory work - either for ffmpeg, or for youtube-dl, or for both - simply create the directory (say, `C:\bin`, or `C:\Users\<User name>\bin`), put all the executables directly in there, and then [set your PATH environment variable](https://www.java.com/en/download/help/path.xml) to include that directory.
|
||||
|
||||
From then on, after restarting your shell, you will be able to access both youtube-dl and ffmpeg (and youtube-dl will be able to find ffmpeg) by simply typing `youtube-dl` or `ffmpeg`, no matter what directory you're in.
|
||||
|
||||
### How can I detect whether a given URL is supported by youtube-dl?
|
||||
|
||||
For one, have a look at the [list of supported sites](docs/supportedsites). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/v/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
|
||||
|
@@ -16,7 +16,7 @@ def main():
|
||||
template = tmplf.read()
|
||||
|
||||
ie_htmls = []
|
||||
for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower()):
|
||||
for ie in youtube_dl.list_extractors(age_limit=None):
|
||||
ie_html = '<b>{}</b>'.format(ie.IE_NAME)
|
||||
ie_desc = getattr(ie, 'IE_DESC', None)
|
||||
if ie_desc is False:
|
||||
|
@@ -23,12 +23,12 @@ def main():
|
||||
|
||||
def gen_ies_md(ies):
|
||||
for ie in ies:
|
||||
ie_md = '**{}**'.format(ie.IE_NAME)
|
||||
ie_md = '**{0}**'.format(ie.IE_NAME)
|
||||
ie_desc = getattr(ie, 'IE_DESC', None)
|
||||
if ie_desc is False:
|
||||
continue
|
||||
if ie_desc is not None:
|
||||
ie_md += ': {}'.format(ie.IE_DESC)
|
||||
ie_md += ': {0}'.format(ie.IE_DESC)
|
||||
if not ie.working():
|
||||
ie_md += ' (Currently broken)'
|
||||
yield ie_md
|
||||
|
@@ -82,18 +82,8 @@ class FakeYDL(YoutubeDL):
|
||||
|
||||
def gettestcases(include_onlymatching=False):
|
||||
for ie in youtube_dl.extractor.gen_extractors():
|
||||
t = getattr(ie, '_TEST', None)
|
||||
if t:
|
||||
assert not hasattr(ie, '_TESTS'), \
|
||||
'%s has _TEST and _TESTS' % type(ie).__name__
|
||||
tests = [t]
|
||||
else:
|
||||
tests = getattr(ie, '_TESTS', [])
|
||||
for t in tests:
|
||||
if not include_onlymatching and t.get('only_matching', False):
|
||||
continue
|
||||
t['name'] = type(ie).__name__[:-len('IE')]
|
||||
yield t
|
||||
for tc in ie.get_testcases(include_onlymatching):
|
||||
yield tc
|
||||
|
||||
|
||||
md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
|
||||
@@ -120,6 +110,20 @@ def expect_info_dict(self, got_dict, expected_dict):
|
||||
else:
|
||||
if isinstance(expected, compat_str) and expected.startswith('md5:'):
|
||||
got = 'md5:' + md5(got_dict.get(info_field))
|
||||
elif isinstance(expected, compat_str) and expected.startswith('mincount:'):
|
||||
got = got_dict.get(info_field)
|
||||
self.assertTrue(
|
||||
isinstance(got, list),
|
||||
'Expected field %s to be a list, but it is of type %s' % (
|
||||
info_field, type(got).__name__))
|
||||
expected_num = int(expected.partition(':')[2])
|
||||
assertGreaterEqual(
|
||||
self, len(got), expected_num,
|
||||
'Expected %d items in field %s, but only got %d' % (
|
||||
expected_num, info_field, len(got)
|
||||
)
|
||||
)
|
||||
continue
|
||||
else:
|
||||
got = got_dict.get(info_field)
|
||||
self.assertEqual(expected, got,
|
||||
|
@@ -40,5 +40,23 @@ class TestInfoExtractor(unittest.TestCase):
|
||||
self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
|
||||
self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
|
||||
|
||||
def test_html_search_meta(self):
|
||||
ie = self.ie
|
||||
html = '''
|
||||
<meta name="a" content="1" />
|
||||
<meta name='b' content='2'>
|
||||
<meta name="c" content='3'>
|
||||
<meta name=d content='4'>
|
||||
<meta property="e" content='5' >
|
||||
<meta content="6" name="f">
|
||||
'''
|
||||
|
||||
self.assertEqual(ie._html_search_meta('a', html), '1')
|
||||
self.assertEqual(ie._html_search_meta('b', html), '2')
|
||||
self.assertEqual(ie._html_search_meta('c', html), '3')
|
||||
self.assertEqual(ie._html_search_meta('d', html), '4')
|
||||
self.assertEqual(ie._html_search_meta('e', html), '5')
|
||||
self.assertEqual(ie._html_search_meta('f', html), '6')
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -8,6 +8,8 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import copy
|
||||
|
||||
from test.helper import FakeYDL, assertRegexpMatches
|
||||
from youtube_dl import YoutubeDL
|
||||
from youtube_dl.extractor import YoutubeIE
|
||||
@@ -192,6 +194,37 @@ class TestFormatSelection(unittest.TestCase):
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'vid-high')
|
||||
|
||||
def test_format_selection_audio_exts(self):
|
||||
formats = [
|
||||
{'format_id': 'mp3-64', 'ext': 'mp3', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'},
|
||||
{'format_id': 'ogg-64', 'ext': 'ogg', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'},
|
||||
{'format_id': 'aac-64', 'ext': 'aac', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'},
|
||||
{'format_id': 'mp3-32', 'ext': 'mp3', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'},
|
||||
{'format_id': 'aac-32', 'ext': 'aac', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'},
|
||||
]
|
||||
|
||||
info_dict = _make_result(formats)
|
||||
ydl = YDL({'format': 'best'})
|
||||
ie = YoutubeIE(ydl)
|
||||
ie._sort_formats(info_dict['formats'])
|
||||
ydl.process_ie_result(copy.deepcopy(info_dict))
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'aac-64')
|
||||
|
||||
ydl = YDL({'format': 'mp3'})
|
||||
ie = YoutubeIE(ydl)
|
||||
ie._sort_formats(info_dict['formats'])
|
||||
ydl.process_ie_result(copy.deepcopy(info_dict))
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'mp3-64')
|
||||
|
||||
ydl = YDL({'prefer_free_formats': True})
|
||||
ie = YoutubeIE(ydl)
|
||||
ie._sort_formats(info_dict['formats'])
|
||||
ydl.process_ie_result(copy.deepcopy(info_dict))
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'ogg-64')
|
||||
|
||||
def test_format_selection_video(self):
|
||||
formats = [
|
||||
{'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': '_'},
|
||||
@@ -218,7 +251,7 @@ class TestFormatSelection(unittest.TestCase):
|
||||
# 3D
|
||||
'85', '84', '102', '83', '101', '82', '100',
|
||||
# Dash video
|
||||
'138', '137', '248', '136', '247', '135', '246',
|
||||
'137', '248', '136', '247', '135', '246',
|
||||
'245', '244', '134', '243', '133', '242', '160',
|
||||
# Dash audio
|
||||
'141', '172', '140', '171', '139',
|
||||
|
@@ -45,11 +45,6 @@ class TestAgeRestriction(unittest.TestCase):
|
||||
'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
|
||||
'505835.mp4', 2, old_age=25)
|
||||
|
||||
def test_pornotube(self):
|
||||
self._assert_restricted(
|
||||
'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing',
|
||||
'1689755.flv', 13)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -17,6 +17,7 @@ from youtube_dl.extractor import (
|
||||
TEDIE,
|
||||
VimeoIE,
|
||||
WallaIE,
|
||||
CeskaTelevizeIE,
|
||||
)
|
||||
|
||||
|
||||
@@ -317,5 +318,32 @@ class TestWallaSubtitles(BaseTestSubtitles):
|
||||
self.assertEqual(len(subtitles), 0)
|
||||
|
||||
|
||||
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
|
||||
IE = CeskaTelevizeIE
|
||||
|
||||
def test_list_subtitles(self):
|
||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||
self.DL.params['listsubtitles'] = True
|
||||
info_dict = self.getInfoDict()
|
||||
self.assertEqual(info_dict, None)
|
||||
|
||||
def test_allsubtitles(self):
|
||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(set(subtitles.keys()), set(['cs']))
|
||||
self.assertEqual(md5(subtitles['cs']), '9bf52d9549533c32c427e264bf0847d4')
|
||||
|
||||
def test_nosubtitles(self):
|
||||
self.DL.expect_warning('video doesn\'t have subtitles')
|
||||
self.url = 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220'
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(len(subtitles), 0)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -16,6 +16,7 @@ import json
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from youtube_dl.utils import (
|
||||
age_restricted,
|
||||
args_to_str,
|
||||
clean_html,
|
||||
DateRange,
|
||||
@@ -402,5 +403,12 @@ Trying to open render node...
|
||||
Success at /dev/dri/renderD128.
|
||||
ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
|
||||
|
||||
def test_age_restricted(self):
|
||||
self.assertFalse(age_restricted(None, 10)) # unrestricted content
|
||||
self.assertFalse(age_restricted(1, None)) # unrestricted policy
|
||||
self.assertFalse(age_restricted(8, 10))
|
||||
self.assertTrue(age_restricted(18, 14))
|
||||
self.assertFalse(age_restricted(18, 18))
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -63,12 +63,14 @@ from .utils import (
|
||||
YoutubeDLHandler,
|
||||
prepend_extension,
|
||||
args_to_str,
|
||||
age_restricted,
|
||||
)
|
||||
from .cache import Cache
|
||||
from .extractor import get_info_extractor, gen_extractors
|
||||
from .downloader import get_suitable_downloader
|
||||
from .downloader.rtmp import rtmpdump_version
|
||||
from .postprocessor import (
|
||||
FFmpegFixupStretchedPP,
|
||||
FFmpegMergerPP,
|
||||
FFmpegPostProcessor,
|
||||
get_postprocessor,
|
||||
@@ -202,6 +204,13 @@ class YoutubeDL(object):
|
||||
|
||||
Progress hooks are guaranteed to be called at least once
|
||||
(with status "finished") if the download is successful.
|
||||
merge_output_format: Extension to use when merging formats.
|
||||
fixup: Automatically correct known faults of the file.
|
||||
One of:
|
||||
- "never": do nothing
|
||||
- "warn": only emit a warning
|
||||
- "detect_or_warn": check whether we can do anything
|
||||
about it, warn otherwise
|
||||
|
||||
|
||||
The following parameters are not used by YoutubeDL itself, they are used by
|
||||
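To illustrate the newly documented `merge_output_format` and `fixup` parameters, here is a hedged sketch of passing them through the embedding API. The `with YoutubeDL(...)` usage mirrors the call that appears later in this diff; the option values and the URL are placeholders chosen for illustration, not taken from the repository:

```python
import youtube_dl

ydl_opts = {
    'format': 'bestvideo+bestaudio',  # request a merge of separate video and audio formats
    'merge_output_format': 'mkv',     # new option: container to use for the merged file
    'fixup': 'detect_or_warn',        # new option: never / warn / detect_or_warn
}

# Placeholder URL; substitute a real video page.
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://example.com/some-video'])
```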
@@ -550,13 +559,8 @@ class YoutubeDL(object):
|
||||
max_views = self.params.get('max_views')
|
||||
if max_views is not None and view_count > max_views:
|
||||
return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
|
||||
age_limit = self.params.get('age_limit')
|
||||
if age_limit is not None:
|
||||
actual_age_limit = info_dict.get('age_limit')
|
||||
if actual_age_limit is None:
|
||||
actual_age_limit = 0
|
||||
if age_limit < actual_age_limit:
|
||||
return 'Skipping "' + title + '" because it is age restricted'
|
||||
if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
|
||||
return 'Skipping "%s" because it is age restricted' % title
|
||||
if self.in_download_archive(info_dict):
|
||||
return '%s has already been recorded in archive' % video_title
|
||||
return None
|
||||
@@ -790,7 +794,7 @@ class YoutubeDL(object):
|
||||
if video_formats:
|
||||
return video_formats[0]
|
||||
else:
|
||||
extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
|
||||
extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
|
||||
if format_spec in extensions:
|
||||
filter_f = lambda f: f['ext'] == format_spec
|
||||
else:
|
||||
@@ -913,10 +917,24 @@ class YoutubeDL(object):
|
||||
'contain the video, try using '
|
||||
'"-f %s+%s"' % (format_2, format_1))
|
||||
return
|
||||
output_ext = (
|
||||
formats_info[0]['ext']
|
||||
if self.params.get('merge_output_format') is None
|
||||
else self.params['merge_output_format'])
|
||||
selected_format = {
|
||||
'requested_formats': formats_info,
|
||||
'format': rf,
|
||||
'ext': formats_info[0]['ext'],
|
||||
'width': formats_info[0].get('width'),
|
||||
'height': formats_info[0].get('height'),
|
||||
'resolution': formats_info[0].get('resolution'),
|
||||
'fps': formats_info[0].get('fps'),
|
||||
'vcodec': formats_info[0].get('vcodec'),
|
||||
'vbr': formats_info[0].get('vbr'),
|
||||
'stretched_ratio': formats_info[0].get('stretched_ratio'),
|
||||
'acodec': formats_info[1].get('acodec'),
|
||||
'abr': formats_info[1].get('abr'),
|
||||
'ext': output_ext,
|
||||
}
|
||||
else:
|
||||
selected_format = None
|
||||
@@ -1144,6 +1162,27 @@ class YoutubeDL(object):
|
||||
return
|
||||
|
||||
if success:
|
||||
# Fixup content
|
||||
stretched_ratio = info_dict.get('stretched_ratio')
|
||||
if stretched_ratio is not None and stretched_ratio != 1:
|
||||
fixup_policy = self.params.get('fixup')
|
||||
if fixup_policy is None:
|
||||
fixup_policy = 'detect_or_warn'
|
||||
if fixup_policy == 'warn':
|
||||
self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
|
||||
info_dict['id'], stretched_ratio))
|
||||
elif fixup_policy == 'detect_or_warn':
|
||||
stretched_pp = FFmpegFixupStretchedPP(self)
|
||||
if stretched_pp.available:
|
||||
info_dict.setdefault('__postprocessors', [])
|
||||
info_dict['__postprocessors'].append(stretched_pp)
|
||||
else:
|
||||
self.report_warning(
|
||||
'%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
|
||||
info_dict['id'], stretched_ratio))
|
||||
else:
|
||||
assert fixup_policy == 'ignore'
|
||||
|
||||
try:
|
||||
self.post_process(filename, info_dict)
|
||||
except (PostProcessingError) as err:
|
||||
@@ -1333,7 +1372,9 @@ class YoutubeDL(object):
|
||||
formats = info_dict.get('formats', [info_dict])
|
||||
idlen = max(len('format code'),
|
||||
max(len(f['format_id']) for f in formats))
|
||||
formats_s = [line(f, idlen) for f in formats]
|
||||
formats_s = [
|
||||
line(f, idlen) for f in formats
|
||||
if f.get('preference') is None or f['preference'] >= -1000]
|
||||
if len(formats) > 1:
|
||||
formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
|
||||
formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
|
||||
|
@@ -38,7 +38,7 @@ from .update import update_self
|
||||
from .downloader import (
|
||||
FileDownloader,
|
||||
)
|
||||
from .extractor import gen_extractors
|
||||
from .extractor import gen_extractors, list_extractors
|
||||
from .YoutubeDL import YoutubeDL
|
||||
|
||||
|
||||
@@ -95,17 +95,15 @@ def _real_main(argv=None):
|
||||
_enc = preferredencoding()
|
||||
all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
|
||||
|
||||
extractors = gen_extractors()
|
||||
|
||||
if opts.list_extractors:
|
||||
for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
|
||||
for ie in list_extractors(opts.age_limit):
|
||||
compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
|
||||
matchedUrls = [url for url in all_urls if ie.suitable(url)]
|
||||
for mu in matchedUrls:
|
||||
compat_print(' ' + mu)
|
||||
sys.exit(0)
|
||||
if opts.list_extractor_descriptions:
|
||||
for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
|
||||
for ie in list_extractors(opts.age_limit):
|
||||
if not ie._WORKING:
|
||||
continue
|
||||
desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
|
||||
@@ -168,6 +166,7 @@ def _real_main(argv=None):
|
||||
if opts.recodevideo is not None:
|
||||
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv']:
|
||||
parser.error('invalid video recode format specified')
|
||||
|
||||
if opts.date is not None:
|
||||
date = DateRange.day(opts.date)
|
||||
else:
|
||||
@@ -199,7 +198,8 @@ def _real_main(argv=None):
|
||||
' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
|
||||
' template'.format(outtmpl))
|
||||
|
||||
any_printing = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
|
||||
any_getting = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
|
||||
any_printing = opts.print_json
|
||||
download_archive_fn = compat_expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive
|
||||
|
||||
# PostProcessors
|
||||
@@ -245,7 +245,7 @@ def _real_main(argv=None):
|
||||
'password': opts.password,
|
||||
'twofactor': opts.twofactor,
|
||||
'videopassword': opts.videopassword,
|
||||
'quiet': (opts.quiet or any_printing),
|
||||
'quiet': (opts.quiet or any_getting or any_printing),
|
||||
'no_warnings': opts.no_warnings,
|
||||
'forceurl': opts.geturl,
|
||||
'forcetitle': opts.gettitle,
|
||||
@@ -255,9 +255,9 @@ def _real_main(argv=None):
|
||||
'forceduration': opts.getduration,
|
||||
'forcefilename': opts.getfilename,
|
||||
'forceformat': opts.getformat,
|
||||
'forcejson': opts.dumpjson,
|
||||
'forcejson': opts.dumpjson or opts.print_json,
|
||||
'dump_single_json': opts.dump_single_json,
|
||||
'simulate': opts.simulate or any_printing,
|
||||
'simulate': opts.simulate or any_getting,
|
||||
'skip_download': opts.skip_download,
|
||||
'format': opts.format,
|
||||
'format_limit': opts.format_limit,
|
||||
@@ -324,7 +324,9 @@ def _real_main(argv=None):
|
||||
'encoding': opts.encoding,
|
||||
'exec_cmd': opts.exec_cmd,
|
||||
'extract_flat': opts.extract_flat,
|
||||
'merge_output_format': opts.merge_output_format,
|
||||
'postprocessors': postprocessors,
|
||||
'fixup': opts.fixup,
|
||||
}
|
||||
|
||||
with YoutubeDL(ydl_opts) as ydl:
|
||||
@@ -365,3 +367,5 @@ def main(argv=None):
|
||||
sys.exit('ERROR: fixed output name but more than one file to download')
|
||||
except KeyboardInterrupt:
|
||||
sys.exit('\nERROR: Interrupted by user')
|
||||
|
||||
__all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors']
|
||||
|
@@ -187,24 +187,34 @@ def build_fragments_list(boot_info):
|
||||
return res
|
||||
|
||||
|
||||
def write_flv_header(stream, metadata):
|
||||
"""Writes the FLV header and the metadata to stream"""
|
||||
def write_unsigned_int(stream, val):
|
||||
stream.write(struct_pack('!I', val))
|
||||
|
||||
|
||||
def write_unsigned_int_24(stream, val):
|
||||
stream.write(struct_pack('!I', val)[1:])
|
||||
|
||||
|
||||
def write_flv_header(stream):
|
||||
"""Writes the FLV header to stream"""
|
||||
# FLV header
|
||||
stream.write(b'FLV\x01')
|
||||
stream.write(b'\x05')
|
||||
stream.write(b'\x00\x00\x00\x09')
|
||||
# FLV File body
|
||||
stream.write(b'\x00\x00\x00\x00')
|
||||
# FLVTAG
|
||||
# Script data
|
||||
stream.write(b'\x12')
|
||||
# Size of the metadata with 3 bytes
|
||||
stream.write(struct_pack('!L', len(metadata))[1:])
|
||||
stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
|
||||
stream.write(metadata)
|
||||
# Magic numbers extracted from the output files produced by AdobeHDS.php
|
||||
# (https://github.com/K-S-V/Scripts)
|
||||
stream.write(b'\x00\x00\x01\x73')
|
||||
|
||||
|
||||
def write_metadata_tag(stream, metadata):
|
||||
"""Writes optional metadata tag to stream"""
|
||||
SCRIPT_TAG = b'\x12'
|
||||
FLV_TAG_HEADER_LEN = 11
|
||||
|
||||
if metadata:
|
||||
stream.write(SCRIPT_TAG)
|
||||
write_unsigned_int_24(stream, len(metadata))
|
||||
stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
|
||||
stream.write(metadata)
|
||||
write_unsigned_int(stream, FLV_TAG_HEADER_LEN + len(metadata))
|
||||
|
||||
|
||||
def _add_ns(prop):
|
||||
@@ -256,7 +266,11 @@ class F4mFD(FileDownloader):
|
||||
bootstrap = self.ydl.urlopen(bootstrap_url).read()
|
||||
else:
|
||||
bootstrap = base64.b64decode(bootstrap_node.text)
|
||||
metadata = base64.b64decode(media.find(_add_ns('metadata')).text)
|
||||
metadata_node = media.find(_add_ns('metadata'))
|
||||
if metadata_node is not None:
|
||||
metadata = base64.b64decode(metadata_node.text)
|
||||
else:
|
||||
metadata = None
|
||||
boot_info = read_bootstrap_info(bootstrap)
|
||||
|
||||
fragments_list = build_fragments_list(boot_info)
|
||||
@@ -269,7 +283,8 @@ class F4mFD(FileDownloader):
|
||||
|
||||
tmpfilename = self.temp_name(filename)
|
||||
(dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
|
||||
write_flv_header(dest_stream, metadata)
|
||||
write_flv_header(dest_stream)
|
||||
write_metadata_tag(dest_stream, metadata)
|
||||
|
||||
# This dict stores the download progress, it's updated by the progress
|
||||
# hook
|
||||
|
@@ -11,7 +11,6 @@ from ..compat import (
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
check_executable,
|
||||
encodeFilename,
|
||||
)
|
||||
|
||||
@@ -27,16 +26,13 @@ class HlsFD(FileDownloader):
|
||||
'-bsf:a', 'aac_adtstoasc',
|
||||
encodeFilename(tmpfilename, for_subprocess=True)]
|
||||
|
||||
for program in ['avconv', 'ffmpeg']:
|
||||
if check_executable(program, ['-version']):
|
||||
break
|
||||
else:
|
||||
ffpp = FFmpegPostProcessor(downloader=self)
|
||||
program = ffpp._executable
|
||||
if program is None:
|
||||
self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
|
||||
return False
|
||||
cmd = [program] + args
|
||||
|
||||
ffpp = FFmpegPostProcessor(downloader=self)
|
||||
ffpp.check_version()
|
||||
cmd = [program] + args
|
||||
|
||||
retval = subprocess.call(cmd)
|
||||
if retval == 0:
|
||||
|
@@ -4,8 +4,8 @@ import os
|
||||
import subprocess
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..compat import compat_subprocess_get_DEVNULL
|
||||
from ..utils import (
|
||||
check_executable,
|
||||
encodeFilename,
|
||||
)
|
||||
|
||||
@@ -20,11 +20,7 @@ class MplayerFD(FileDownloader):
|
||||
'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',
|
||||
'-dumpstream', '-dumpfile', tmpfilename, url]
|
||||
# Check for mplayer first
|
||||
try:
|
||||
subprocess.call(
|
||||
['mplayer', '-h'],
|
||||
stdout=compat_subprocess_get_DEVNULL(), stderr=subprocess.STDOUT)
|
||||
except (OSError, IOError):
|
||||
if not check_executable('mplayer', ['-h']):
|
||||
self.report_error('MMS or RTSP download detected but "%s" could not be run' % args[0])
|
||||
return False
|
||||
|
||||
|
@@ -26,7 +26,7 @@ from .arte import (
|
||||
ArteTVEmbedIE,
|
||||
)
|
||||
from .atresplayer import AtresPlayerIE
|
||||
from .audiomack import AudiomackIE
|
||||
from .audiomack import AudiomackIE, AudiomackAlbumIE
|
||||
from .auengine import AUEngineIE
|
||||
from .azubu import AzubuIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
@@ -159,6 +159,7 @@ from .gametrailers import GametrailersIE
|
||||
from .gdcvault import GDCVaultIE
|
||||
from .generic import GenericIE
|
||||
from .giantbomb import GiantBombIE
|
||||
from .giga import GigaIE
|
||||
from .glide import GlideIE
|
||||
from .globo import GloboIE
|
||||
from .godtube import GodTubeIE
|
||||
@@ -273,6 +274,7 @@ from .nbc import (
|
||||
)
|
||||
from .ndr import NDRIE
|
||||
from .ndtv import NDTVIE
|
||||
from .netzkino import NetzkinoIE
|
||||
from .nerdcubed import NerdCubedFeedIE
|
||||
from .newgrounds import NewgroundsIE
|
||||
from .newstube import NewstubeIE
|
||||
@@ -325,6 +327,7 @@ from .prosiebensat1 import ProSiebenSat1IE
|
||||
from .pyvideo import PyvideoIE
|
||||
from .quickvid import QuickVidIE
|
||||
from .radiode import RadioDeIE
|
||||
from .radiobremen import RadioBremenIE
|
||||
from .radiofrance import RadioFranceIE
|
||||
from .rai import RaiIE
|
||||
from .rbmaradio import RBMARadioIE
|
||||
@@ -345,6 +348,7 @@ from .ruhd import RUHDIE
|
||||
from .rutube import (
|
||||
RutubeIE,
|
||||
RutubeChannelIE,
|
||||
RutubeEmbedIE,
|
||||
RutubeMovieIE,
|
||||
RutubePersonIE,
|
||||
)
|
||||
@@ -443,7 +447,7 @@ from .tunein import TuneInIE
|
||||
from .turbo import TurboIE
|
||||
from .tutv import TutvIE
|
||||
from .tvigle import TvigleIE
|
||||
from .tvp import TvpIE
|
||||
from .tvp import TvpIE, TvpSeriesIE
|
||||
from .tvplay import TVPlayIE
|
||||
from .twentyfourvideo import TwentyFourVideoIE
|
||||
from .twitch import TwitchIE
|
||||
@@ -510,6 +514,7 @@ from .wdr import (
|
||||
WDRMobileIE,
|
||||
WDRMausIE,
|
||||
)
|
||||
from .webofstories import WebOfStoriesIE
|
||||
from .weibo import WeiboIE
|
||||
from .wimp import WimpIE
|
||||
from .wistia import WistiaIE
|
||||
@@ -545,7 +550,6 @@ from .youtube import (
|
||||
YoutubeSearchURLIE,
|
||||
YoutubeShowIE,
|
||||
YoutubeSubscriptionsIE,
|
||||
YoutubeTopListIE,
|
||||
YoutubeTruncatedIDIE,
|
||||
YoutubeTruncatedURLIE,
|
||||
YoutubeUserIE,
|
||||
@@ -572,6 +576,17 @@ def gen_extractors():
|
||||
return [klass() for klass in _ALL_CLASSES]
|
||||
|
||||
|
||||
def list_extractors(age_limit):
|
||||
"""
|
||||
Return a list of extractors that are suitable for the given age,
|
||||
sorted by extractor ID.
|
||||
"""
|
||||
|
||||
return sorted(
|
||||
filter(lambda ie: ie.is_suitable(age_limit), gen_extractors()),
|
||||
key=lambda ie: ie.IE_NAME.lower())
|
||||
|
||||
|
||||
def get_info_extractor(ie_name):
|
||||
"""Returns the info extractor class with the given ie_name"""
|
||||
return globals()[ie_name + 'IE']
|
||||
|
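As a usage sketch for the `list_extractors` helper introduced above (illustrative only; it mirrors the call made in one of the devscripts hunks earlier in this diff):

```python
import youtube_dl

# Print the names of all extractors suitable for the given age limit
# (None means no restriction, so every extractor is listed).
for ie in youtube_dl.list_extractors(age_limit=None):
    print(ie.IE_NAME + ('' if ie.working() else ' (currently broken)'))
```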
@@ -1,11 +1,15 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .soundcloud import SoundcloudIE
|
||||
from ..utils import ExtractorError
|
||||
|
||||
import time
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
class AudiomackIE(InfoExtractor):
|
||||
@@ -17,12 +21,13 @@ class AudiomackIE(InfoExtractor):
|
||||
'url': 'http://www.audiomack.com/song/roosh-williams/extraordinary',
|
||||
'info_dict':
|
||||
{
|
||||
'id': 'roosh-williams/extraordinary',
|
||||
'id': '310086',
|
||||
'ext': 'mp3',
|
||||
'title': 'Roosh Williams - Extraordinary'
|
||||
'uploader': 'Roosh Williams',
|
||||
'title': 'Extraordinary'
|
||||
}
|
||||
},
|
||||
# hosted on soundcloud via audiomack
|
||||
# audiomack wrapper around soundcloud song
|
||||
{
|
||||
'add_ie': ['Soundcloud'],
|
||||
'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare',
|
||||
@@ -38,32 +43,97 @@ class AudiomackIE(InfoExtractor):
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
# URLs end with [uploader name]/[uploader title]
|
||||
# this title is whatever the user types in, and is rarely
|
||||
# the proper song title. Real metadata is in the api response
|
||||
album_url_tag = self._match_id(url)
|
||||
|
||||
# Request the extended version of the api for extra fields like artist and title
|
||||
api_response = self._download_json(
|
||||
"http://www.audiomack.com/api/music/url/song/%s?_=%d" % (
|
||||
video_id, time.time()),
|
||||
video_id)
|
||||
'http://www.audiomack.com/api/music/url/song/%s?extended=1&_=%d' % (
|
||||
album_url_tag, time.time()),
|
||||
album_url_tag)
|
||||
|
||||
if "url" not in api_response:
|
||||
raise ExtractorError("Unable to deduce api url of song")
|
||||
realurl = api_response["url"]
|
||||
# API is inconsistent with errors
|
||||
if 'url' not in api_response or not api_response['url'] or 'error' in api_response:
|
||||
raise ExtractorError('Invalid url %s', url)
|
||||
|
||||
# Audiomack wraps a lot of soundcloud tracks in their branded wrapper
|
||||
# - if so, pass the work off to the soundcloud extractor
|
||||
if SoundcloudIE.suitable(realurl):
|
||||
return {'_type': 'url', 'url': realurl, 'ie_key': 'Soundcloud'}
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
artist = self._html_search_regex(
|
||||
r'<span class="artist">(.*?)</span>', webpage, "artist")
|
||||
songtitle = self._html_search_regex(
|
||||
r'<h1 class="profile-title song-title"><span class="artist">.*?</span>(.*?)</h1>',
|
||||
webpage, "title")
|
||||
title = artist + " - " + songtitle
|
||||
# if so, pass the work off to the soundcloud extractor
|
||||
if SoundcloudIE.suitable(api_response['url']):
|
||||
return {'_type': 'url', 'url': api_response['url'], 'ie_key': 'Soundcloud'}
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': realurl,
|
||||
'id': api_response.get('id', album_url_tag),
|
||||
'uploader': api_response.get('artist'),
|
||||
'title': api_response.get('title'),
|
||||
'url': api_response['url'],
|
||||
}
|
||||
|
||||
|
||||
class AudiomackAlbumIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?audiomack\.com/album/(?P<id>[\w/-]+)'
|
||||
IE_NAME = 'audiomack:album'
|
||||
_TESTS = [
|
||||
# Standard album playlist
|
||||
{
|
||||
'url': 'http://www.audiomack.com/album/flytunezcom/tha-tour-part-2-mixtape',
|
||||
'playlist_count': 15,
|
||||
'info_dict':
|
||||
{
|
||||
'id': '812251',
|
||||
'title': 'Tha Tour: Part 2 (Official Mixtape)'
|
||||
}
|
||||
},
|
||||
# Album playlist ripped from fakeshoredrive with no metadata
|
||||
{
|
||||
'url': 'http://www.audiomack.com/album/fakeshoredrive/ppp-pistol-p-project',
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'title': '9.-heaven-or-hell-chimaca-ft-zuse-prod-by-dj-fu',
|
||||
'id': '9.-heaven-or-hell-chimaca-ft-zuse-prod-by-dj-fu',
|
||||
'ext': 'mp3',
|
||||
}
|
||||
}],
|
||||
'params': {
|
||||
'playliststart': 8,
|
||||
'playlistend': 8,
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
# URLs end with [uploader name]/[uploader title]
|
||||
# this title is whatever the user types in, and is rarely
|
||||
# the proper song title. Real metadata is in the api response
|
||||
album_url_tag = self._match_id(url)
|
||||
result = {'_type': 'playlist', 'entries': []}
|
||||
# There is no one endpoint for album metadata - instead it is included/repeated in each song's metadata
|
||||
# Therefore we don't know how many songs the album has and must infi-loop until failure
|
||||
for track_no in itertools.count():
|
||||
# Get song's metadata
|
||||
api_response = self._download_json(
|
||||
'http://www.audiomack.com/api/music/url/album/%s/%d?extended=1&_=%d'
|
||||
% (album_url_tag, track_no, time.time()), album_url_tag,
|
||||
note='Querying song information (%d)' % (track_no + 1))
|
||||
|
||||
# Total failure, only occurs when url is totally wrong
|
||||
# Won't happen in middle of valid playlist (next case)
|
||||
if 'url' not in api_response or 'error' in api_response:
|
||||
raise ExtractorError('Invalid url for track %d of album url %s' % (track_no, url))
|
||||
# URL is good but song id doesn't exist - usually means end of playlist
|
||||
elif not api_response['url']:
|
||||
break
|
||||
else:
|
||||
# Pull out the album metadata and add to result (if it exists)
|
||||
for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]:
|
||||
if apikey in api_response and resultkey not in result:
|
||||
result[resultkey] = api_response[apikey]
|
||||
song_id = url_basename(api_response['url']).rpartition('.')[0]
|
||||
result['entries'].append({
|
||||
'id': api_response.get('id', song_id),
|
||||
'uploader': api_response.get('artist'),
|
||||
'title': api_response.get('title', song_id),
|
||||
'url': api_response['url'],
|
||||
})
|
||||
return result
|
||||
|
@@ -7,6 +7,7 @@ from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
@@ -27,23 +28,18 @@ class AUEngineIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._html_search_regex(r'<title>(?P<title>.+?)</title>', webpage, 'title')
|
||||
title = title.strip()
|
||||
links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage)
|
||||
links = map(compat_urllib_parse.unquote, links)
|
||||
title = self._html_search_regex(
|
||||
r'<title>\s*(?P<title>.+?)\s*</title>', webpage, 'title')
|
||||
video_urls = re.findall(r'http://\w+.auengine.com/vod/.*[^\W]', webpage)
|
||||
video_url = compat_urllib_parse.unquote(video_urls[0])
|
||||
thumbnails = re.findall(r'http://\w+.auengine.com/thumb/.*[^\W]', webpage)
|
||||
thumbnail = compat_urllib_parse.unquote(thumbnails[0])
|
||||
|
||||
thumbnail = None
|
||||
video_url = None
|
||||
for link in links:
|
||||
if link.endswith('.png'):
|
||||
thumbnail = link
|
||||
elif '/videos/' in link:
|
||||
video_url = link
|
||||
if not video_url:
|
||||
raise ExtractorError('Could not find video URL')
|
||||
|
||||
ext = '.' + determine_ext(video_url)
|
||||
if ext == title[-len(ext):]:
|
||||
title = title[:-len(ext)]
|
||||
title = remove_end(title, ext)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -10,7 +10,7 @@ from ..compat import compat_HTTPError
|
||||
class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||
IE_NAME = 'bbc.co.uk'
|
||||
IE_DESC = 'BBC iPlayer'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:programmes|iplayer/(?:episode|playlist))/(?P<id>[\da-z]{8})'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -18,8 +18,8 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'b039d07m',
|
||||
'ext': 'flv',
|
||||
'title': 'Kaleidoscope: Leonard Cohen',
|
||||
'description': 'md5:db4755d7a665ae72343779f7dacb402c',
|
||||
'title': 'Kaleidoscope, Leonard Cohen',
|
||||
'description': 'The Canadian poet and songwriter reflects on his musical career.',
|
||||
'duration': 1740,
|
||||
},
|
||||
'params': {
|
||||
@@ -84,9 +84,40 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/music/clips/p02frcc3',
|
||||
'note': 'Audio',
|
||||
'info_dict': {
|
||||
'id': 'p02frcch',
|
||||
'ext': 'flv',
|
||||
'title': 'Pete Tong, Past, Present and Future Special, Madeon - After Hours mix',
|
||||
'description': 'French house superstar Madeon takes us out of the club and onto the after party.',
|
||||
'duration': 3507,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
|
||||
'note': 'Video',
|
||||
'info_dict': {
|
||||
'id': 'p025c103',
|
||||
'ext': 'flv',
|
||||
'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
|
||||
'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
|
||||
'duration': 226,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
|
@@ -16,7 +16,7 @@ class BetIE(InfoExtractor):
|
||||
{
|
||||
'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html',
|
||||
'info_dict': {
|
||||
'id': '417cd61c-c793-4e8e-b006-e445ecc45add',
|
||||
'id': '740ab250-bb94-4a8a-8787-fe0de7c74471',
|
||||
'display_id': 'in-bet-exclusive-obama-talks-race-and-racism',
|
||||
'ext': 'flv',
|
||||
'title': 'BET News Presents: A Conversation With President Obama',
|
||||
@@ -35,7 +35,7 @@ class BetIE(InfoExtractor):
|
||||
{
|
||||
'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html',
|
||||
'info_dict': {
|
||||
'id': '4160e53b-ad41-43b1-980f-8d85f63121f4',
|
||||
'id': 'bcd1b1df-673a-42cf-8d01-b282db608f2d',
|
||||
'display_id': 'justice-for-ferguson-a-community-reacts',
|
||||
'ext': 'flv',
|
||||
'title': 'Justice for Ferguson: A Community Reacts',
|
||||
@@ -55,7 +55,6 @@ class BetIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
media_url = compat_urllib_parse.unquote(self._search_regex(
|
||||
|
@@ -4,9 +4,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_parse_qs
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
@@ -54,45 +52,38 @@ class BiliBiliIE(InfoExtractor):
|
||||
thumbnail = self._html_search_meta(
|
||||
'thumbnailUrl', video_code, 'thumbnail', fatal=False)
|
||||
|
||||
player_params = compat_parse_qs(self._html_search_regex(
|
||||
r'<iframe .*?class="player" src="https://secure\.bilibili\.(?:tv|com)/secure,([^"]+)"',
|
||||
webpage, 'player params'))
|
||||
cid = self._search_regex(r'cid=(\d+)', webpage, 'cid')
|
||||
|
||||
if 'cid' in player_params:
|
||||
cid = player_params['cid'][0]
|
||||
lq_doc = self._download_xml(
|
||||
'http://interface.bilibili.com/v_cdn_play?appkey=1&cid=%s' % cid,
|
||||
video_id,
|
||||
note='Downloading LQ video info'
|
||||
)
|
||||
lq_durl = lq_doc.find('./durl')
|
||||
formats = [{
|
||||
'format_id': 'lq',
|
||||
'quality': 1,
|
||||
'url': lq_durl.find('./url').text,
|
||||
'filesize': int_or_none(
|
||||
lq_durl.find('./size'), get_attr='text'),
|
||||
}]
|
||||
|
||||
lq_doc = self._download_xml(
|
||||
'http://interface.bilibili.cn/v_cdn_play?cid=%s' % cid,
|
||||
video_id,
|
||||
note='Downloading LQ video info'
|
||||
)
|
||||
lq_durl = lq_doc.find('.//durl')
|
||||
formats = [{
|
||||
'format_id': 'lq',
|
||||
'quality': 1,
|
||||
'url': lq_durl.find('./url').text,
|
||||
hq_doc = self._download_xml(
|
||||
'http://interface.bilibili.com/playurl?appkey=1&cid=%s' % cid,
|
||||
video_id,
|
||||
note='Downloading HQ video info',
|
||||
fatal=False,
|
||||
)
|
||||
if hq_doc is not False:
|
||||
hq_durl = hq_doc.find('./durl')
|
||||
formats.append({
|
||||
'format_id': 'hq',
|
||||
'quality': 2,
|
||||
'ext': 'flv',
|
||||
'url': hq_durl.find('./url').text,
|
||||
'filesize': int_or_none(
|
||||
lq_durl.find('./size'), get_attr='text'),
|
||||
}]
|
||||
|
||||
hq_doc = self._download_xml(
|
||||
'http://interface.bilibili.cn/playurl?cid=%s' % cid,
|
||||
video_id,
|
||||
note='Downloading HQ video info',
|
||||
fatal=False,
|
||||
)
|
||||
if hq_doc is not False:
|
||||
hq_durl = hq_doc.find('.//durl')
|
||||
formats.append({
|
||||
'format_id': 'hq',
|
||||
'quality': 2,
|
||||
'ext': 'flv',
|
||||
'url': hq_durl.find('./url').text,
|
||||
'filesize': int_or_none(
|
||||
hq_durl.find('./size'), get_attr='text'),
|
||||
})
|
||||
else:
|
||||
raise ExtractorError('Unsupported player parameters: %r' % (player_params,))
|
||||
hq_durl.find('./size'), get_attr='text'),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
|
@@ -33,7 +33,7 @@ class BuzzFeedIE(InfoExtractor):
|
||||
'skip_download': True, # Got enough YouTube download tests
|
||||
},
|
||||
'info_dict': {
|
||||
'description': 'Munchkin the Teddy Bear is back !',
|
||||
'description': 're:Munchkin the Teddy Bear is back ?!',
|
||||
'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill',
|
||||
},
|
||||
'playlist': [{
|
||||
@@ -42,9 +42,9 @@ class BuzzFeedIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20141124',
|
||||
'uploader_id': 'CindysMunchkin',
|
||||
'description': '© 2014 Munchkin the Shih Tzu\nAll rights reserved\nFacebook: http://facebook.com/MunchkintheShihTzu',
|
||||
'description': 're:© 2014 Munchkin the Shih Tzu',
|
||||
'uploader': 'Munchkin the Shih Tzu',
|
||||
'title': 'Munchkin the Teddy Bear gets her exercise',
|
||||
'title': 're:Munchkin the Teddy Bear gets her exercise',
|
||||
},
|
||||
}]
|
||||
}]
|
||||
|
@@ -5,6 +5,8 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
unified_strdate,
|
||||
url_basename,
|
||||
qualities,
|
||||
@@ -76,6 +78,16 @@ class CanalplusIE(InfoExtractor):
|
||||
|
||||
preference = qualities(['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD', 'HLS', 'HDS'])
|
||||
|
||||
fmt_url = next(iter(media.find('VIDEOS'))).text
|
||||
if '/geo' in fmt_url.lower():
|
||||
response = self._request_webpage(
|
||||
HEADRequest(fmt_url), video_id,
|
||||
'Checking if the video is georestricted')
|
||||
if '/blocage' in response.geturl():
|
||||
raise ExtractorError(
|
||||
'The video is not available in your country',
|
||||
expected=True)
|
||||
|
||||
formats = []
|
||||
for fmt in media.find('VIDEOS'):
|
||||
format_url = fmt.text
|
||||
|
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
@@ -15,7 +15,7 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class CeskaTelevizeIE(InfoExtractor):
|
||||
class CeskaTelevizeIE(SubtitlesInfoExtractor):
|
||||
_VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'
|
||||
|
||||
_TESTS = [
|
||||
@@ -104,6 +104,17 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
duration = float_or_none(item.get('duration'))
|
||||
thumbnail = item.get('previewImageUrl')
|
||||
|
||||
subtitles = {}
|
||||
subs = item.get('subtitles')
|
||||
if subs:
|
||||
subtitles['cs'] = subs[0]['url']
|
||||
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id, subtitles)
|
||||
return
|
||||
|
||||
subtitles = self._fix_subtitles(self.extract_subtitles(video_id, subtitles))
|
||||
|
||||
return {
|
||||
'id': episode_id,
|
||||
'title': title,
|
||||
@@ -111,4 +122,34 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _fix_subtitles(subtitles):
|
||||
""" Convert millisecond-based subtitles to SRT """
|
||||
if subtitles is None:
|
||||
return subtitles # subtitles not requested
|
||||
|
||||
def _msectotimecode(msec):
|
||||
""" Helper utility to convert milliseconds to timecode """
|
||||
components = []
|
||||
for divider in [1000, 60, 60, 100]:
|
||||
components.append(msec % divider)
|
||||
msec //= divider
|
||||
return "{3:02}:{2:02}:{1:02},{0:03}".format(*components)
|
||||
|
||||
def _fix_subtitle(subtitle):
|
||||
for line in subtitle.splitlines():
|
||||
m = re.match(r"^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$", line)
|
||||
if m:
|
||||
yield m.group(1)
|
||||
start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:])
|
||||
yield "{0} --> {1}".format(start, stop)
|
||||
else:
|
||||
yield line
|
||||
|
||||
fixed_subtitles = {}
|
||||
for k, v in subtitles.items():
|
||||
fixed_subtitles[k] = "\r\n".join(_fix_subtitle(v))
|
||||
return fixed_subtitles
|
||||
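As a quick sanity check of the millisecond-to-timecode helper above, a standalone worked example (not part of the diff): 3,661,123 ms is 1 hour, 1 minute, 1.123 seconds, so the SRT timecode should be `01:01:01,123`. The function below is a renamed copy of the same successive-modulo logic:

```python
def msec_to_timecode(msec):
    # Peel off milliseconds, seconds, minutes, then hours via successive modulo.
    components = []
    for divider in [1000, 60, 60, 100]:
        components.append(msec % divider)
        msec //= divider
    return "{3:02}:{2:02}:{1:02},{0:03}".format(*components)

assert msec_to_timecode(3661123) == "01:01:01,123"
```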
|
@@ -21,6 +21,7 @@ from ..compat import (
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
age_restricted,
|
||||
clean_html,
|
||||
compiled_regex_type,
|
||||
ExtractorError,
|
||||
@@ -92,6 +93,8 @@ class InfoExtractor(object):
|
||||
by this field, regardless of all other values.
|
||||
-1 for default (order by other properties),
|
||||
-2 or smaller for less than default.
|
||||
< -1000 to hide the format (if there is
|
||||
another one which is strictly better)
|
||||
* language_preference Is this in the correct requested
|
||||
language?
|
||||
10 if it's what the URL is about,
|
||||
@@ -111,6 +114,9 @@ class InfoExtractor(object):
|
||||
to add to the request.
|
||||
* http_post_data Additional data to send with a POST
|
||||
request.
|
||||
* stretched_ratio If given and not 1, indicates that the
|
||||
video's pixels are not square.
|
||||
width : height ratio as float.
|
||||
url: Final video URL.
|
||||
ext: Video filename extension.
|
||||
format: The video format, defaults to ext (used for --get-format)
|
||||
@@ -144,6 +150,17 @@ class InfoExtractor(object):
|
||||
like_count: Number of positive ratings of the video
|
||||
dislike_count: Number of negative ratings of the video
|
||||
comment_count: Number of comments on the video
|
||||
comments: A list of comments, each with one or more of the following
|
||||
properties (all but one of text or html optional):
|
||||
* "author" - human-readable name of the comment author
|
||||
* "author_id" - user ID of the comment author
|
||||
* "id" - Comment ID
|
||||
* "html" - Comment as HTML
|
||||
* "text" - Plain text of the comment
|
||||
* "timestamp" - UNIX timestamp of comment
|
||||
* "parent" - ID of the comment this one is replying to.
|
||||
Set to "root" to indicate that this is a
|
||||
comment to the original video.
|
||||
age_limit: Age restriction for the video, as an integer (years)
|
||||
webpage_url: The url to the video webpage, if given to youtube-dl it
|
||||
should allow to get the same result again. (It will be set
|
||||
@@ -362,9 +379,19 @@ class InfoExtractor(object):
|
||||
|
||||
return content
|
||||
|
||||
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
|
||||
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5):
|
||||
""" Returns the data of the page as a string """
|
||||
res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal)
|
||||
success = False
|
||||
try_count = 0
|
||||
while success is False:
|
||||
try:
|
||||
res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal)
|
||||
success = True
|
||||
except compat_http_client.IncompleteRead as e:
|
||||
try_count += 1
|
||||
if try_count >= tries:
|
||||
raise e
|
||||
self._sleep(timeout, video_id)
|
||||
if res is False:
|
||||
return res
|
||||
else:
|
||||
@@ -591,7 +618,7 @@ class InfoExtractor(object):
|
||||
return self._html_search_regex(
|
||||
r'''(?isx)<meta
|
||||
(?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
|
||||
[^>]+content=(["\'])(?P<content>.*?)\1''' % re.escape(name),
|
||||
[^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(name),
|
||||
html, display_name, fatal=fatal, group='content', **kwargs)
|
||||
|
||||
def _dc_search_uploader(self, html):
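
The regex change above fixes the group numbering in _html_search_meta: the optional quote inside the lookahead is group 1, so the quote around the content attribute has to be backreferenced as group 2, and the non-greedy [^>]+? keeps the match from skipping past the first content attribute. A small self-contained illustration of the same pattern (the helper name and sample HTML are made up for the example):

import re


def search_meta_content(html, name):
    # Match a <meta> tag whose itemprop/name/property equals `name`,
    # regardless of attribute order, and return its content value.
    pattern = r'''(?isx)<meta
        (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
        [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(name)
    m = re.search(pattern, html)
    return m.group('content') if m else None


print(search_meta_content('<meta content="Hello" property="og:title">', 'og:title'))  # Hello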
|
||||
@@ -875,6 +902,35 @@ class InfoExtractor(object):
|
||||
None, '/', True, False, expire_time, '', None, None, None)
|
||||
self._downloader.cookiejar.set_cookie(cookie)
|
||||
|
||||
def get_testcases(self, include_onlymatching=False):
|
||||
t = getattr(self, '_TEST', None)
|
||||
if t:
|
||||
assert not hasattr(self, '_TESTS'), \
|
||||
'%s has _TEST and _TESTS' % type(self).__name__
|
||||
tests = [t]
|
||||
else:
|
||||
tests = getattr(self, '_TESTS', [])
|
||||
for t in tests:
|
||||
if not include_onlymatching and t.get('only_matching', False):
|
||||
continue
|
||||
t['name'] = type(self).__name__[:-len('IE')]
|
||||
yield t
|
||||
|
||||
def is_suitable(self, age_limit):
|
||||
""" Test whether the extractor is generally suitable for the given
|
||||
age limit (i.e. pornographic sites are not, all others usually are) """
|
||||
|
||||
any_restricted = False
|
||||
for tc in self.get_testcases(include_onlymatching=False):
|
||||
if 'playlist' in tc:
|
||||
tc = tc['playlist'][0]
|
||||
is_restricted = age_restricted(
|
||||
tc.get('info_dict', {}).get('age_limit'), age_limit)
|
||||
if not is_restricted:
|
||||
return True
|
||||
any_restricted = any_restricted or is_restricted
|
||||
return not any_restricted
|
||||
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
|
@@ -228,7 +228,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False)
|
||||
|
||||
formats = []
|
||||
for fmt in re.findall(r'\?p([0-9]{3,4})=1', webpage):
|
||||
for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage):
|
||||
stream_quality, stream_format = self._FORMAT_IDS[fmt]
|
||||
video_format = fmt + 'p'
|
||||
streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/')
|
||||
|
@@ -1,47 +1,45 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class DiscoveryIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
|
||||
_VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9_\-]*)(?:\.htm)?'
|
||||
_TEST = {
|
||||
'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
|
||||
'md5': 'e12614f9ee303a6ccef415cb0793eba2',
|
||||
'md5': '3c69d77d9b0d82bfd5e5932a60f26504',
|
||||
'info_dict': {
|
||||
'id': '614784',
|
||||
'ext': 'mp4',
|
||||
'title': 'MythBusters: Mission Impossible Outtakes',
|
||||
'id': 'mission-impossible-outtakes',
|
||||
'ext': 'flv',
|
||||
'title': 'Mission Impossible Outtakes',
|
||||
'description': ('Watch Jamie Hyneman and Adam Savage practice being'
|
||||
' each other -- to the point of confusing Jamie\'s dog -- and '
|
||||
'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s'
|
||||
' back.'),
|
||||
'duration': 156,
|
||||
'timestamp': 1303099200,
|
||||
'upload_date': '20110418',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_list_json = self._search_regex(r'var videoListJSON = ({.*?});',
|
||||
webpage, 'video list', flags=re.DOTALL)
|
||||
video_list = json.loads(video_list_json)
|
||||
info = video_list['clips'][0]
|
||||
formats = []
|
||||
for f in info['mp4']:
|
||||
formats.append(
|
||||
{'url': f['src'], 'ext': 'mp4', 'tbr': int(f['bitrate'][:-1])})
|
||||
info = self._parse_json(self._search_regex(
|
||||
r'(?s)<script type="application/ld\+json">(.*?)</script>',
|
||||
webpage, 'video info'), video_id)
|
||||
|
||||
return {
|
||||
'id': info['contentId'],
|
||||
'title': video_list['name'],
|
||||
'formats': formats,
|
||||
'description': info['videoCaption'],
|
||||
'thumbnail': info.get('videoStillURL') or info.get('thumbnailURL'),
|
||||
'duration': info['duration'],
|
||||
'id': video_id,
|
||||
'title': info['name'],
|
||||
'url': info['contentURL'],
|
||||
'description': info.get('description'),
|
||||
'thumbnail': info.get('thumbnailUrl'),
|
||||
'timestamp': parse_iso8601(info.get('uploadDate')),
|
||||
'duration': int_or_none(info.get('duration')),
|
||||
}
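
The rewritten Discovery extractor reads the page's JSON-LD block instead of the old videoListJSON variable. A minimal sketch of that approach, with an illustrative HTML snippet standing in for the downloaded page:

import json
import re


def extract_ld_json(webpage):
    # Grab the first <script type="application/ld+json"> block and parse it.
    m = re.search(
        r'(?s)<script type="application/ld\+json">(.*?)</script>', webpage)
    return json.loads(m.group(1)) if m else None


sample = '<script type="application/ld+json">{"name": "Mission Impossible Outtakes", "duration": 156}</script>'
info = extract_ld_json(sample)
print(info['name'], info['duration'])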
|
||||
|
@@ -9,6 +9,9 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class EightTracksIE(InfoExtractor):
|
||||
@@ -112,14 +115,30 @@ class EightTracksIE(InfoExtractor):
|
||||
session = str(random.randint(0, 1000000000))
|
||||
mix_id = data['id']
|
||||
track_count = data['tracks_count']
|
||||
duration = data['duration']
|
||||
avg_song_duration = float(duration) / track_count
|
||||
first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
|
||||
next_url = first_url
|
||||
entries = []
|
||||
|
||||
for i in range(track_count):
|
||||
api_json = self._download_webpage(
|
||||
next_url, playlist_id,
|
||||
note='Downloading song information %d/%d' % (i + 1, track_count),
|
||||
errnote='Failed to download song information')
|
||||
|
||||
api_json = None
|
||||
download_tries = 0
|
||||
|
||||
while api_json is None:
|
||||
try:
|
||||
api_json = self._download_webpage(
|
||||
next_url, playlist_id,
|
||||
note='Downloading song information %d/%d' % (i + 1, track_count),
|
||||
errnote='Failed to download song information')
|
||||
except ExtractorError:
|
||||
if download_tries > 3:
|
||||
raise
|
||||
else:
|
||||
download_tries += 1
|
||||
self._sleep(avg_song_duration, playlist_id)
|
||||
|
||||
api_data = json.loads(api_json)
|
||||
track_data = api_data['set']['track']
|
||||
info = {
|
||||
@@ -131,6 +150,7 @@ class EightTracksIE(InfoExtractor):
|
||||
'ext': 'm4a',
|
||||
}
|
||||
entries.append(info)
|
||||
|
||||
next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (
|
||||
session, mix_id, track_data['id'])
|
||||
return {
|
||||
|
@@ -1,7 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -12,32 +11,49 @@ from ..utils import (
|
||||
|
||||
|
||||
class EllenTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ellentv\.com/videos/(?P<id>[a-z0-9_-]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ellentv.com/videos/0-7jqrsr18/',
|
||||
'md5': 'e4af06f3bf0d5f471921a18db5764642',
|
||||
'info_dict': {
|
||||
'id': '0-7jqrsr18',
|
||||
'ext': 'mp4',
|
||||
'title': 'What\'s Wrong with These Photos? A Whole Lot',
|
||||
'description': 'md5:35f152dc66b587cf13e6d2cf4fa467f6',
|
||||
'timestamp': 1406876400,
|
||||
'upload_date': '20140801',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://ellentube.com/videos/0-dvzmabd5/',
|
||||
'md5': '98238118eaa2bbdf6ad7f708e3e4f4eb',
|
||||
'info_dict': {
|
||||
'id': '0-dvzmabd5',
|
||||
'ext': 'mp4',
|
||||
'title': '1 year old twin sister makes her brother laugh',
|
||||
'description': '1 year old twin sister makes her brother laugh',
|
||||
'timestamp': 1419542075,
|
||||
'upload_date': '20141225',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_url = self._html_search_meta('VideoURL', webpage, 'url')
|
||||
title = self._og_search_title(webpage, default=None) or self._search_regex(
|
||||
r'pageName\s*=\s*"([^"]+)"', webpage, 'title')
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, 'description') or self._og_search_description(webpage)
|
||||
timestamp = parse_iso8601(self._search_regex(
|
||||
r'<span class="publish-date"><time datetime="([^"]+)">',
|
||||
webpage, 'timestamp'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'url': self._html_search_meta('VideoURL', webpage, 'url'),
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
}
|
||||
|
||||
@@ -55,8 +71,7 @@ class EllenTVClipsIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
playlist_id = mobj.group('id')
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
playlist = self._extract_playlist(webpage)
|
||||
|
@@ -1,8 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
|
||||
@@ -24,9 +22,7 @@ class ElPaisIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
prefix = self._html_search_regex(
|
||||
|
@@ -13,7 +13,7 @@ from ..utils import (
|
||||
|
||||
class FKTVIE(InfoExtractor):
|
||||
IE_NAME = 'fernsehkritik.tv'
|
||||
_VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/folge-(?P<ep>[0-9]+)(?:/.*)?'
|
||||
_VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://fernsehkritik.tv/folge-1',
|
||||
@@ -26,29 +26,32 @@ class FKTVIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
episode = int(mobj.group('ep'))
|
||||
episode = int(self._match_id(url))
|
||||
|
||||
server = random.randint(2, 4)
|
||||
video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%d.jpg' % episode
|
||||
start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%d/Start' % episode,
|
||||
video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%s.jpg' % episode
|
||||
start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%s/Start' % episode,
|
||||
episode)
|
||||
playlist = self._search_regex(r'playlist = (\[.*?\]);', start_webpage,
|
||||
'playlist', flags=re.DOTALL)
|
||||
files = json.loads(re.sub('{[^{}]*?}', '{}', playlist))
|
||||
# TODO: return a single multipart video
|
||||
|
||||
videos = []
|
||||
for i, _ in enumerate(files, 1):
|
||||
video_id = '%04d%d' % (episode, i)
|
||||
video_url = 'http://dl%d.fernsehkritik.tv/fernsehkritik%d%s.flv' % (server, episode, '' if i == 1 else '-%d' % i)
|
||||
video_url = 'http://fernsehkritik.tv/js/directme.php?file=%s%s.flv' % (episode, '' if i == 1 else '-%d' % i)
|
||||
videos.append({
|
||||
'ext': 'flv',
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': clean_html(get_element_by_id('eptitle', start_webpage)),
|
||||
'description': clean_html(get_element_by_id('contentlist', start_webpage)),
|
||||
'thumbnail': video_thumbnail
|
||||
})
|
||||
return videos
|
||||
return {
|
||||
'_type': 'multi_video',
|
||||
'entries': videos,
|
||||
'id': 'folge-%s' % episode,
|
||||
}
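
Instead of returning a bare list of per-part dicts, the extractor now wraps them in a multi_video result. A trimmed sketch of that shape, keeping only the essential fields and using illustrative part data:

def multipart_result(episode, parts):
    # Build one entry per part and wrap them in a single multi_video result.
    entries = [{
        'id': '%04d%d' % (episode, i),
        'url': url,
        'title': title,
    } for i, (url, title) in enumerate(parts, 1)]
    return {
        '_type': 'multi_video',
        'entries': entries,
        'id': 'folge-%s' % episode,
    }


print(multipart_result(1, [('http://example.com/fernsehkritik1.flv', 'Folge 1')]))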
|
||||
|
||||
|
||||
class FKTVPosteckeIE(InfoExtractor):
|
||||
|
@@ -57,8 +57,7 @@ class GameOneIE(InfoExtractor):
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
og_video = self._og_search_video_url(webpage, secure=False)
|
||||
|
@@ -39,7 +39,8 @@ class GDCVaultIE(InfoExtractor):
|
||||
'id': '1015301',
|
||||
'ext': 'flv',
|
||||
'title': 'Thexder Meets Windows 95, or Writing Great Games in the Windows 95 Environment',
|
||||
}
|
||||
},
|
||||
'skip': 'Requires login',
|
||||
}
|
||||
]
|
||||
|
||||
|
@@ -131,12 +131,13 @@ class GenericIE(InfoExtractor):
|
||||
# ooyala video
|
||||
{
|
||||
'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
|
||||
'md5': '5644c6ca5d5782c1d0d350dad9bd840c',
|
||||
'md5': '166dd577b433b4d4ebfee10b0824d8ff',
|
||||
'info_dict': {
|
||||
'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
|
||||
'ext': 'mp4',
|
||||
'title': '2cc213299525360.mov', # that's what we get
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
},
|
||||
# google redirect
|
||||
{
|
||||
@@ -146,7 +147,7 @@ class GenericIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20130224',
|
||||
'uploader_id': 'TheVerge',
|
||||
'description': 'Chris Ziegler takes a look at the Alcatel OneTouch Fire and the ZTE Open; two of the first Firefox OS handsets to be officially announced.',
|
||||
'description': 're:^Chris Ziegler takes a look at the\.*',
|
||||
'uploader': 'The Verge',
|
||||
'title': 'First Firefox OS phones side-by-side',
|
||||
},
|
||||
@@ -181,6 +182,14 @@ class GenericIE(InfoExtractor):
|
||||
'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
|
||||
},
|
||||
},
|
||||
# BBC iPlayer embeds
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
|
||||
'info_dict': {
|
||||
'title': 'BBC - Blogs - Adam Curtis - BUGGER',
|
||||
},
|
||||
'playlist_mincount': 18,
|
||||
},
|
||||
# RUTV embed
|
||||
{
|
||||
'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
|
||||
@@ -699,9 +708,9 @@ class GenericIE(InfoExtractor):
|
||||
r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
|
||||
|
||||
# Helper method
|
||||
def _playlist_from_matches(matches, getter, ie=None):
|
||||
def _playlist_from_matches(matches, getter=None, ie=None):
|
||||
urlrs = orderedSet(
|
||||
self.url_result(self._proto_relative_url(getter(m)), ie)
|
||||
self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
|
||||
for m in matches)
|
||||
return self.playlist_result(
|
||||
urlrs, playlist_id=video_id, playlist_title=video_title)
|
||||
@@ -908,7 +917,7 @@ class GenericIE(InfoExtractor):
|
||||
# Look for BBC iPlayer embed
|
||||
matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
|
||||
if matches:
|
||||
return self.playlist_result([self.url_result(video_url, ie='BBCCoUk') for video_url in matches])
|
||||
return _playlist_from_matches(matches, ie='BBCCoUk')
|
||||
|
||||
# Look for embedded RUTV player
|
||||
rutv_url = RUTVIE._extract_url(webpage)
|
||||
@@ -917,7 +926,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Look for embedded TED player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>http://embed\.ted\.com/.+?)\1', webpage)
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'TED')
|
||||
|
||||
|
youtube_dl/extractor/giga.py (new file, 101 lines)
@@ -0,0 +1,101 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
qualities,
|
||||
compat_str,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class GigaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?giga\.de/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.giga.de/filme/anime-awesome/trailer/anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss/',
|
||||
'md5': '6bc5535e945e724640664632055a584f',
|
||||
'info_dict': {
|
||||
'id': '2622086',
|
||||
'display_id': 'anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss',
|
||||
'ext': 'mp4',
|
||||
'title': 'Anime Awesome: Chihiros Reise ins Zauberland – Das Beste kommt zum Schluss',
|
||||
'description': 'md5:afdf5862241aded4718a30dff6a57baf',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 578,
|
||||
'timestamp': 1414749706,
|
||||
'upload_date': '20141031',
|
||||
'uploader': 'Robin Schweiger',
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.giga.de/games/channel/giga-top-montag/giga-topmontag-die-besten-serien-2014/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.giga.de/extra/netzkultur/videos/giga-games-tom-mats-robin-werden-eigene-wege-gehen-eine-ankuendigung/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.giga.de/tv/jonas-liest-spieletitel-eingedeutscht-episode-2/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
[r'data-video-id="(\d+)"', r'/api/video/jwplayer/#v=(\d+)'],
|
||||
webpage, 'video id')
|
||||
|
||||
playlist = self._download_json(
|
||||
'http://www.giga.de/api/syndication/video/video_id/%s/playlist.json?content=syndication/key/368b5f151da4ae05ced7fa296bdff65a/'
|
||||
% video_id, video_id)[0]
|
||||
|
||||
quality = qualities(['normal', 'hd720'])
|
||||
|
||||
formats = []
|
||||
for format_id in itertools.count(0):
|
||||
fmt = playlist.get(compat_str(format_id))
|
||||
if not fmt:
|
||||
break
|
||||
formats.append({
|
||||
'url': fmt['src'],
|
||||
'format_id': '%s-%s' % (fmt['quality'], fmt['type'].split('/')[-1]),
|
||||
'quality': quality(fmt['quality']),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._html_search_meta(
|
||||
'title', webpage, 'title', fatal=True)
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, 'description')
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'(?s)(?:data-video-id="{0}"|data-video="[^"]*/api/video/jwplayer/#v={0}[^"]*")[^>]*>.+?<span class="duration">([^<]+)</span>'.format(video_id),
|
||||
webpage, 'duration', fatal=False))
|
||||
|
||||
timestamp = parse_iso8601(self._search_regex(
|
||||
r'datetime="([^"]+)"', webpage, 'upload date', fatal=False))
|
||||
uploader = self._search_regex(
|
||||
r'class="author">([^<]+)</a>', webpage, 'uploader', fatal=False)
|
||||
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'<span class="views"><strong>([\d.]+)</strong>', webpage, 'view count', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'uploader': uploader,
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
}
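
The new Giga extractor walks a numbered playlist whose entries are stored under string keys "0", "1", ... and stops at the first missing index. A standalone sketch of that loop, with a hard-coded playlist dict in place of the downloaded JSON and a list index standing in for utils.qualities():

import itertools

playlist = {
    '0': {'src': 'http://example.com/video_normal.mp4', 'quality': 'normal', 'type': 'video/mp4'},
    '1': {'src': 'http://example.com/video_hd720.mp4', 'quality': 'hd720', 'type': 'video/mp4'},
}
order = ['normal', 'hd720']

formats = []
for idx in itertools.count(0):
    fmt = playlist.get(str(idx))
    if not fmt:
        break  # consecutive numbering ends here
    formats.append({
        'url': fmt['src'],
        'format_id': '%s-%s' % (fmt['quality'], fmt['type'].split('/')[-1]),
        'quality': order.index(fmt['quality']),
    })
print(formats)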
|
@@ -39,8 +39,9 @@ class HuffPostIE(InfoExtractor):
|
||||
data = self._download_json(api_url, video_id)['data']
|
||||
|
||||
video_title = data['title']
|
||||
duration = parse_duration(data['running_time'])
|
||||
upload_date = unified_strdate(data['schedule']['starts_at'])
|
||||
duration = parse_duration(data.get('running_time'))
|
||||
upload_date = unified_strdate(
|
||||
data.get('schedule', {}).get('starts_at') or data.get('segment_start_date_time'))
|
||||
description = data.get('description')
|
||||
|
||||
thumbnails = []
|
||||
@@ -59,16 +60,11 @@ class HuffPostIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'url': url,
|
||||
'vcodec': 'none' if key.startswith('audio/') else None,
|
||||
} for key, url in data['sources']['live'].items()]
|
||||
if data.get('fivemin_id'):
|
||||
fid = data['fivemin_id']
|
||||
fcat = str(int(fid) // 100 + 1)
|
||||
furl = 'http://avideos.5min.com/2/' + fcat[-3:] + '/' + fcat + '/' + fid + '.mp4'
|
||||
formats.append({
|
||||
'format': 'fivemin',
|
||||
'url': furl,
|
||||
'preference': 1,
|
||||
})
|
||||
} for key, url in data.get('sources', {}).get('live', {}).items()]
|
||||
|
||||
if not formats and data.get('fivemin_id'):
|
||||
return self.url_result('5min:%s' % data['fivemin_id'])
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
@@ -16,7 +16,6 @@ class ImdbIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.imdb.com/video/imdb/vi2524815897',
|
||||
'md5': '9f34fa777ade3a6e57a054fdbcb3a068',
|
||||
'info_dict': {
|
||||
'id': '2524815897',
|
||||
'ext': 'mp4',
|
||||
|
@@ -22,8 +22,10 @@ class KhanAcademyIE(InfoExtractor):
|
||||
'description': 'The perfect cipher',
|
||||
'duration': 176,
|
||||
'uploader': 'Brit Cruise',
|
||||
'uploader_id': 'khanacademy',
|
||||
'upload_date': '20120411',
|
||||
}
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
'url': 'https://www.khanacademy.org/math/applied-math/cryptography',
|
||||
'info_dict': {
|
||||
|
@@ -10,13 +10,14 @@ from ..utils import int_or_none
|
||||
class KontrTubeIE(InfoExtractor):
|
||||
IE_NAME = 'kontrtube'
|
||||
IE_DESC = 'KontrTube.ru - Труба зовёт'
|
||||
_VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/.+'
|
||||
_VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/(?P<display_id>[^/]+)/'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/',
|
||||
'md5': '975a991a4926c9a85f383a736a2e6b80',
|
||||
'info_dict': {
|
||||
'id': '2678',
|
||||
'display_id': 'nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag',
|
||||
'ext': 'mp4',
|
||||
'title': 'Над олимпийской деревней в Сочи поднят российский флаг',
|
||||
'description': 'md5:80edc4c613d5887ae8ccf1d59432be41',
|
||||
@@ -28,21 +29,28 @@ class KontrTubeIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id, 'Downloading page')
|
||||
webpage = self._download_webpage(
|
||||
url, display_id, 'Downloading page')
|
||||
|
||||
video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL')
|
||||
thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False)
|
||||
video_url = self._html_search_regex(
|
||||
r"video_url\s*:\s*'(.+?)/?',", webpage, 'video URL')
|
||||
thumbnail = self._html_search_regex(
|
||||
r"preview_url\s*:\s*'(.+?)/?',", webpage, 'video thumbnail', fatal=False)
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.+?)</title>', webpage, 'video title')
|
||||
description = self._html_search_meta('description', webpage, 'video description')
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, 'video description')
|
||||
|
||||
mobj = re.search(
|
||||
r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>', webpage)
|
||||
r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>',
|
||||
webpage)
|
||||
duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
|
||||
|
||||
view_count = self._html_search_regex(
|
||||
r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage, 'view count', fatal=False)
|
||||
r'<div class="col_2">Просмотров: <span>(\d+)</span></div>',
|
||||
webpage, 'view count', fatal=False)
|
||||
|
||||
comment_count = None
|
||||
comment_str = self._html_search_regex(
|
||||
@@ -56,6 +64,7 @@ class KontrTubeIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'url': video_url,
|
||||
'thumbnail': thumbnail,
|
||||
'title': title,
|
||||
|
@@ -2,7 +2,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -28,7 +27,6 @@ class LRTIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True, # HLS download
|
||||
},
|
||||
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -44,7 +42,9 @@ class LRTIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for js in re.findall(r'(?s)config:\s*(\{.*?\})', webpage):
|
||||
data = json.loads(js_to_json(js))
|
||||
data = self._parse_json(js, video_id, transform_source=js_to_json)
|
||||
if 'provider' not in data:
|
||||
continue
|
||||
if data['provider'] == 'rtmp':
|
||||
formats.append({
|
||||
'format_id': 'rtmp',
|
||||
|
@@ -105,6 +105,9 @@ class OCWMITIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Lecture 7: Multiple Discrete Random Variables: Expectations, Conditioning, Independence',
|
||||
'description': 'In this lecture, the professor discussed multiple random variables, expectations, and binomial distribution.',
|
||||
'upload_date': '20121109',
|
||||
'uploader_id': 'MIT',
|
||||
'uploader': 'MIT OpenCourseWare',
|
||||
# 'subtitles': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/MIT6_041F11_lec07_300k.mp4.srt'
|
||||
}
|
||||
},
|
||||
@@ -114,6 +117,9 @@ class OCWMITIE(InfoExtractor):
|
||||
'id': '7K1sB05pE0A',
|
||||
'ext': 'mp4',
|
||||
'title': 'Session 1: Introduction to Derivatives',
|
||||
'upload_date': '20090818',
|
||||
'uploader_id': 'MIT',
|
||||
'uploader': 'MIT OpenCourseWare',
|
||||
'description': 'This section contains lecture video excerpts, lecture notes, an interactive mathlet with supporting documents, and problem solving videos.',
|
||||
# 'subtitles': 'http://ocw.mit.edu//courses/mathematics/18-01sc-single-variable-calculus-fall-2010/ocw-18.01-f07-lec01_300k.SRT'
|
||||
}
|
||||
|
@@ -1,63 +1,49 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
|
||||
class MotorsportIE(InfoExtractor):
|
||||
IE_DESC = 'motorsport.com'
|
||||
_VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/(?:$|[?#])'
|
||||
_VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])'
|
||||
_TEST = {
|
||||
'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/',
|
||||
'md5': '5592cb7c5005d9b2c163df5ac3dc04e4',
|
||||
'info_dict': {
|
||||
'id': '7063',
|
||||
'id': '2-T3WuR-KMM',
|
||||
'ext': 'mp4',
|
||||
'title': 'Red Bull Racing: 2014 Rules Explained',
|
||||
'duration': 207,
|
||||
'duration': 208,
|
||||
'description': 'A new clip from Red Bull sees Daniel Ricciardo and Sebastian Vettel explain the 2014 Formula One regulations – which are arguably the most complex the sport has ever seen.',
|
||||
'uploader': 'rainiere',
|
||||
'thumbnail': r're:^http://.*motorsport\.com/.+\.jpg$'
|
||||
}
|
||||
'uploader': 'mcomstaff',
|
||||
'uploader_id': 'UC334JIYKkVnyFoNCclfZtHQ',
|
||||
'upload_date': '20140903',
|
||||
'thumbnail': r're:^https?://.+\.jpg$'
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
flashvars_code = self._html_search_regex(
|
||||
r'<embed id="player".*?flashvars="([^"]+)"', webpage, 'flashvars')
|
||||
flashvars = compat_parse_qs(flashvars_code)
|
||||
params = json.loads(flashvars['parameters'][0])
|
||||
|
||||
e = compat_str(int(time.time()) + 24 * 60 * 60)
|
||||
base_video_url = params['location'] + '?e=' + e
|
||||
s = 'h3hg713fh32'
|
||||
h = hashlib.md5((s + base_video_url).encode('utf-8')).hexdigest()
|
||||
video_url = base_video_url + '&h=' + h
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
r'(?s)<span class="label">Video by: </span>(.*?)</a>', webpage,
|
||||
'uploader', fatal=False)
|
||||
iframe_path = self._html_search_regex(
|
||||
r'<iframe id="player_iframe"[^>]+src="([^"]+)"', webpage,
|
||||
'iframe path')
|
||||
iframe = self._download_webpage(
|
||||
compat_urlparse.urljoin(url, iframe_path), display_id,
|
||||
'Downloading iframe')
|
||||
youtube_id = self._search_regex(
|
||||
r'www.youtube.com/embed/(.{11})', iframe, 'youtube id')
|
||||
|
||||
return {
|
||||
'id': params['video_id'],
|
||||
'_type': 'url_transparent',
|
||||
'display_id': display_id,
|
||||
'title': params['title'],
|
||||
'url': video_url,
|
||||
'description': params.get('description'),
|
||||
'thumbnail': params.get('main_thumb'),
|
||||
'duration': int_or_none(params.get('duration')),
|
||||
'uploader': uploader,
|
||||
'url': 'https://youtube.com/watch?v=%s' % youtube_id,
|
||||
}
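
The Motorsport extractor now delegates the actual download to the YouTube extractor while keeping the site's own display_id, via a url_transparent result. A minimal sketch of that hand-off (the function name is illustrative):

def youtube_handoff(display_id, youtube_id):
    # url_transparent lets the YouTube extractor fill in formats and metadata,
    # while fields set here (like display_id) are kept.
    return {
        '_type': 'url_transparent',
        'display_id': display_id,
        'url': 'https://youtube.com/watch?v=%s' % youtube_id,
    }


print(youtube_handoff('red-bull-racing-2014-rules-explained', '2-T3WuR-KMM'))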
|
||||
|
youtube_dl/extractor/netzkino.py (new file, 86 lines)
@@ -0,0 +1,86 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class NetzkinoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?netzkino\.de/\#!/(?P<category>[^/]+)/(?P<id>[^/]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.netzkino.de/#!/scifikino/rakete-zum-mond',
|
||||
'md5': '92a3f8b76f8d7220acce5377ea5d4873',
|
||||
'info_dict': {
|
||||
'id': 'rakete-zum-mond',
|
||||
'ext': 'mp4',
|
||||
'title': 'Rakete zum Mond (Endstation Mond, Destination Moon)',
|
||||
'comments': 'mincount:3',
|
||||
'description': 'md5:1eddeacc7e62d5a25a2d1a7290c64a28',
|
||||
'upload_date': '20120813',
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
'timestamp': 1344858571,
|
||||
'age_limit': 12,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
category_id = mobj.group('category')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
api_url = 'http://api.netzkino.de.simplecache.net/capi-2.0a/categories/%s.json?d=www' % category_id
|
||||
api_info = self._download_json(api_url, video_id)
|
||||
info = next(
|
||||
p for p in api_info['posts'] if p['slug'] == video_id)
|
||||
custom_fields = info['custom_fields']
|
||||
|
||||
production_js = self._download_webpage(
|
||||
'http://www.netzkino.de/beta/dist/production.min.js', video_id,
|
||||
note='Downloading player code')
|
||||
avo_js = self._search_regex(
|
||||
r'window\.avoCore\s*=.*?urlTemplate:\s*(\{.*?"\})',
|
||||
production_js, 'URL templates')
|
||||
templates = self._parse_json(
|
||||
avo_js, video_id, transform_source=js_to_json)
|
||||
|
||||
suffix = {
|
||||
'hds': '.mp4/manifest.f4m',
|
||||
'hls': '.mp4/master.m3u8',
|
||||
'pmd': '.mp4',
|
||||
}
|
||||
film_fn = custom_fields['Streaming'][0]
|
||||
formats = [{
|
||||
'format_id': key,
|
||||
'ext': 'mp4',
|
||||
'url': tpl.replace('{}', film_fn) + suffix[key],
|
||||
} for key, tpl in templates.items()]
|
||||
self._sort_formats(formats)
|
||||
|
||||
comments = [{
|
||||
'timestamp': parse_iso8601(c.get('date'), delimiter=' '),
|
||||
'id': c['id'],
|
||||
'author': c['name'],
|
||||
'html': c['content'],
|
||||
'parent': 'root' if c.get('parent', 0) == 0 else c['parent'],
|
||||
} for c in info.get('comments', [])]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'comments': comments,
|
||||
'title': info['title'],
|
||||
'age_limit': int_or_none(custom_fields.get('FSK')[0]),
|
||||
'timestamp': parse_iso8601(info.get('date'), delimiter=' '),
|
||||
'description': clean_html(info.get('content')),
|
||||
'thumbnail': info.get('thumbnail'),
|
||||
'playlist_title': api_info.get('title'),
|
||||
'playlist_id': category_id,
|
||||
}
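
The new Netzkino extractor expands the player's URL templates into one format per streaming protocol. A standalone sketch of that step, with made-up template URLs in place of the values scraped from production.min.js:

templates = {
    'hls': 'http://example-cdn.example/hls/{}',
    'pmd': 'http://example-cdn.example/pmd/{}',
}
suffix = {
    'hds': '.mp4/manifest.f4m',
    'hls': '.mp4/master.m3u8',
    'pmd': '.mp4',
}
film_fn = 'rakete-zum-mond'

formats = [{
    'format_id': key,
    'ext': 'mp4',
    'url': tpl.replace('{}', film_fn) + suffix[key],
} for key, tpl in templates.items()]
print(formats)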
|
@@ -22,7 +22,11 @@ class NormalbootsIE(InfoExtractor):
|
||||
'description': 'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for ‘Tense Battle Theme’:\xa0http://www.youtube.com/Kiamet/',
|
||||
'uploader': 'JonTron',
|
||||
'upload_date': '20140125',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -72,7 +72,7 @@ class NRKIE(InfoExtractor):
|
||||
|
||||
|
||||
class NRKTVIE(InfoExtractor):
|
||||
_VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})'
|
||||
_VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -85,7 +85,7 @@ class NRKTVIE(InfoExtractor):
|
||||
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
|
||||
'upload_date': '20140523',
|
||||
'duration': 1741.52,
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://tv.nrk.no/program/mdfp15000514',
|
||||
@@ -97,39 +97,119 @@ class NRKTVIE(InfoExtractor):
|
||||
'description': 'md5:654c12511f035aed1e42bdf5db3b206a',
|
||||
'upload_date': '20140524',
|
||||
'duration': 4605.0,
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
# single playlist video
|
||||
'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
|
||||
'md5': 'adbd1dbd813edaf532b0a253780719c2',
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515-part2',
|
||||
'ext': 'flv',
|
||||
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
|
||||
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
||||
'upload_date': '20150106',
|
||||
},
|
||||
'skip': 'Only works from Norway',
|
||||
},
|
||||
{
|
||||
'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
|
||||
'playlist': [
|
||||
{
|
||||
'md5': '9480285eff92d64f06e02a5367970a7a',
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515-part1',
|
||||
'ext': 'flv',
|
||||
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)',
|
||||
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
||||
'upload_date': '20150106',
|
||||
},
|
||||
},
|
||||
{
|
||||
'md5': 'adbd1dbd813edaf532b0a253780719c2',
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515-part2',
|
||||
'ext': 'flv',
|
||||
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
|
||||
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
||||
'upload_date': '20150106',
|
||||
},
|
||||
},
|
||||
],
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515',
|
||||
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn',
|
||||
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
||||
'upload_date': '20150106',
|
||||
'duration': 6947.5199999999995,
|
||||
},
|
||||
'skip': 'Only works from Norway',
|
||||
}
|
||||
]
|
||||
|
||||
def _extract_f4m(self, manifest_url, video_id):
|
||||
return self._extract_f4m_formats(manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
part_id = mobj.group('part_id')
|
||||
|
||||
page = self._download_webpage(url, video_id)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_meta('title', page, 'title')
|
||||
description = self._html_search_meta('description', page, 'description')
|
||||
thumbnail = self._html_search_regex(r'data-posterimage="([^"]+)"', page, 'thumbnail', fatal=False)
|
||||
upload_date = unified_strdate(self._html_search_meta('rightsfrom', page, 'upload date', fatal=False))
|
||||
duration = float_or_none(
|
||||
self._html_search_regex(r'data-duration="([^"]+)"', page, 'duration', fatal=False))
|
||||
title = self._html_search_meta(
|
||||
'title', webpage, 'title')
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, 'description')
|
||||
|
||||
thumbnail = self._html_search_regex(
|
||||
r'data-posterimage="([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
upload_date = unified_strdate(self._html_search_meta(
|
||||
'rightsfrom', webpage, 'upload date', fatal=False))
|
||||
duration = float_or_none(self._html_search_regex(
|
||||
r'data-duration="([^"]+)"',
|
||||
webpage, 'duration', fatal=False))
|
||||
|
||||
# playlist
|
||||
parts = re.findall(
|
||||
r'<a href="#del=(\d+)"[^>]+data-argument="([^"]+)">([^<]+)</a>', webpage)
|
||||
if parts:
|
||||
entries = []
|
||||
for current_part_id, stream_url, part_title in parts:
|
||||
if part_id and current_part_id != part_id:
|
||||
continue
|
||||
video_part_id = '%s-part%s' % (video_id, current_part_id)
|
||||
formats = self._extract_f4m(stream_url, video_part_id)
|
||||
entries.append({
|
||||
'id': video_part_id,
|
||||
'title': part_title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
'formats': formats,
|
||||
})
|
||||
if part_id:
|
||||
if entries:
|
||||
return entries[0]
|
||||
else:
|
||||
playlist = self.playlist_result(entries, video_id, title, description)
|
||||
playlist.update({
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
})
|
||||
return playlist
|
||||
|
||||
formats = []
|
||||
|
||||
f4m_url = re.search(r'data-media="([^"]+)"', page)
|
||||
f4m_url = re.search(r'data-media="([^"]+)"', webpage)
|
||||
if f4m_url:
|
||||
formats.append({
|
||||
'url': f4m_url.group(1) + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124',
|
||||
'format_id': 'f4m',
|
||||
'ext': 'flv',
|
||||
})
|
||||
formats.extend(self._extract_f4m(f4m_url.group(1), video_id))
|
||||
|
||||
m3u8_url = re.search(r'data-hls-media="([^"]+)"', page)
|
||||
m3u8_url = re.search(r'data-hls-media="([^"]+)"', webpage)
|
||||
if m3u8_url:
|
||||
formats.append({
|
||||
'url': m3u8_url.group(1),
|
||||
'format_id': 'm3u8',
|
||||
})
|
||||
formats.extend(self._extract_m3u8_formats(m3u8_url.group(1), video_id, 'mp4'))
|
||||
|
||||
self._sort_formats(formats)
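
The part handling added above turns every playlist part on the page into its own entry with an "<id>-part<n>" id, while a "#del=N" fragment in the requested URL selects a single part. A small sketch of the splitting step, with an illustrative HTML snippet:

import re

webpage = (
    '<a href="#del=1" data-argument="http://example.com/part1">Del 1</a>'
    '<a href="#del=2" data-argument="http://example.com/part2">Del 2</a>')
video_id = 'MSPO40010515'

parts = re.findall(
    r'<a href="#del=(\d+)"[^>]+data-argument="([^"]+)">([^<]+)</a>', webpage)
entries = [{
    'id': '%s-part%s' % (video_id, part_id),
    'title': part_title,
    'url': stream_url,
} for part_id, stream_url, part_title in parts]
print(entries)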
|
||||
|
||||
|
@@ -26,6 +26,7 @@ class PlayedIE(InfoExtractor):
|
||||
'ext': 'flv',
|
||||
'title': 'youtube-dl_test_video.mp4',
|
||||
},
|
||||
'skip': 'Removed for copyright infringement.', # oh wow
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
youtube_dl/extractor/radiobremen.py (new file, 63 lines)
@@ -0,0 +1,63 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_duration
|
||||
|
||||
|
||||
class RadioBremenIE(InfoExtractor):
|
||||
_VALID_URL = r'http?://(?:www\.)?radiobremen\.de/mediathek/(?:index\.html)?\?id=(?P<id>[0-9]+)'
|
||||
IE_NAME = 'radiobremen'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.radiobremen.de/mediathek/index.html?id=114720',
|
||||
'info_dict': {
|
||||
'id': '114720',
|
||||
'ext': 'mp4',
|
||||
'duration': 1685,
|
||||
'width': 512,
|
||||
'title': 'buten un binnen vom 22. Dezember',
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
'description': 'Unter anderem mit diesen Themen: 45 Flüchtlinge sind in Worpswede angekommen +++ Freies Internet für alle: Bremer arbeiten an einem flächendeckenden W-Lan-Netzwerk +++ Aktivisten kämpfen für das Unibad +++ So war das Wetter 2014 +++',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
meta_url = "http://www.radiobremen.de/apps/php/mediathek/metadaten.php?id=%s" % video_id
|
||||
meta_doc = self._download_webpage(
|
||||
meta_url, video_id, 'Downloading metadata')
|
||||
title = self._html_search_regex(
|
||||
r"<h1.*>(?P<title>.+)</h1>", meta_doc, "title")
|
||||
description = self._html_search_regex(
|
||||
r"<p>(?P<description>.*)</p>", meta_doc, "description", fatal=False)
|
||||
duration = parse_duration(self._html_search_regex(
|
||||
r"Länge:</td>\s+<td>(?P<duration>[0-9]+:[0-9]+)</td>",
|
||||
meta_doc, "duration", fatal=False))
|
||||
|
||||
page_doc = self._download_webpage(
|
||||
url, video_id, 'Downloading video information')
|
||||
mobj = re.search(
|
||||
r"ardformatplayerclassic\(\'playerbereich\',\'(?P<width>[0-9]+)\',\'.*\',\'(?P<video_id>[0-9]+)\',\'(?P<secret>[0-9]+)\',\'(?P<thumbnail>.+)\',\'\'\)",
|
||||
page_doc)
|
||||
video_url = (
|
||||
"http://dl-ondemand.radiobremen.de/mediabase/%s/%s_%s_%s.mp4" %
|
||||
(video_id, video_id, mobj.group("secret"), mobj.group('width')))
|
||||
|
||||
formats = [{
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'width': int(mobj.group("width")),
|
||||
}]
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'thumbnail': mobj.group('thumbnail'),
|
||||
}
|
@@ -8,7 +8,7 @@ from ..utils import parse_duration
|
||||
|
||||
class RtlXlIE(InfoExtractor):
|
||||
IE_NAME = 'rtlxl.nl'
|
||||
_VALID_URL = r'https?://www\.rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)'
|
||||
_VALID_URL = r'https?://(www\.)?rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677',
|
||||
|
@@ -70,6 +70,37 @@ class RutubeIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class RutubeEmbedIE(InfoExtractor):
|
||||
IE_NAME = 'rutube:embed'
|
||||
IE_DESC = 'Rutube embedded videos'
|
||||
_VALID_URL = 'https?://rutube\.ru/video/embed/(?P<id>[0-9]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
|
||||
'info_dict': {
|
||||
'id': 'a10e53b86e8f349080f718582ce4c661',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20131223',
|
||||
'uploader_id': '297833',
|
||||
'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89',
|
||||
'uploader': 'subziro89 ILya',
|
||||
'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'Requires ffmpeg',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
embed_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, embed_id)
|
||||
|
||||
canonical_url = self._html_search_regex(
|
||||
r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage,
|
||||
'Canonical URL')
|
||||
return self.url_result(canonical_url, 'Rutube')
|
||||
|
||||
|
||||
class RutubeChannelIE(InfoExtractor):
|
||||
IE_NAME = 'rutube:channel'
|
||||
IE_DESC = 'Rutube channels'
|
||||
|
@@ -24,7 +24,7 @@ class SexyKarmaIE(InfoExtractor):
|
||||
'title': 'Taking a quick pee.',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'wildginger7',
|
||||
'upload_date': '20141007',
|
||||
'upload_date': '20141008',
|
||||
'duration': 22,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
@@ -45,6 +45,7 @@ class SexyKarmaIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'categories': list,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.watchindianporn.net/video/desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number-dW2mtctxJfs.html',
|
||||
@@ -61,6 +62,7 @@ class SexyKarmaIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'categories': list,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}]
|
||||
|
||||
@@ -114,4 +116,5 @@ class SexyKarmaIE(InfoExtractor):
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count,
|
||||
'categories': categories,
|
||||
'age_limit': 18,
|
||||
}
|
||||
|
youtube_dl/extractor/soulanime.py (new file, 80 lines)
@@ -0,0 +1,80 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
urlhandle_detect_ext,
|
||||
)
|
||||
|
||||
|
||||
class SoulAnimeWatchingIE(InfoExtractor):
|
||||
IE_NAME = "soulanime:watching"
|
||||
IE_DESC = "SoulAnime video"
|
||||
_TEST = {
|
||||
'url': 'http://www.soul-anime.net/watching/seirei-tsukai-no-blade-dance-episode-9/',
|
||||
'md5': '05fae04abf72298098b528e98abf4298',
|
||||
'info_dict': {
|
||||
'id': 'seirei-tsukai-no-blade-dance-episode-9',
|
||||
'ext': 'mp4',
|
||||
'title': 'seirei-tsukai-no-blade-dance-episode-9',
|
||||
'description': 'seirei-tsukai-no-blade-dance-episode-9'
|
||||
}
|
||||
}
|
||||
_VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/watch[^/]*/(?P<id>[^/]+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
domain = mobj.group('domain')
|
||||
|
||||
page = self._download_webpage(url, video_id)
|
||||
|
||||
video_url_encoded = self._html_search_regex(
|
||||
r'<div id="download">[^<]*<a href="(?P<url>[^"]+)"', page, 'url')
|
||||
video_url = "http://www.soul-anime." + domain + video_url_encoded
|
||||
|
||||
ext_req = HEADRequest(video_url)
|
||||
ext_handle = self._request_webpage(
|
||||
ext_req, video_id, note='Determining extension')
|
||||
ext = urlhandle_detect_ext(ext_handle)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': ext,
|
||||
'title': video_id,
|
||||
'description': video_id
|
||||
}
|
||||
|
||||
|
||||
class SoulAnimeSeriesIE(InfoExtractor):
|
||||
IE_NAME = "soulanime:series"
|
||||
IE_DESC = "SoulAnime Series"
|
||||
|
||||
_VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/anime./(?P<id>[^/]+)'
|
||||
|
||||
_EPISODE_REGEX = r'<option value="(/watch[^/]*/[^"]+)">[^<]*</option>'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.soul-anime.net/anime1/black-rock-shooter-tv/',
|
||||
'info_dict': {
|
||||
'id': 'black-rock-shooter-tv'
|
||||
},
|
||||
'playlist_count': 8
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
series_id = mobj.group('id')
|
||||
domain = mobj.group('domain')
|
||||
|
||||
pattern = re.compile(self._EPISODE_REGEX)
|
||||
|
||||
page = self._download_webpage(url, series_id, "Downloading series page")
|
||||
mobj = pattern.findall(page)
|
||||
|
||||
entries = [self.url_result("http://www.soul-anime." + domain + obj) for obj in mobj]
|
||||
|
||||
return self.playlist_result(entries, series_id)
|
@@ -57,9 +57,7 @@ class TeacherTubeIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_meta('title', webpage, 'title', fatal=True)
|
||||
|
@@ -13,7 +13,7 @@ from ..compat import (
|
||||
class TEDIE(SubtitlesInfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?P<proto>https?://)
|
||||
(?P<type>www|embed)(?P<urlmain>\.ted\.com/
|
||||
(?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/
|
||||
(
|
||||
(?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
|
||||
|
|
||||
@@ -98,7 +98,7 @@ class TEDIE(SubtitlesInfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = re.match(self._VALID_URL, url, re.VERBOSE)
|
||||
if m.group('type') == 'embed':
|
||||
if m.group('type').startswith('embed'):
|
||||
desktop_url = m.group('proto') + 'www' + m.group('urlmain')
|
||||
return self.url_result(desktop_url, 'TED')
|
||||
name = m.group('name')
|
||||
|
@@ -1,15 +1,13 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class TF1IE(InfoExtractor):
|
||||
"""TF1 uses the wat.tv player."""
|
||||
_VALID_URL = r'http://videos\.tf1\.fr/.*-(?P<id>.*?)\.html'
|
||||
_TEST = {
|
||||
_VALID_URL = r'http://(?:videos\.tf1|www\.tfou)\.fr/.*?-(?P<id>\d+)(?:-\d+)?\.html'
|
||||
_TESTS = {
|
||||
'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
|
||||
'info_dict': {
|
||||
'id': '10635995',
|
||||
@@ -21,14 +19,26 @@ class TF1IE(InfoExtractor):
|
||||
# Sometimes wat serves the whole file with the --test option
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.tfou.fr/chuggington/videos/le-grand-mysterioso-chuggington-7085291-739.html',
|
||||
'info_dict': {
|
||||
'id': '12043945',
|
||||
'ext': 'mp4',
|
||||
'title': 'Le grand Mystérioso - Chuggington',
|
||||
'description': 'Le grand Mystérioso - Emery rêve qu\'un article lui soit consacré dans le journal.',
|
||||
'upload_date': '20150103',
|
||||
},
|
||||
'params': {
|
||||
# Sometimes wat serves the whole file with the --test option
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
embed_url = self._html_search_regex(
|
||||
r'"(https://www.wat.tv/embedframe/.*?)"', webpage, 'embed url')
|
||||
r'["\'](https?://www.wat.tv/embedframe/.*?)["\']', webpage, 'embed url')
|
||||
embed_page = self._download_webpage(embed_url, video_id,
|
||||
'Downloading embed player page')
|
||||
wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')
|
||||
|
@@ -9,7 +9,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class TudouIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs|albumplay)/(?:view|(.+?))/(?:([^/]+)|([^/]+))(?:\.html)?'
|
||||
_VALID_URL = r'https?://(?:www\.)?tudou\.com/(?:listplay|programs(?:/view)?|albumplay)/.*?/(?P<id>[^/?#]+?)(?:\.html)?/?(?:$|[?#])'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
|
||||
'md5': '140a49ed444bd22f93330985d8475fcb',
|
||||
@@ -27,13 +27,6 @@ class TudouIE(InfoExtractor):
|
||||
'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.tudou.com/albumplay/TenTw_JgiPM/PzsAs5usU9A.html',
|
||||
'info_dict': {
|
||||
'title': 'todo.mp4',
|
||||
},
|
||||
'add_ie': ['Youku'],
|
||||
'skip': 'Only works from China'
|
||||
}]
|
||||
|
||||
def _url_for_id(self, id, quality=None):
|
||||
@@ -45,8 +38,7 @@ class TudouIE(InfoExtractor):
|
||||
return final_url
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group(2)
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
m = re.search(r'vcode:\s*[\'"](.+?)[\'"]', webpage)
|
||||
@@ -87,4 +79,9 @@ class TudouIE(InfoExtractor):
|
||||
}
|
||||
result.append(part_info)
|
||||
|
||||
return result
|
||||
return {
|
||||
'_type': 'multi_video',
|
||||
'entries': result,
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
}
|
||||
|
@@ -24,7 +24,7 @@ class TuneInIE(InfoExtractor):
|
||||
_INFO_DICT = {
|
||||
'id': '34682',
|
||||
'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2',
|
||||
'ext': 'AAC',
|
||||
'ext': 'aac',
|
||||
'thumbnail': 're:^https?://.*\.png$',
|
||||
'location': 'Tacoma, WA',
|
||||
}
|
||||
@@ -78,14 +78,21 @@ class TuneInIE(InfoExtractor):
|
||||
for stream in streams:
|
||||
if stream.get('Type') == 'Live':
|
||||
is_live = True
|
||||
reliability = stream.get('Reliability')
|
||||
format_note = (
|
||||
'Reliability: %d%%' % reliability
|
||||
if reliability is not None else None)
|
||||
formats.append({
|
||||
'preference': (
|
||||
0 if reliability is None or reliability > 90
|
||||
else 1),
|
||||
'abr': stream.get('Bandwidth'),
|
||||
'ext': stream.get('MediaType'),
|
||||
'ext': stream.get('MediaType').lower(),
|
||||
'acodec': stream.get('MediaType'),
|
||||
'vcodec': 'none',
|
||||
'url': stream.get('Url'),
|
||||
# Sometimes streams with the highest quality do not exist
|
||||
'preference': stream.get('Reliability'),
|
||||
'source_preference': reliability,
|
||||
'format_note': format_note,
|
||||
})
|
||||
self._sort_formats(formats)
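
The TuneIn change feeds each stream's Reliability percentage into both a coarse preference bucket and source_preference, and surfaces it as a human-readable format_note. A sketch of that mapping for a single stream entry (the sample stream dict is illustrative):

def tunein_format(stream):
    reliability = stream.get('Reliability')
    return {
        'preference': 0 if reliability is None or reliability > 90 else 1,
        'abr': stream.get('Bandwidth'),
        'ext': stream.get('MediaType', '').lower(),
        'acodec': stream.get('MediaType'),
        'vcodec': 'none',
        'url': stream.get('Url'),
        'source_preference': reliability,
        'format_note': ('Reliability: %d%%' % reliability
                        if reliability is not None else None),
    }


print(tunein_format({'Reliability': 85, 'Bandwidth': 128,
                     'MediaType': 'AAC', 'Url': 'http://example.com/stream'}))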
|
||||
|
||||
|
@@ -1,37 +1,142 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class TvpIE(InfoExtractor):
|
||||
IE_NAME = 'tvp.pl'
|
||||
_VALID_URL = r'https?://www\.tvp\.pl/.*?wideo/(?P<date>\d+)/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:vod|www)\.tvp\.pl/.*/(?P<id>\d+)$'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.tvp.pl/warszawa/magazyny/campusnews/wideo/31102013/12878238',
|
||||
'md5': '148408967a6a468953c0a75cbdaf0d7a',
_TESTS = [{
'url': 'http://vod.tvp.pl/filmy-fabularne/filmy-za-darmo/ogniem-i-mieczem/wideo/odc-2/4278035',
'info_dict': {
'id': '12878238',
'id': '4278035',
'ext': 'wmv',
'title': '31.10.2013 - Odcinek 2',
'description': '31.10.2013 - Odcinek 2',
'title': 'Ogniem i mieczem, odc. 2',
'description': 'Bohun dowiaduje się o złamaniu przez kniahinię danego mu słowa i wyrusza do Rozłogów. Helenie w ostatniej chwili udaje się uciec dzięki pomocy Zagłoby.',
},
'skip': 'Download has to use same server IP as extraction. Therefore, a good (load-balancing) DNS resolver will make the download fail.'
}
}, {
'url': 'http://vod.tvp.pl/seriale/obyczajowe/czas-honoru/sezon-1-1-13/i-seria-odc-13/194536',
'info_dict': {
'id': '194536',
'ext': 'mp4',
'title': 'Czas honoru, I seria – odc. 13',
# 'description': 'WŁADEK\nCzesław prosi Marię o dostarczenie Władkowi zarazki tyfusu. Jeśli zachoruje zostanie przewieziony do szpitala skąd łatwiej będzie go odbić. Czy matka zdecyduje się zarazić syna? Karol odwiedza Wandę przyznaje się, że ją oszukiwał, ale ostrzega też, że grozi jej aresztowanie i nalega, żeby wyjechała z Warszawy. Czy dziewczyna zdecyduje się znów oddalić od ukochanego? Rozpoczyna się akcja odbicia Władka.',
},
}, {
'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176',
'info_dict': {
'id': '17916176',
'ext': 'mp4',
'title': 'TVP Gorzów pokaże filmy studentów z podroży dookoła świata',
},
'params': {
# m3u8 download
'skip_download': 'true',
},
}, {
'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272',
'info_dict': {
'id': '17834272',
'ext': 'mp4',
'title': 'Na sygnale, odc. 39',
'description': 'Ekipa Wiktora ratuje młodą matkę, która spadła ze schodów trzymając na rękach noworodka. Okazuje się, że dziewczyna jest surogatką, a biologiczni rodzice dziecka próbują zmusić ją do oddania synka…',
},
'params': {
# m3u8 download
'skip_download': 'true',
},
}]

def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
json_url = 'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s' % video_id
params = self._download_json(
json_url, video_id, "Downloading video metadata")
video_url = params['video_url']
webpage = self._download_webpage(
'http://www.tvp.pl/sess/tvplayer.php?object_id=%s' % video_id, video_id)

title = self._og_search_title(webpage)
series = self._search_regex(
r'{name:\s*([\'"])SeriesTitle\1,\s*value:\s*\1(?P<series>.*?)\1},',
webpage, 'series', group='series', default=None)
if series is not None and series not in title:
title = '%s, %s' % (series, title)
description = self._og_search_description(webpage, default=None)

video_url = self._search_regex(
r'0:{src:([\'"])(?P<url>.*?)\1', webpage, 'formats', group='url', default=None)
if video_url is None:
video_url = self._download_json(
'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s' % video_id,
video_id)['video_url']

ext = video_url.rsplit('.', 1)[-1]
if ext != 'ism/manifest':
if '/' in ext:
ext = 'mp4'
formats = [{
'format_id': 'direct',
'url': video_url,
'ext': ext,
}]
else:
m3u8_url = re.sub('([^/]*)\.ism/manifest', r'\1.ism/\1.m3u8', video_url)
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')

self._sort_formats(formats)

return {
'id': video_id,
'title': self._og_search_title(webpage),
'ext': 'wmv',
'url': video_url,
'description': self._og_search_description(webpage),
'title': title,
'thumbnail': self._og_search_thumbnail(webpage),
'description': description,
'formats': formats,
}


class TvpSeriesIE(InfoExtractor):
IE_NAME = 'tvp.pl:Series'
_VALID_URL = r'https?://vod\.tvp\.pl/(?:[^/]+/){2}(?P<id>[^/]+)/?$'

_TESTS = [{
'url': 'http://vod.tvp.pl/filmy-fabularne/filmy-za-darmo/ogniem-i-mieczem',
'info_dict': {
'title': 'Ogniem i mieczem',
'id': '4278026',
},
'playlist_count': 4,
}, {
'url': 'http://vod.tvp.pl/audycje/podroze/boso-przez-swiat',
'info_dict': {
'title': 'Boso przez świat',
'id': '9329207',
},
'playlist_count': 86,
}]

def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id, tries=5)

title = self._html_search_regex(
r'(?s) id=[\'"]path[\'"]>(?:.*? / ){2}(.*?)</span>', webpage, 'series')
playlist_id = self._search_regex(r'nodeId:\s*(\d+)', webpage, 'playlist id')
playlist = self._download_webpage(
'http://vod.tvp.pl/vod/seriesAjax?type=series&nodeId=%s&recommend'
'edId=0&sort=&page=0&pageSize=10000' % playlist_id, display_id, tries=5,
note='Downloading playlist')

videos_paths = re.findall(
'(?s)class="shortTitle">.*?href="(/[^"]+)', playlist)
entries = [
self.url_result('http://vod.tvp.pl%s' % v_path, ie=TvpIE.ie_key())
for v_path in videos_paths]

return {
'_type': 'playlist',
'id': playlist_id,
'display_id': display_id,
'title': title,
'entries': entries,
}

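For reference, the '.ism/manifest' to m3u8 rewrite that TvpIE performs above can be exercised on its own; a minimal sketch follows (the sample URL is hypothetical and only illustrates the shape of the substitution, not a real TVP stream):

import re

def ism_manifest_to_m3u8(video_url):
    # Mirrors the substitution in TvpIE: '<name>.ism/manifest' -> '<name>.ism/<name>.m3u8'
    return re.sub(r'([^/]*)\.ism/manifest', r'\1.ism/\1.m3u8', video_url)

# Hypothetical smooth-streaming URL, used only to show the rewrite
print(ism_manifest_to_m3u8('http://example.com/token/video123.ism/manifest'))
# -> http://example.com/token/video123.ism/video123.m3u8
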
@@ -63,7 +63,7 @@ class VierIE(InfoExtractor):

class VierVideosIE(InfoExtractor):
IE_NAME = 'vier:videos'
_VALID_URL = r'https?://(?:www\.)?vier\.be/(?P<program>[^/]+)/videos(?:\?.*\bpage=(?P<page>\d+))?'
_VALID_URL = r'https?://(?:www\.)?vier\.be/(?P<program>[^/]+)/videos(?:\?.*\bpage=(?P<page>\d+)|$)'
_TESTS = [{
'url': 'http://www.vier.be/demoestuin/videos',
'info_dict': {

@@ -17,7 +17,6 @@ class VikiIE(SubtitlesInfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
_TEST = {
'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
'md5': 'a21454021c2646f5433514177e2caa5f',
'info_dict': {
'id': '1023585v',
'ext': 'mp4',
@@ -31,8 +30,7 @@ class VikiIE(SubtitlesInfoExtractor):
}

def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1)
video_id = self._match_id(url)

webpage = self._download_webpage(url, video_id)
title = self._og_search_title(webpage)

@@ -14,28 +14,17 @@ class VimpleIE(InfoExtractor):
IE_DESC = 'Vimple.ru'
_VALID_URL = r'https?://(player.vimple.ru/iframe|vimple.ru)/(?P<id>[a-f0-9]{10,})'
_TESTS = [
# Quality: Large, from iframe
{
'url': 'http://player.vimple.ru/iframe/b132bdfd71b546d3972f9ab9a25f201c',
'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf',
'md5': '2e750a330ed211d3fd41821c6ad9a279',
'info_dict': {
'id': 'b132bdfd71b546d3972f9ab9a25f201c',
'title': 'great-escape-minecraft.flv',
'id': 'c0f6b1687dcd4000a97ebe70068039cf',
'ext': 'mp4',
'duration': 352,
'webpage_url': 'http://vimple.ru/b132bdfd71b546d3972f9ab9a25f201c',
'title': 'Sunset',
'duration': 20,
'thumbnail': 're:https?://.*?\.jpg',
},
},
# Quality: Medium, from mainpage
{
'url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd',
'info_dict': {
'id': 'a15950562888453b8e6f9572dc8600cd',
'title': 'DB 01',
'ext': 'flv',
'duration': 1484,
'webpage_url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd',
}
},
]

def _real_extract(self, url):

@@ -164,6 +164,14 @@ class VKIE(InfoExtractor):
self.to_screen('Youtube video detected')
return self.url_result(m_yt.group(1), 'Youtube')

m_rutube = re.search(
r'\ssrc="((?:https?:)?//rutube\.ru\\?/video\\?/embed(?:.*?))\\?"', info_page)
if m_rutube is not None:
self.to_screen('rutube video detected')
rutube_url = self._proto_relative_url(
m_rutube.group(1).replace('\\', ''))
return self.url_result(rutube_url)

m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.*?});', info_page)
if m_opts:
m_opts_url = re.search(r"url\s*:\s*'([^']+)", m_opts.group(1))

@@ -10,14 +10,14 @@ from ..utils import (


class WashingtonPostIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
_VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
_TEST = {
'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
'info_dict': {
'title': 'Sinkhole of bureaucracy',
},
'playlist': [{
'md5': 'c3f4b4922ffa259243f68e928db2db8c',
'md5': '79132cc09ec5309fa590ae46e4cc31bc',
'info_dict': {
'id': 'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f',
'ext': 'mp4',
@@ -29,7 +29,7 @@ class WashingtonPostIE(InfoExtractor):
'upload_date': '20140322',
},
}, {
'md5': 'f645a07652c2950cd9134bb852c5f5eb',
'md5': 'e1d5734c06865cc504ad99dc2de0d443',
'info_dict': {
'id': '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f',
'ext': 'mp4',
@@ -44,10 +44,9 @@ class WashingtonPostIE(InfoExtractor):
}

def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
page_id = mobj.group('id')

page_id = self._match_id(url)
webpage = self._download_webpage(url, page_id)

title = self._og_search_title(webpage)
uuids = re.findall(r'data-video-uuid="([^"]+)"', webpage)
entries = []

@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import itertools
import re

from .common import InfoExtractor
@@ -67,6 +68,10 @@ class WDRIE(InfoExtractor):
'upload_date': '20140717',
},
},
{
'url': 'http://www1.wdr.de/mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100.html',
'playlist_mincount': 146,
}
]

def _real_extract(self, url):
@@ -81,6 +86,27 @@ class WDRIE(InfoExtractor):
self.url_result(page_url + href, 'WDR')
for href in re.findall(r'<a href="/?(.+?%s\.html)" rel="nofollow"' % self._PLAYER_REGEX, webpage)
]

if entries: # Playlist page
return self.playlist_result(entries, page_id)

# Overview page
entries = []
for page_num in itertools.count(2):
hrefs = re.findall(
r'<li class="mediathekvideo"\s*>\s*<img[^>]*>\s*<a href="(/mediathek/video/[^"]+)"',
webpage)
entries.extend(
self.url_result(page_url + href, 'WDR')
for href in hrefs)
next_url_m = re.search(
r'<li class="nextToLast">\s*<a href="([^"]+)"', webpage)
if not next_url_m:
break
next_url = page_url + next_url_m.group(1)
webpage = self._download_webpage(
next_url, page_id,
note='Downloading playlist page %d' % page_num)
return self.playlist_result(entries, page_id)

flashvars = compat_parse_qs(
@@ -172,8 +198,7 @@ class WDRMausIE(InfoExtractor):
}]

def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)

webpage = self._download_webpage(url, video_id)
param_code = self._html_search_regex(
@@ -224,5 +249,3 @@ class WDRMausIE(InfoExtractor):
'thumbnail': thumbnail,
'upload_date': upload_date,
}

# TODO test _1

youtube_dl/extractor/webofstories.py (new file, 102 additions)
@@ -0,0 +1,102 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import int_or_none


class WebOfStoriesIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?webofstories\.com/play/(?:[^/]+/)?(?P<id>[0-9]+)'
_VIDEO_DOMAIN = 'http://eu-mobile.webofstories.com/'
_GREAT_LIFE_STREAMER = 'rtmp://eu-cdn1.webofstories.com/cfx/st/'
_USER_STREAMER = 'rtmp://eu-users.webofstories.com/cfx/st/'
_TESTS = [
{
'url': 'http://www.webofstories.com/play/hans.bethe/71',
'md5': '373e4dd915f60cfe3116322642ddf364',
'info_dict': {
'id': '4536',
'ext': 'mp4',
'title': 'The temperature of the sun',
'thumbnail': 're:^https?://.*\.jpg$',
'description': 'Hans Bethe talks about calculating the temperature of the sun',
'duration': 238,
}
},
{
'url': 'http://www.webofstories.com/play/55908',
'md5': '2985a698e1fe3211022422c4b5ed962c',
'info_dict': {
'id': '55908',
'ext': 'mp4',
'title': 'The story of Gemmata obscuriglobus',
'thumbnail': 're:^https?://.*\.jpg$',
'description': 'Planctomycete talks about The story of Gemmata obscuriglobus',
'duration': 169,
}
},
]

def _real_extract(self, url):
video_id = self._match_id(url)

webpage = self._download_webpage(url, video_id)
title = self._og_search_title(webpage)
description = self._html_search_meta('description', webpage)
thumbnail = self._og_search_thumbnail(webpage)

story_filename = self._search_regex(
r'\.storyFileName\("([^"]+)"\)', webpage, 'story filename')
speaker_id = self._search_regex(
r'\.speakerId\("([^"]+)"\)', webpage, 'speaker ID')
story_id = self._search_regex(
r'\.storyId\((\d+)\)', webpage, 'story ID')
speaker_type = self._search_regex(
r'\.speakerType\("([^"]+)"\)', webpage, 'speaker type')
great_life = self._search_regex(
r'isGreatLifeStory\s*=\s*(true|false)', webpage, 'great life story')
is_great_life_series = great_life == 'true'
duration = int_or_none(self._search_regex(
r'\.duration\((\d+)\)', webpage, 'duration', fatal=False))

# URL building, see: http://www.webofstories.com/scripts/player.js
ms_prefix = ''
if speaker_type.lower() == 'ms':
ms_prefix = 'mini_sites/'

if is_great_life_series:
mp4_url = '{0:}lives/{1:}/{2:}.mp4'.format(
self._VIDEO_DOMAIN, speaker_id, story_filename)
rtmp_ext = 'flv'
streamer = self._GREAT_LIFE_STREAMER
play_path = 'stories/{0:}/{1:}'.format(
speaker_id, story_filename)
else:
mp4_url = '{0:}{1:}{2:}/{3:}.mp4'.format(
self._VIDEO_DOMAIN, ms_prefix, speaker_id, story_filename)
rtmp_ext = 'mp4'
streamer = self._USER_STREAMER
play_path = 'mp4:{0:}{1:}/{2}.mp4'.format(
ms_prefix, speaker_id, story_filename)

formats = [{
'format_id': 'mp4_sd',
'url': mp4_url,
}, {
'format_id': 'rtmp_sd',
'page_url': url,
'url': streamer,
'ext': rtmp_ext,
'play_path': play_path,
}]

self._sort_formats(formats)

return {
'id': story_id,
'title': title,
'formats': formats,
'thumbnail': thumbnail,
'description': description,
'duration': duration,
}
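The stream-URL selection in the new WebOfStoriesIE can be summarised outside the extractor. The sketch below repeats the same branching with the class constants inlined; the speaker, filename and type values passed at the bottom are hypothetical placeholders, not data scraped from the site:

VIDEO_DOMAIN = 'http://eu-mobile.webofstories.com/'
GREAT_LIFE_STREAMER = 'rtmp://eu-cdn1.webofstories.com/cfx/st/'
USER_STREAMER = 'rtmp://eu-users.webofstories.com/cfx/st/'

def build_urls(speaker_id, story_filename, speaker_type, is_great_life_series):
    # 'ms' speakers live under a mini_sites/ prefix, as in the extractor above
    ms_prefix = 'mini_sites/' if speaker_type.lower() == 'ms' else ''
    if is_great_life_series:
        mp4_url = '{0}lives/{1}/{2}.mp4'.format(VIDEO_DOMAIN, speaker_id, story_filename)
        rtmp = (GREAT_LIFE_STREAMER, 'stories/{0}/{1}'.format(speaker_id, story_filename), 'flv')
    else:
        mp4_url = '{0}{1}{2}/{3}.mp4'.format(VIDEO_DOMAIN, ms_prefix, speaker_id, story_filename)
        rtmp = (USER_STREAMER, 'mp4:{0}{1}/{2}.mp4'.format(ms_prefix, speaker_id, story_filename), 'mp4')
    return mp4_url, rtmp

# Placeholder arguments, only to show the two resulting URL shapes
print(build_urls('some.speaker', 'story_1234', 'GL', True))
print(build_urls('some.speaker', 'story_1234', 'ms', False))
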
@@ -14,7 +14,7 @@ from ..utils import (

class XHamsterIE(InfoExtractor):
"""Information Extractor for xHamster"""
_VALID_URL = r'http://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
_VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
_TESTS = [
{
'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
@@ -39,7 +39,11 @@ class XHamsterIE(InfoExtractor):
'duration': 200,
'age_limit': 18,
}
}
},
{
'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html',
'only_matching': True,
},
]

def _real_extract(self, url):
@@ -57,7 +61,8 @@ class XHamsterIE(InfoExtractor):

video_id = mobj.group('id')
seo = mobj.group('seo')
mrss_url = 'http://xhamster.com/movies/%s/%s.html' % (video_id, seo)
proto = mobj.group('proto')
mrss_url = '%s://xhamster.com/movies/%s/%s.html' % (proto, video_id, seo)
webpage = self._download_webpage(mrss_url, video_id)

title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>', webpage, 'title')

@@ -40,7 +40,7 @@ class XTubeIE(InfoExtractor):
r'<p class="title">([^<]+)', webpage, 'title')
video_uploader = self._html_search_regex(
[r"var\s+contentOwnerId\s*=\s*'([^']+)",
r'By:\s*<a href="/community/profile\.php?user=([^"]+)'],
r'By:\s*<a href="/community/profile\.php\?user=([^"]+)'],
webpage, 'uploader', fatal=False)
video_description = self._html_search_regex(
r'<p class="fieldsDesc">([^<]+)',
@@ -95,6 +95,7 @@ class XTubeUserIE(InfoExtractor):
'url': 'http://www.xtube.com/community/profile.php?user=greenshowers',
'info_dict': {
'id': 'greenshowers',
'age_limit': 18,
},
'playlist_mincount': 155,
}
@@ -124,6 +125,7 @@ class XTubeUserIE(InfoExtractor):
return {
'_type': 'playlist',
'id': username,
'age_limit': 18,
'entries': [{
'_type': 'url',
'url': eurl,

@@ -256,7 +256,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'138': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
@@ -264,9 +264,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},

# Dash mp4 audio
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 128, 'preference': -50},
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50},
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50},
'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50},
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50},

# Dash webm
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
@@ -287,7 +287,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'VP9'},
'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},

# Dash webm audio
'171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
@@ -412,7 +414,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'id': 'HtVdAasjOgU',
'ext': 'mp4',
'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
'description': 'md5:eca57043abae25130f58f655ad9a7771',
'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
'uploader': 'The Witcher',
'uploader_id': 'WitcherGame',
'upload_date': '20140605',
@@ -463,6 +465,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'skip_download': 'requires avconv',
}
},
# Non-square pixels
{
'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
'info_dict': {
'id': '_b-2C3KPAM0',
'ext': 'mp4',
'stretched_ratio': 16 / 9.,
'upload_date': '20110310',
'uploader_id': 'AllenMeow',
'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
'uploader': '孫艾倫',
'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
},
}
]

def __init__(self, *args, **kwargs):
@@ -736,6 +752,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'format_id': format_id,
'url': video_url,
'width': int_or_none(r.attrib.get('width')),
'height': int_or_none(r.attrib.get('height')),
'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
'asr': int_or_none(r.attrib.get('audioSamplingRate')),
'filesize': filesize,
@@ -746,7 +763,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
fo for fo in formats
if fo['format_id'] == format_id)
except StopIteration:
f.update(self._formats.get(format_id, {}))
f.update(self._formats.get(format_id, {}).items())
formats.append(f)
else:
existing_format.update(f)
@@ -1040,8 +1057,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
self.report_warning(
'Skipping DASH manifest: %r' % e, video_id)
else:
# Hide the formats we found through non-DASH
dash_keys = set(df['format_id'] for df in dash_formats)
for f in formats:
if f['format_id'] in dash_keys:
f['format_id'] = 'nondash-%s' % f['format_id']
f['preference'] = f.get('preference', 0) - 10000
formats.extend(dash_formats)

# Check for malformed aspect ratio
stretched_m = re.search(
r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
video_webpage)
if stretched_m:
ratio = float(stretched_m.group('w')) / float(stretched_m.group('h'))
for f in formats:
if f.get('vcodec') != 'none':
f['stretched_ratio'] = ratio

self._sort_formats(formats)

return {
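The aspect-ratio check added to YoutubeIE above only depends on the yt:stretch meta tag. A small sketch of that parsing step, run against a hand-written HTML snippet rather than a real watch page:

import re

video_webpage = '<meta property="og:video:tag" content="yt:stretch=16:9">'  # hand-written sample

stretched_m = re.search(
    r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
    video_webpage)
if stretched_m:
    ratio = float(stretched_m.group('w')) / float(stretched_m.group('h'))
    print(ratio)  # 1.777... ; stored as 'stretched_ratio' on every video format above
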
@@ -1199,9 +1232,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
if playlist_id.startswith('RD'):
# Mixes require a custom extraction process
return self._extract_mix(playlist_id)
if playlist_id.startswith('TL'):
raise ExtractorError('For downloading YouTube.com top lists, use '
'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)

url = self._TEMPLATE_URL % playlist_id
page = self._download_webpage(url, playlist_id)
@@ -1247,49 +1277,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
return self.playlist_result(url_results, playlist_id, playlist_title)


class YoutubeTopListIE(YoutubePlaylistIE):
IE_NAME = 'youtube:toplist'
IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"'
' (Example: "yttoplist:music:Top Tracks")')
_VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
_TESTS = [{
'url': 'yttoplist:music:Trending',
'playlist_mincount': 5,
'skip': 'Only works for logged-in users',
}]

def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
channel = mobj.group('chann')
title = mobj.group('title')
query = compat_urllib_parse.urlencode({'title': title})
channel_page = self._download_webpage(
'https://www.youtube.com/%s' % channel, title)
link = self._html_search_regex(
r'''(?x)
<a\s+href="([^"]+)".*?>\s*
<span\s+class="branded-page-module-title-text">\s*
<span[^>]*>.*?%s.*?</span>''' % re.escape(query),
channel_page, 'list')
url = compat_urlparse.urljoin('https://www.youtube.com/', link)

video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
ids = []
# sometimes the webpage doesn't contain the videos
# retry until we get them
for i in itertools.count(0):
msg = 'Downloading Youtube mix'
if i > 0:
msg += ', retry #%d' % i

webpage = self._download_webpage(url, title, msg)
ids = orderedSet(re.findall(video_re, webpage))
if ids:
break
url_results = self._ids_to_results(ids)
return self.playlist_result(url_results, playlist_title=title)


class YoutubeChannelIE(InfoExtractor):
IE_DESC = 'YouTube.com channels'
_VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'

@@ -119,7 +119,7 @@ class ZDFChannelIE(InfoExtractor):
'info_dict': {
'id': '1586442',
},
'playlist_count': 4,
'playlist_count': 3,
}
_PAGE_SIZE = 50

@@ -267,10 +267,12 @@ def parseOpts(overrideArguments=None):
action='store', dest='format', metavar='FORMAT', default=None,
help=(
'video format code, specify the order of preference using'
' slashes: -f 22/17/18 . -f mp4 , -f m4a and -f flv are also'
' supported. You can also use the special names "best",'
' "bestvideo", "bestaudio", "worst", "worstvideo" and'
' "worstaudio". By default, youtube-dl will pick the best quality.'
' slashes, as in -f 22/17/18 . '
' Instead of format codes, you can select by extension for the '
'extensions aac, m4a, mp3, mp4, ogg, wav, webm. '
'You can also use the special names "best",'
' "bestvideo", "bestaudio", "worst". '
' By default, youtube-dl will pick the best quality.'
' Use commas to download multiple audio formats, such as'
' -f 136/137/mp4/bestvideo,140/m4a/bestaudio.'
' You can merge the video and audio of two formats into a single'
@@ -300,6 +302,12 @@ def parseOpts(overrideArguments=None):
'--youtube-skip-dash-manifest',
action='store_false', dest='youtube_include_dash_manifest',
help='Do not download the DASH manifest on YouTube videos')
video_format.add_option(
'--merge-output-format',
action='store', dest='merge_output_format', metavar='FORMAT', default=None,
help=(
'If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv.'
'Ignored if no merge is required'))

subtitles = optparse.OptionGroup(parser, 'Subtitle Options')
subtitles.add_option(
@@ -443,6 +451,11 @@ def parseOpts(overrideArguments=None):
'-J', '--dump-single-json',
action='store_true', dest='dump_single_json', default=False,
help='simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.')
verbosity.add_option(
'--print-json',
action='store_true', dest='print_json', default=False,
help='Be quiet and print the video information as JSON (video is still being downloaded).',
)
verbosity.add_option(
'--newline',
action='store_true', dest='progress_with_newline', default=False,
@@ -618,6 +631,13 @@ def parseOpts(overrideArguments=None):
'--xattrs',
action='store_true', dest='xattrs', default=False,
help='write metadata to the video file\'s xattrs (using dublin core and xdg standards)')
postproc.add_option(
'--fixup',
metavar='POLICY', dest='fixup', default='detect_or_warn',
help='(experimental) Automatically correct known faults of the file. '
'One of never (do nothing), warn (only emit a warning), '
'detect_or_warn(check whether we can do anything about it, warn '
'otherwise')
postproc.add_option(
'--prefer-avconv',
action='store_false', dest='prefer_ffmpeg',

@@ -6,6 +6,7 @@ from .ffmpeg import (
FFmpegAudioFixPP,
FFmpegEmbedSubtitlePP,
FFmpegExtractAudioPP,
FFmpegFixupStretchedPP,
FFmpegMergerPP,
FFmpegMetadataPP,
FFmpegVideoConvertorPP,
@@ -24,6 +25,7 @@ __all__ = [
'FFmpegAudioFixPP',
'FFmpegEmbedSubtitlePP',
'FFmpegExtractAudioPP',
'FFmpegFixupStretchedPP',
'FFmpegMergerPP',
'FFmpegMetadataPP',
'FFmpegPostProcessor',

@@ -50,6 +50,10 @@ class FFmpegPostProcessor(PostProcessor):
programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
return dict((p, get_exe_version(p, args=['-version'])) for p in programs)

@property
def available(self):
return self._executable is not None

@property
def _executable(self):
if self._downloader.params.get('prefer_ffmpeg', False):
@@ -80,8 +84,9 @@ class FFmpegPostProcessor(PostProcessor):

files_cmd = []
for path in input_paths:
files_cmd.extend(['-i', encodeFilename(path, True)])
cmd = ([self._executable, '-y'] + files_cmd
files_cmd.extend([encodeArgument('-i'), encodeFilename(path, True)])
cmd = ([encodeFilename(self._executable, True), encodeArgument('-y')] +
files_cmd
+ [encodeArgument(o) for o in opts] +
[encodeFilename(self._ffmpeg_filename_argument(out_path), True)])

@@ -122,8 +127,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
raise PostProcessingError('ffprobe or avprobe not found. Please install one.')
try:
cmd = [
self._probe_executable,
'-show_streams',
encodeFilename(self._probe_executable, True),
encodeArgument('-show_streams'),
encodeFilename(self._ffmpeg_filename_argument(path), True)]
handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
output = handle.communicate()[0]
@@ -520,7 +525,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
class FFmpegMergerPP(FFmpegPostProcessor):
def run(self, info):
filename = info['filepath']
args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0', '-shortest']
args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0']
self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % filename)
self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args)
return True, info
@@ -539,3 +544,22 @@ class FFmpegAudioFixPP(FFmpegPostProcessor):
os.rename(encodeFilename(temp_filename), encodeFilename(filename))

return True, info


class FFmpegFixupStretchedPP(FFmpegPostProcessor):
def run(self, info):
stretched_ratio = info.get('stretched_ratio')
if stretched_ratio is None or stretched_ratio == 1:
return

filename = info['filepath']
temp_filename = prepend_extension(filename, 'temp')

options = ['-c', 'copy', '-aspect', '%f' % stretched_ratio]
self._downloader.to_screen('[ffmpeg] Fixing aspect ratio in "%s"' % filename)
self.run_ffmpeg(filename, temp_filename, options)

os.remove(encodeFilename(filename))
os.rename(encodeFilename(temp_filename), encodeFilename(filename))

return True, info

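The new FFmpegFixupStretchedPP corrects the container-level aspect ratio without re-encoding. Roughly, the resulting ffmpeg call looks like the sketch below; the file names are placeholders and the real command is assembled by run_ffmpeg, which may pick avconv instead of ffmpeg:

# Sketch of the command FFmpegFixupStretchedPP ends up issuing for a 16:9-stretched video.
# '-c copy' keeps the streams untouched; only the display aspect ratio metadata changes.
stretched_ratio = 16 / 9.
options = ['-c', 'copy', '-aspect', '%f' % stretched_ratio]
# placeholder file names; the post-processor uses info['filepath'] and a '.temp' sibling
cmd = ['ffmpeg', '-y', '-i', 'video.mp4'] + options + ['video.temp.mp4']
print(' '.join(cmd))
# -> ffmpeg -y -i video.mp4 -c copy -aspect 1.777778 video.temp.mp4
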
@@ -13,6 +13,7 @@ from .compat import (
compat_str,
compat_urllib_request,
)
from .utils import make_HTTPS_handler
from .version import __version__


@@ -58,9 +59,12 @@ def update_self(to_screen, verbose):
to_screen('It looks like you installed youtube-dl with a package manager, pip, setup.py or a tarball. Please use that to update.')
return

https_handler = make_HTTPS_handler(False)
opener = compat_urllib_request.build_opener(https_handler)

# Check if there is a new version
try:
newversion = compat_urllib_request.urlopen(VERSION_URL).read().decode('utf-8').strip()
newversion = opener.open(VERSION_URL).read().decode('utf-8').strip()
except:
if verbose:
to_screen(compat_str(traceback.format_exc()))
@@ -72,7 +76,7 @@ def update_self(to_screen, verbose):

# Download and check versions info
try:
versions_info = compat_urllib_request.urlopen(JSON_URL).read().decode('utf-8')
versions_info = opener.open(JSON_URL).read().decode('utf-8')
versions_info = json.loads(versions_info)
except:
if verbose:
@@ -120,7 +124,7 @@ def update_self(to_screen, verbose):
return

try:
urlh = compat_urllib_request.urlopen(version['exe'][0])
urlh = opener.open(version['exe'][0])
newcontent = urlh.read()
urlh.close()
except (IOError, OSError):
@@ -166,7 +170,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
# Zip unix package
elif isinstance(globals().get('__loader__'), zipimporter):
try:
urlh = compat_urllib_request.urlopen(version['bin'][0])
urlh = opener.open(version['bin'][0])
newcontent = urlh.read()
urlh.close()
except (IOError, OSError):

@@ -205,6 +205,10 @@ def get_element_by_attribute(attribute, value, html):

def clean_html(html):
"""Clean an HTML snippet into a readable string"""

if html is None: # Convenience for sanitizing descriptions etc.
return html

# Newline vs <br />
html = html.replace('\n', ' ')
html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
@@ -1550,3 +1554,23 @@ def ytdl_is_updateable():
def args_to_str(args):
# Get a short string representation for a subprocess command
return ' '.join(shlex_quote(a) for a in args)


def urlhandle_detect_ext(url_handle):
try:
url_handle.headers
getheader = lambda h: url_handle.headers[h]
except AttributeError: # Python < 3
getheader = url_handle.info().getheader

return getheader('Content-Type').split("/")[1]


def age_restricted(content_limit, age_limit):
""" Returns True iff the content should be blocked """

if age_limit is None: # No limit set
return False
if content_limit is None:
return False # Content available for everyone
return age_limit < content_limit

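The new age_restricted() helper in utils.py has simple semantics that can be spot-checked directly (assuming the youtube_dl package from this checkout is importable):

from youtube_dl.utils import age_restricted

# Blocked only when both limits are known and the viewer's limit is below the content's
print(age_restricted(18, 16))    # True  - 16+ viewer, 18-rated content
print(age_restricted(18, 18))    # False - viewer meets the limit
print(age_restricted(None, 16))  # False - content carries no rating
print(age_restricted(18, None))  # False - no --age-limit configured
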
@@ -1,3 +1,3 @@
from __future__ import unicode_literals

__version__ = '2015.01.02'
__version__ = '2015.01.10'