Compare commits
180 Commits
2014.11.21
...
2014.12.06
Author | SHA1 | Date | |
---|---|---|---|
![]() |
1a7c6c69d3 | ||
![]() |
045c48847a | ||
![]() |
90644a6843 | ||
![]() |
122c2f87c1 | ||
![]() |
a154eb3d15 | ||
![]() |
81028ff9eb | ||
![]() |
e8df5cee12 | ||
![]() |
ab07963b5c | ||
![]() |
7e26084d09 | ||
![]() |
4349c07dd7 | ||
![]() |
1139a54d9b | ||
![]() |
b128c9ed68 | ||
![]() |
9776bc7f57 | ||
![]() |
e703fc66c2 | ||
![]() |
39c52bbd32 | ||
![]() |
6219802165 | ||
![]() |
8b97115358 | ||
![]() |
810fb84d5e | ||
![]() |
5f5e993dc6 | ||
![]() |
191cc41ba4 | ||
![]() |
abe70fa044 | ||
![]() |
7f142293df | ||
![]() |
d4e06d4a83 | ||
![]() |
ecd7ea1e6b | ||
![]() |
b92c548693 | ||
![]() |
eecd6a467d | ||
![]() |
dce2a3cf9e | ||
![]() |
9095aa38ac | ||
![]() |
0403b06985 | ||
![]() |
de9bd74bc2 | ||
![]() |
233d37fb6b | ||
![]() |
c627f7d48c | ||
![]() |
163c8babaa | ||
![]() |
6708542099 | ||
![]() |
ea2ee40357 | ||
![]() |
62d8b56655 | ||
![]() |
c492970b4b | ||
![]() |
ac5633592a | ||
![]() |
706d7d4ee7 | ||
![]() |
752c8c9b76 | ||
![]() |
b1399a144d | ||
![]() |
05177b34a6 | ||
![]() |
c41a9650c3 | ||
![]() |
df015c69ea | ||
![]() |
1434bffa1f | ||
![]() |
94aa25b995 | ||
![]() |
d128cfe393 | ||
![]() |
954f36f890 | ||
![]() |
19e92770c9 | ||
![]() |
95c673a148 | ||
![]() |
a196a53265 | ||
![]() |
3266f0c68e | ||
![]() |
1940fadd53 | ||
![]() |
03fd72d996 | ||
![]() |
f2b44a2513 | ||
![]() |
c522adb1f0 | ||
![]() |
7160532d41 | ||
![]() |
4e62ebe250 | ||
![]() |
4472f84f0c | ||
![]() |
b766eb2707 | ||
![]() |
10a404c335 | ||
![]() |
c056efa2e3 | ||
![]() |
283ac8d592 | ||
![]() |
313d4572ce | ||
![]() |
42939b6129 | ||
![]() |
37ea8164d3 | ||
![]() |
8c810a7db3 | ||
![]() |
248a0b890f | ||
![]() |
96b7c7fe3f | ||
![]() |
e987e91fcc | ||
![]() |
cb6444e197 | ||
![]() |
93b8a10e3b | ||
![]() |
4207558e8b | ||
![]() |
ad0d800fc3 | ||
![]() |
e232f787f6 | ||
![]() |
155f9550c0 | ||
![]() |
72476fcc42 | ||
![]() |
29e950f7c8 | ||
![]() |
7c8ea53b96 | ||
![]() |
dcddc10a50 | ||
![]() |
a1008af412 | ||
![]() |
61c0663c1e | ||
![]() |
81a7a521c5 | ||
![]() |
e293711802 | ||
![]() |
ceb3367320 | ||
![]() |
a03aaaed2e | ||
![]() |
e075a44afb | ||
![]() |
8865bdeb37 | ||
![]() |
3aa578cad2 | ||
![]() |
d3b5101a91 | ||
![]() |
5c32110114 | ||
![]() |
24144e3b8d | ||
![]() |
b3034f9df7 | ||
![]() |
4c6d2ff8dc | ||
![]() |
faf3494894 | ||
![]() |
535a66ef66 | ||
![]() |
5c40bba82f | ||
![]() |
855dc479c2 | ||
![]() |
0792d5634e | ||
![]() |
e91cdcae1a | ||
![]() |
27e1400f55 | ||
![]() |
e0938e7731 | ||
![]() |
b72823a0a4 | ||
![]() |
673cf0e773 | ||
![]() |
f8aace93cd | ||
![]() |
80310134e0 | ||
![]() |
4d2d638df4 | ||
![]() |
0e44f90e18 | ||
![]() |
15938ab67a | ||
![]() |
ab4ee31eb1 | ||
![]() |
b061ea6e9f | ||
![]() |
4aae94f9d0 | ||
![]() |
acda92f6bc | ||
![]() |
ddfd0f2727 | ||
![]() |
d0720e7118 | ||
![]() |
4e262a8838 | ||
![]() |
b9ed3af343 | ||
![]() |
63c9b2c1d9 | ||
![]() |
65f3a228b1 | ||
![]() |
3004ae2c3a | ||
![]() |
d9836a5917 | ||
![]() |
be64b5b098 | ||
![]() |
c3e74731c2 | ||
![]() |
c920d7f00d | ||
![]() |
0bbf12239c | ||
![]() |
70d68eb46f | ||
![]() |
c553fe5d29 | ||
![]() |
f0c3d729d7 | ||
![]() |
1cdedfee10 | ||
![]() |
93129d9442 | ||
![]() |
e8c8653e9d | ||
![]() |
fab89c67c5 | ||
![]() |
3d960a22fa | ||
![]() |
51bbb084d3 | ||
![]() |
2c25a2bd29 | ||
![]() |
355682be01 | ||
![]() |
00e9d396ab | ||
![]() |
14d4e90eb1 | ||
![]() |
b74e86f48a | ||
![]() |
3d36cea4ac | ||
![]() |
380b822003 | ||
![]() |
b66e699877 | ||
![]() |
27f8b0994e | ||
![]() |
e311b6389a | ||
![]() |
fab6d4c048 | ||
![]() |
4ffc31033e | ||
![]() |
c1777d5cb3 | ||
![]() |
9e1a5b8455 | ||
![]() |
784b6d3a9b | ||
![]() |
c66bdc4869 | ||
![]() |
2514d2635e | ||
![]() |
8bcc875676 | ||
![]() |
5f6a1245ff | ||
![]() |
f3a3407226 | ||
![]() |
598c218f7b | ||
![]() |
4698b14b76 | ||
![]() |
835a22ef3f | ||
![]() |
7d4111ed14 | ||
![]() |
d37cab2a9d | ||
![]() |
d16abf434a | ||
![]() |
a8363f3ab7 | ||
![]() |
010cd3a3ee | ||
![]() |
b9042def9d | ||
![]() |
aa79ac0c82 | ||
![]() |
88125905cf | ||
![]() |
dd60be2bf9 | ||
![]() |
119b3caa46 | ||
![]() |
49f0da7ae1 | ||
![]() |
2cead7e7bc | ||
![]() |
9262867e86 | ||
![]() |
b9272e8f8f | ||
![]() |
021a0db8f7 | ||
![]() |
e1e8b6897b | ||
![]() |
53d1cd1f77 | ||
![]() |
cad985ab4d | ||
![]() |
c52331f30c | ||
![]() |
42e1ff8665 | ||
![]() |
02a12f9fe6 | ||
![]() |
6fcd6e0e21 | ||
![]() |
469d4c8968 |
8
AUTHORS
8
AUTHORS
@@ -82,3 +82,11 @@ Xavier Beynon
|
||||
Gabriel Schubiner
|
||||
xantares
|
||||
Jan Matějka
|
||||
Mauroy Sébastien
|
||||
William Sewell
|
||||
Dao Hoang Son
|
||||
Oskar Jauch
|
||||
Matthew Rayfield
|
||||
t0mm0
|
||||
Tithen-Firion
|
||||
Zack Fernandes
|
||||
|
16
README.md
16
README.md
@@ -30,7 +30,7 @@ Alternatively, refer to the developer instructions below for how to check out an
|
||||
# DESCRIPTION
|
||||
**youtube-dl** is a small command-line program to download videos from
|
||||
YouTube.com and a few more sites. It requires the Python interpreter, version
|
||||
2.6, 2.7, or 3.3+, and it is not platform specific. It should work on
|
||||
2.6, 2.7, or 3.2+, and it is not platform specific. It should work on
|
||||
your Unix box, on Windows or on Mac OS X. It is released to the public domain,
|
||||
which means you can modify it, redistribute it or use it however you like.
|
||||
|
||||
@@ -65,10 +65,10 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
this is not possible instead of searching.
|
||||
--ignore-config Do not read configuration files. When given
|
||||
in the global configuration file /etc
|
||||
/youtube-dl.conf: do not read the user
|
||||
configuration in ~/.config/youtube-dl.conf
|
||||
(%APPDATA%/youtube-dl/config.txt on
|
||||
Windows)
|
||||
/youtube-dl.conf: Do not read the user
|
||||
configuration in ~/.config/youtube-
|
||||
dl/config (%APPDATA%/youtube-dl/config.txt
|
||||
on Windows)
|
||||
--flat-playlist Do not extract the videos of a playlist,
|
||||
only list them.
|
||||
|
||||
@@ -93,7 +93,8 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
COUNT views
|
||||
--max-views COUNT Do not download any videos with more than
|
||||
COUNT views
|
||||
--no-playlist download only the currently playing video
|
||||
--no-playlist If the URL refers to a video and a
|
||||
playlist, download only the video.
|
||||
--age-limit YEARS download only videos suitable for the given
|
||||
age
|
||||
--download-archive FILE Download only videos not listed in the
|
||||
@@ -492,14 +493,15 @@ If you want to add support for a new site, you can follow this quick list (assum
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# TODO more code goes here, for example ...
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
# TODO more properties (see youtube_dl/extractor/common.py)
|
||||
}
|
||||
```
|
||||
|
@@ -1,4 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
from os.path import dirname as dirn
|
||||
import sys
|
||||
@@ -9,6 +11,7 @@ import youtube_dl
|
||||
BASH_COMPLETION_FILE = "youtube-dl.bash-completion"
|
||||
BASH_COMPLETION_TEMPLATE = "devscripts/bash-completion.in"
|
||||
|
||||
|
||||
def build_completion(opt_parser):
|
||||
opts_flag = []
|
||||
for group in opt_parser.option_groups:
|
||||
|
@@ -233,6 +233,7 @@ def rmtree(path):
|
||||
|
||||
#==============================================================================
|
||||
|
||||
|
||||
class BuildError(Exception):
|
||||
def __init__(self, output, code=500):
|
||||
self.output = output
|
||||
|
@@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import unicode_literals
|
||||
|
||||
"""
|
||||
This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check
|
||||
|
@@ -23,13 +23,13 @@ EXTRA_ARGS = {
|
||||
'batch-file': ['--require-parameter'],
|
||||
}
|
||||
|
||||
|
||||
def build_completion(opt_parser):
|
||||
commands = []
|
||||
|
||||
for group in opt_parser.option_groups:
|
||||
for option in group.option_list:
|
||||
long_option = option.get_opt_string().strip('-')
|
||||
help_msg = shell_quote([option.help])
|
||||
complete_cmd = ['complete', '--command', 'youtube-dl', '--long-option', long_option]
|
||||
if option._short_opts:
|
||||
complete_cmd += ['--short-option', option._short_opts[0].strip('-')]
|
||||
|
@@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import sys
|
||||
|
@@ -1,8 +1,7 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
import urllib.request
|
||||
import json
|
||||
|
||||
|
@@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import unicode_literals, with_statement
|
||||
|
||||
import rsa
|
||||
import json
|
||||
@@ -29,4 +30,5 @@ signature = hexlify(rsa.pkcs1.sign(json.dumps(versions_info, sort_keys=True).enc
|
||||
print('signature: ' + signature)
|
||||
|
||||
versions_info['signature'] = signature
|
||||
json.dump(versions_info, open('update/versions.json', 'w'), indent=4, sort_keys=True)
|
||||
with open('update/versions.json', 'w') as versionsf:
|
||||
json.dump(versions_info, versionsf, indent=4, sort_keys=True)
|
||||
|
@@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import with_statement
|
||||
from __future__ import with_statement, unicode_literals
|
||||
|
||||
import datetime
|
||||
import glob
|
||||
@@ -13,7 +13,7 @@ year = str(datetime.datetime.now().year)
|
||||
for fn in glob.glob('*.html*'):
|
||||
with io.open(fn, encoding='utf-8') as f:
|
||||
content = f.read()
|
||||
newc = re.sub(u'(?P<copyright>Copyright © 2006-)(?P<year>[0-9]{4})', u'Copyright © 2006-' + year, content)
|
||||
newc = re.sub(r'(?P<copyright>Copyright © 2006-)(?P<year>[0-9]{4})', 'Copyright © 2006-' + year, content)
|
||||
if content != newc:
|
||||
tmpFn = fn + '.part'
|
||||
with io.open(tmpFn, 'wt', encoding='utf-8') as outf:
|
||||
|
@@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
import io
|
||||
@@ -73,4 +74,3 @@ atom_template = atom_template.replace('@ENTRIES@', entries_str)
|
||||
|
||||
with io.open('update/releases.atom', 'w', encoding='utf-8') as atom_file:
|
||||
atom_file.write(atom_template)
|
||||
|
||||
|
@@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import sys
|
||||
import os
|
||||
@@ -9,6 +10,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(
|
||||
|
||||
import youtube_dl
|
||||
|
||||
|
||||
def main():
|
||||
with open('supportedsites.html.in', 'r', encoding='utf-8') as tmplf:
|
||||
template = tmplf.read()
|
||||
@@ -21,7 +23,7 @@ def main():
|
||||
continue
|
||||
elif ie_desc is not None:
|
||||
ie_html += ': {}'.format(ie.IE_DESC)
|
||||
if ie.working() == False:
|
||||
if not ie.working():
|
||||
ie_html += ' (Currently broken)'
|
||||
ie_htmls.append('<li>{}</li>'.format(ie_html))
|
||||
|
||||
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import io
|
||||
import sys
|
||||
import re
|
||||
|
@@ -1,3 +1,4 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import io
|
||||
import os.path
|
||||
|
@@ -1,40 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import sys, os
|
||||
|
||||
try:
|
||||
import urllib.request as compat_urllib_request
|
||||
except ImportError: # Python 2
|
||||
import urllib2 as compat_urllib_request
|
||||
|
||||
sys.stderr.write(u'Hi! We changed distribution method and now youtube-dl needs to update itself one more time.\n')
|
||||
sys.stderr.write(u'This will only happen once. Simply press enter to go on. Sorry for the trouble!\n')
|
||||
sys.stderr.write(u'The new location of the binaries is https://github.com/rg3/youtube-dl/downloads, not the git repository.\n\n')
|
||||
|
||||
try:
|
||||
raw_input()
|
||||
except NameError: # Python 3
|
||||
input()
|
||||
|
||||
filename = sys.argv[0]
|
||||
|
||||
API_URL = "https://api.github.com/repos/rg3/youtube-dl/downloads"
|
||||
BIN_URL = "https://github.com/downloads/rg3/youtube-dl/youtube-dl"
|
||||
|
||||
if not os.access(filename, os.W_OK):
|
||||
sys.exit('ERROR: no write permissions on %s' % filename)
|
||||
|
||||
try:
|
||||
urlh = compat_urllib_request.urlopen(BIN_URL)
|
||||
newcontent = urlh.read()
|
||||
urlh.close()
|
||||
except (IOError, OSError) as err:
|
||||
sys.exit('ERROR: unable to download latest version')
|
||||
|
||||
try:
|
||||
with open(filename, 'wb') as outf:
|
||||
outf.write(newcontent)
|
||||
except (IOError, OSError) as err:
|
||||
sys.exit('ERROR: unable to overwrite current version')
|
||||
|
||||
sys.stderr.write(u'Done! Now you can run youtube-dl.\n')
|
@@ -1,12 +0,0 @@
|
||||
from distutils.core import setup
|
||||
import py2exe
|
||||
|
||||
py2exe_options = {
|
||||
"bundle_files": 1,
|
||||
"compressed": 1,
|
||||
"optimize": 2,
|
||||
"dist_dir": '.',
|
||||
"dll_excludes": ['w9xpopen.exe']
|
||||
}
|
||||
|
||||
setup(console=['youtube-dl.py'], options={ "py2exe": py2exe_options }, zipfile=None)
|
@@ -1,102 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import sys, os
|
||||
import urllib2
|
||||
import json, hashlib
|
||||
|
||||
def rsa_verify(message, signature, key):
|
||||
from struct import pack
|
||||
from hashlib import sha256
|
||||
from sys import version_info
|
||||
def b(x):
|
||||
if version_info[0] == 2: return x
|
||||
else: return x.encode('latin1')
|
||||
assert(type(message) == type(b('')))
|
||||
block_size = 0
|
||||
n = key[0]
|
||||
while n:
|
||||
block_size += 1
|
||||
n >>= 8
|
||||
signature = pow(int(signature, 16), key[1], key[0])
|
||||
raw_bytes = []
|
||||
while signature:
|
||||
raw_bytes.insert(0, pack("B", signature & 0xFF))
|
||||
signature >>= 8
|
||||
signature = (block_size - len(raw_bytes)) * b('\x00') + b('').join(raw_bytes)
|
||||
if signature[0:2] != b('\x00\x01'): return False
|
||||
signature = signature[2:]
|
||||
if not b('\x00') in signature: return False
|
||||
signature = signature[signature.index(b('\x00'))+1:]
|
||||
if not signature.startswith(b('\x30\x31\x30\x0D\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01\x05\x00\x04\x20')): return False
|
||||
signature = signature[19:]
|
||||
if signature != sha256(message).digest(): return False
|
||||
return True
|
||||
|
||||
sys.stderr.write(u'Hi! We changed distribution method and now youtube-dl needs to update itself one more time.\n')
|
||||
sys.stderr.write(u'This will only happen once. Simply press enter to go on. Sorry for the trouble!\n')
|
||||
sys.stderr.write(u'From now on, get the binaries from http://rg3.github.com/youtube-dl/download.html, not from the git repository.\n\n')
|
||||
|
||||
raw_input()
|
||||
|
||||
filename = sys.argv[0]
|
||||
|
||||
UPDATE_URL = "http://rg3.github.io/youtube-dl/update/"
|
||||
VERSION_URL = UPDATE_URL + 'LATEST_VERSION'
|
||||
JSON_URL = UPDATE_URL + 'versions.json'
|
||||
UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
|
||||
|
||||
if not os.access(filename, os.W_OK):
|
||||
sys.exit('ERROR: no write permissions on %s' % filename)
|
||||
|
||||
exe = os.path.abspath(filename)
|
||||
directory = os.path.dirname(exe)
|
||||
if not os.access(directory, os.W_OK):
|
||||
sys.exit('ERROR: no write permissions on %s' % directory)
|
||||
|
||||
try:
|
||||
versions_info = urllib2.urlopen(JSON_URL).read().decode('utf-8')
|
||||
versions_info = json.loads(versions_info)
|
||||
except:
|
||||
sys.exit(u'ERROR: can\'t obtain versions info. Please try again later.')
|
||||
if not 'signature' in versions_info:
|
||||
sys.exit(u'ERROR: the versions file is not signed or corrupted. Aborting.')
|
||||
signature = versions_info['signature']
|
||||
del versions_info['signature']
|
||||
if not rsa_verify(json.dumps(versions_info, sort_keys=True), signature, UPDATES_RSA_KEY):
|
||||
sys.exit(u'ERROR: the versions file signature is invalid. Aborting.')
|
||||
|
||||
version = versions_info['versions'][versions_info['latest']]
|
||||
|
||||
try:
|
||||
urlh = urllib2.urlopen(version['exe'][0])
|
||||
newcontent = urlh.read()
|
||||
urlh.close()
|
||||
except (IOError, OSError) as err:
|
||||
sys.exit('ERROR: unable to download latest version')
|
||||
|
||||
newcontent_hash = hashlib.sha256(newcontent).hexdigest()
|
||||
if newcontent_hash != version['exe'][1]:
|
||||
sys.exit(u'ERROR: the downloaded file hash does not match. Aborting.')
|
||||
|
||||
try:
|
||||
with open(exe + '.new', 'wb') as outf:
|
||||
outf.write(newcontent)
|
||||
except (IOError, OSError) as err:
|
||||
sys.exit(u'ERROR: unable to write the new version')
|
||||
|
||||
try:
|
||||
bat = os.path.join(directory, 'youtube-dl-updater.bat')
|
||||
b = open(bat, 'w')
|
||||
b.write("""
|
||||
echo Updating youtube-dl...
|
||||
ping 127.0.0.1 -n 5 -w 1000 > NUL
|
||||
move /Y "%s.new" "%s"
|
||||
del "%s"
|
||||
\n""" %(exe, exe, bat))
|
||||
b.close()
|
||||
|
||||
os.startfile(bat)
|
||||
except (IOError, OSError) as err:
|
||||
sys.exit('ERROR: unable to overwrite current version')
|
||||
|
||||
sys.stderr.write(u'Done! Now you can run youtube-dl.\n')
|
@@ -1,4 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
from os.path import dirname as dirn
|
||||
import sys
|
||||
|
5
setup.py
5
setup.py
@@ -4,7 +4,6 @@
|
||||
from __future__ import print_function
|
||||
|
||||
import os.path
|
||||
import pkg_resources
|
||||
import warnings
|
||||
import sys
|
||||
|
||||
@@ -103,7 +102,9 @@ setup(
|
||||
"Programming Language :: Python :: 2.6",
|
||||
"Programming Language :: Python :: 2.7",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.3"
|
||||
"Programming Language :: Python :: 3.2",
|
||||
"Programming Language :: Python :: 3.3",
|
||||
"Programming Language :: Python :: 3.4",
|
||||
],
|
||||
|
||||
**params
|
||||
|
@@ -72,8 +72,10 @@ class FakeYDL(YoutubeDL):
|
||||
def expect_warning(self, regex):
|
||||
# Silence an expected warning matching a regex
|
||||
old_report_warning = self.report_warning
|
||||
|
||||
def report_warning(self, message):
|
||||
if re.match(regex, message): return
|
||||
if re.match(regex, message):
|
||||
return
|
||||
old_report_warning(message)
|
||||
self.report_warning = types.MethodType(report_warning, self)
|
||||
|
||||
@@ -139,7 +141,7 @@ def expect_info_dict(self, expected_dict, got_dict):
|
||||
if missing_keys:
|
||||
def _repr(v):
|
||||
if isinstance(v, compat_str):
|
||||
return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'")
|
||||
return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n')
|
||||
else:
|
||||
return repr(v)
|
||||
info_dict_str = ''.join(
|
||||
|
@@ -266,6 +266,7 @@ class TestFormatSelection(unittest.TestCase):
|
||||
'ext': 'mp4',
|
||||
'width': None,
|
||||
}
|
||||
|
||||
def fname(templ):
|
||||
ydl = YoutubeDL({'outtmpl': templ})
|
||||
return ydl.prepare_filename(info)
|
||||
|
@@ -40,18 +40,22 @@ from youtube_dl.extractor import get_info_extractor
|
||||
|
||||
RETRIES = 3
|
||||
|
||||
|
||||
class YoutubeDL(youtube_dl.YoutubeDL):
|
||||
def __init__(self, *args, **kwargs):
|
||||
self.to_stderr = self.to_screen
|
||||
self.processed_info_dicts = []
|
||||
super(YoutubeDL, self).__init__(*args, **kwargs)
|
||||
|
||||
def report_warning(self, message):
|
||||
# Don't accept warnings during tests
|
||||
raise ExtractorError(message)
|
||||
|
||||
def process_info(self, info_dict):
|
||||
self.processed_info_dicts.append(info_dict)
|
||||
return super(YoutubeDL, self).process_info(info_dict)
|
||||
|
||||
|
||||
def _file_md5(fn):
|
||||
with open(fn, 'rb') as f:
|
||||
return hashlib.md5(f.read()).hexdigest()
|
||||
@@ -61,10 +65,13 @@ defs = gettestcases()
|
||||
|
||||
class TestDownload(unittest.TestCase):
|
||||
maxDiff = None
|
||||
|
||||
def setUp(self):
|
||||
self.defs = defs
|
||||
|
||||
### Dynamically generate tests
|
||||
# Dynamically generate tests
|
||||
|
||||
|
||||
def generator(test_case):
|
||||
|
||||
def test_template(self):
|
||||
@@ -90,7 +97,7 @@ def generator(test_case):
|
||||
return
|
||||
for other_ie in other_ies:
|
||||
if not other_ie.working():
|
||||
print_skipping(u'test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
|
||||
print_skipping('test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
|
||||
return
|
||||
|
||||
params = get_params(test_case.get('params', {}))
|
||||
@@ -101,6 +108,7 @@ def generator(test_case):
|
||||
ydl = YoutubeDL(params, auto_init=False)
|
||||
ydl.add_default_info_extractors()
|
||||
finished_hook_called = set()
|
||||
|
||||
def _hook(status):
|
||||
if status['status'] == 'finished':
|
||||
finished_hook_called.add(status['filename'])
|
||||
@@ -111,6 +119,7 @@ def generator(test_case):
|
||||
return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))
|
||||
|
||||
res_dict = None
|
||||
|
||||
def try_rm_tcs_files(tcs=None):
|
||||
if tcs is None:
|
||||
tcs = test_cases
|
||||
@@ -134,7 +143,7 @@ def generator(test_case):
|
||||
raise
|
||||
|
||||
if try_num == RETRIES:
|
||||
report_warning(u'Failed due to network errors, skipping...')
|
||||
report_warning('Failed due to network errors, skipping...')
|
||||
return
|
||||
|
||||
print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num))
|
||||
@@ -206,7 +215,7 @@ def generator(test_case):
|
||||
|
||||
return test_template
|
||||
|
||||
### And add them to TestDownload
|
||||
# And add them to TestDownload
|
||||
for n, test_case in enumerate(defs):
|
||||
test_method = generator(test_case)
|
||||
tname = 'test_' + str(test_case['name'])
|
||||
|
@@ -23,6 +23,7 @@ from youtube_dl.extractor import (
|
||||
class BaseTestSubtitles(unittest.TestCase):
|
||||
url = None
|
||||
IE = None
|
||||
|
||||
def setUp(self):
|
||||
self.DL = FakeYDL()
|
||||
self.ie = self.IE(self.DL)
|
||||
@@ -237,7 +238,7 @@ class TestVimeoSubtitles(BaseTestSubtitles):
|
||||
def test_subtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
|
||||
self.assertEqual(md5(subtitles['en']), '26399116d23ae3cf2c087cea94bc43b4')
|
||||
|
||||
def test_subtitles_lang(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
|
@@ -9,14 +9,13 @@ rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
IGNORED_FILES = [
|
||||
'setup.py', # http://bugs.python.org/issue13943
|
||||
'conf.py',
|
||||
'buildserver.py',
|
||||
]
|
||||
|
||||
|
||||
class TestUnicodeLiterals(unittest.TestCase):
|
||||
def test_all_files(self):
|
||||
print('Skipping this test (not yet fully implemented)')
|
||||
return
|
||||
|
||||
for dirpath, _, filenames in os.walk(rootDir):
|
||||
for basename in filenames:
|
||||
if not basename.endswith('.py'):
|
||||
@@ -30,10 +29,10 @@ class TestUnicodeLiterals(unittest.TestCase):
|
||||
|
||||
if "'" not in code and '"' not in code:
|
||||
continue
|
||||
imps = 'from __future__ import unicode_literals'
|
||||
self.assertTrue(
|
||||
imps in code,
|
||||
' %s missing in %s' % (imps, fn))
|
||||
self.assertRegexpMatches(
|
||||
code,
|
||||
r'(?:#.*\n*)?from __future__ import (?:[a-z_]+,\s*)*unicode_literals',
|
||||
'unicode_literals import missing in %s' % fn)
|
||||
|
||||
m = re.search(r'(?<=\s)u[\'"](?!\)|,|$)', code)
|
||||
if m is not None:
|
||||
|
@@ -45,8 +45,9 @@ from youtube_dl.utils import (
|
||||
escape_rfc3986,
|
||||
escape_url,
|
||||
js_to_json,
|
||||
get_filesystem_encoding,
|
||||
intlist_to_bytes,
|
||||
args_to_str,
|
||||
parse_filesize,
|
||||
)
|
||||
|
||||
|
||||
@@ -170,7 +171,7 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3')
|
||||
|
||||
def test_smuggle_url(self):
|
||||
data = {u"ö": u"ö", u"abc": [3]}
|
||||
data = {"ö": "ö", "abc": [3]}
|
||||
url = 'https://foo.bar/baz?x=y#a'
|
||||
smug_url = smuggle_url(url, data)
|
||||
unsmug_url, unsmug_data = unsmuggle_url(smug_url)
|
||||
@@ -219,6 +220,9 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(parse_duration('0s'), 0)
|
||||
self.assertEqual(parse_duration('01:02:03.05'), 3723.05)
|
||||
self.assertEqual(parse_duration('T30M38S'), 1838)
|
||||
self.assertEqual(parse_duration('5 s'), 5)
|
||||
self.assertEqual(parse_duration('3 min'), 180)
|
||||
self.assertEqual(parse_duration('2.5 hours'), 9000)
|
||||
|
||||
def test_fix_xml_ampersands(self):
|
||||
self.assertEqual(
|
||||
@@ -361,5 +365,21 @@ class TestUtil(unittest.TestCase):
|
||||
intlist_to_bytes([0, 1, 127, 128, 255]),
|
||||
b'\x00\x01\x7f\x80\xff')
|
||||
|
||||
def test_args_to_str(self):
|
||||
self.assertEqual(
|
||||
args_to_str(['foo', 'ba/r', '-baz', '2 be', '']),
|
||||
'foo ba/r -baz \'2 be\' \'\''
|
||||
)
|
||||
|
||||
def test_parse_filesize(self):
|
||||
self.assertEqual(parse_filesize(None), None)
|
||||
self.assertEqual(parse_filesize(''), None)
|
||||
self.assertEqual(parse_filesize('91 B'), 91)
|
||||
self.assertEqual(parse_filesize('foobar'), None)
|
||||
self.assertEqual(parse_filesize('2 MiB'), 2097152)
|
||||
self.assertEqual(parse_filesize('5 GB'), 5000000000)
|
||||
self.assertEqual(parse_filesize('1.2Tb'), 1200000000000)
|
||||
self.assertEqual(parse_filesize('1,24 KB'), 1240)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
@@ -31,17 +32,16 @@ params = get_params({
|
||||
})
|
||||
|
||||
|
||||
|
||||
TEST_ID = 'gr51aVj-mLg'
|
||||
ANNOTATIONS_FILE = TEST_ID + '.flv.annotations.xml'
|
||||
EXPECTED_ANNOTATIONS = ['Speech bubble', 'Note', 'Title', 'Spotlight', 'Label']
|
||||
|
||||
|
||||
class TestAnnotations(unittest.TestCase):
|
||||
def setUp(self):
|
||||
# Clear old files
|
||||
self.tearDown()
|
||||
|
||||
|
||||
def test_info_json(self):
|
||||
expected = list(EXPECTED_ANNOTATIONS) # Two annotations could have the same text.
|
||||
ie = youtube_dl.extractor.YoutubeIE()
|
||||
@@ -71,7 +71,6 @@ class TestAnnotations(unittest.TestCase):
|
||||
# We should have seen (and removed) all the expected annotation texts.
|
||||
self.assertEqual(len(expected), 0, 'Not all expected annotations were found.')
|
||||
|
||||
|
||||
def tearDown(self):
|
||||
try_rm(ANNOTATIONS_FILE)
|
||||
|
||||
|
@@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
@@ -32,7 +33,7 @@ params = get_params({
|
||||
TEST_ID = 'BaW_jenozKc'
|
||||
INFO_JSON_FILE = TEST_ID + '.info.json'
|
||||
DESCRIPTION_FILE = TEST_ID + '.mp4.description'
|
||||
EXPECTED_DESCRIPTION = u'''test chars: "'/\ä↭𝕐
|
||||
EXPECTED_DESCRIPTION = '''test chars: "'/\ä↭𝕐
|
||||
test URL: https://github.com/rg3/youtube-dl/issues/1892
|
||||
|
||||
This is a test video for youtube-dl.
|
||||
@@ -53,11 +54,11 @@ class TestInfoJSON(unittest.TestCase):
|
||||
self.assertTrue(os.path.exists(INFO_JSON_FILE))
|
||||
with io.open(INFO_JSON_FILE, 'r', encoding='utf-8') as jsonf:
|
||||
jd = json.load(jsonf)
|
||||
self.assertEqual(jd['upload_date'], u'20121002')
|
||||
self.assertEqual(jd['upload_date'], '20121002')
|
||||
self.assertEqual(jd['description'], EXPECTED_DESCRIPTION)
|
||||
self.assertEqual(jd['id'], TEST_ID)
|
||||
self.assertEqual(jd['extractor'], 'youtube')
|
||||
self.assertEqual(jd['title'], u'''youtube-dl test video "'/\ä↭𝕐''')
|
||||
self.assertEqual(jd['title'], '''youtube-dl test video "'/\ä↭𝕐''')
|
||||
self.assertEqual(jd['uploader'], 'Philipp Hagemeister')
|
||||
|
||||
self.assertTrue(os.path.exists(DESCRIPTION_FILE))
|
||||
|
@@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
@@ -12,10 +13,6 @@ from test.helper import FakeYDL
|
||||
from youtube_dl.extractor import (
|
||||
YoutubePlaylistIE,
|
||||
YoutubeIE,
|
||||
YoutubeChannelIE,
|
||||
YoutubeShowIE,
|
||||
YoutubeTopListIE,
|
||||
YoutubeSearchURLIE,
|
||||
)
|
||||
|
||||
|
||||
|
@@ -60,6 +60,7 @@ from .utils import (
|
||||
write_string,
|
||||
YoutubeDLHandler,
|
||||
prepend_extension,
|
||||
args_to_str,
|
||||
)
|
||||
from .cache import Cache
|
||||
from .extractor import get_info_extractor, gen_extractors
|
||||
@@ -253,6 +254,22 @@ class YoutubeDL(object):
|
||||
self.print_debug_header()
|
||||
self.add_default_info_extractors()
|
||||
|
||||
def warn_if_short_id(self, argv):
|
||||
# short YouTube ID starting with dash?
|
||||
idxs = [
|
||||
i for i, a in enumerate(argv)
|
||||
if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
|
||||
if idxs:
|
||||
correct_argv = (
|
||||
['youtube-dl'] +
|
||||
[a for i, a in enumerate(argv) if i not in idxs] +
|
||||
['--'] + [argv[i] for i in idxs]
|
||||
)
|
||||
self.report_warning(
|
||||
'Long argument string detected. '
|
||||
'Use -- to separate parameters and URLs, like this:\n%s\n' %
|
||||
args_to_str(correct_argv))
|
||||
|
||||
def add_info_extractor(self, ie):
|
||||
"""Add an InfoExtractor object to the end of the list."""
|
||||
self._ies.append(ie)
|
||||
@@ -682,14 +699,17 @@ class YoutubeDL(object):
|
||||
self.report_warning(
|
||||
'Extractor %s returned a compat_list result. '
|
||||
'It needs to be updated.' % ie_result.get('extractor'))
|
||||
|
||||
def _fixup(r):
|
||||
self.add_extra_info(r,
|
||||
self.add_extra_info(
|
||||
r,
|
||||
{
|
||||
'extractor': ie_result['extractor'],
|
||||
'webpage_url': ie_result['webpage_url'],
|
||||
'webpage_url_basename': url_basename(ie_result['webpage_url']),
|
||||
'extractor_key': ie_result['extractor_key'],
|
||||
})
|
||||
}
|
||||
)
|
||||
return r
|
||||
ie_result['entries'] = [
|
||||
self.process_ie_result(_fixup(r), download, extra_info)
|
||||
@@ -767,6 +787,10 @@ class YoutubeDL(object):
|
||||
info_dict['display_id'] = info_dict['id']
|
||||
|
||||
if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
|
||||
# Working around negative timestamps in Windows
|
||||
# (see http://bugs.python.org/issue1646728)
|
||||
if info_dict['timestamp'] < 0 and os.name == 'nt':
|
||||
info_dict['timestamp'] = 0
|
||||
upload_date = datetime.datetime.utcfromtimestamp(
|
||||
info_dict['timestamp'])
|
||||
info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
|
||||
|
@@ -128,7 +128,6 @@ def _real_main(argv=None):
|
||||
compat_print(desc)
|
||||
sys.exit(0)
|
||||
|
||||
|
||||
# Conflicting, missing and erroneous options
|
||||
if opts.usenetrc and (opts.username is not None or opts.password is not None):
|
||||
parser.error('using .netrc conflicts with giving username/password')
|
||||
@@ -190,7 +189,7 @@ def _real_main(argv=None):
|
||||
|
||||
# --all-sub automatically sets --write-sub if --write-auto-sub is not given
|
||||
# this was the old behaviour if only --all-sub was given.
|
||||
if opts.allsubtitles and (opts.writeautomaticsub == False):
|
||||
if opts.allsubtitles and not opts.writeautomaticsub:
|
||||
opts.writesubtitles = True
|
||||
|
||||
if sys.version_info < (3,):
|
||||
@@ -317,7 +316,6 @@ def _real_main(argv=None):
|
||||
ydl.add_post_processor(FFmpegAudioFixPP())
|
||||
ydl.add_post_processor(AtomicParsleyPP())
|
||||
|
||||
|
||||
# Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
|
||||
# So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
|
||||
if opts.exec_cmd:
|
||||
@@ -334,11 +332,12 @@ def _real_main(argv=None):
|
||||
|
||||
# Maybe do nothing
|
||||
if (len(all_urls) < 1) and (opts.load_info_filename is None):
|
||||
if not (opts.update_self or opts.rm_cachedir):
|
||||
parser.error('you must provide at least one URL')
|
||||
else:
|
||||
if opts.update_self or opts.rm_cachedir:
|
||||
sys.exit()
|
||||
|
||||
ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
|
||||
parser.error('you must provide at least one URL')
|
||||
|
||||
try:
|
||||
if opts.load_info_filename is not None:
|
||||
retcode = ydl.download_with_info_file(opts.load_info_filename)
|
||||
|
@@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Execute with
|
||||
# $ python youtube_dl/__main__.py (2.6+)
|
||||
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
|
||||
|
||||
import base64
|
||||
@@ -7,6 +9,7 @@ from .utils import bytes_to_intlist, intlist_to_bytes
|
||||
|
||||
BLOCK_SIZE_BYTES = 16
|
||||
|
||||
|
||||
def aes_ctr_decrypt(data, key, counter):
|
||||
"""
|
||||
Decrypt with aes in counter mode
|
||||
@@ -32,6 +35,7 @@ def aes_ctr_decrypt(data, key, counter):
|
||||
|
||||
return decrypted_data
|
||||
|
||||
|
||||
def aes_cbc_decrypt(data, key, iv):
|
||||
"""
|
||||
Decrypt with aes in CBC mode
|
||||
@@ -57,6 +61,7 @@ def aes_cbc_decrypt(data, key, iv):
|
||||
|
||||
return decrypted_data
|
||||
|
||||
|
||||
def key_expansion(data):
|
||||
"""
|
||||
Generate key schedule
|
||||
@@ -91,6 +96,7 @@ def key_expansion(data):
|
||||
|
||||
return data
|
||||
|
||||
|
||||
def aes_encrypt(data, expanded_key):
|
||||
"""
|
||||
Encrypt one block with aes
|
||||
@@ -111,6 +117,7 @@ def aes_encrypt(data, expanded_key):
|
||||
|
||||
return data
|
||||
|
||||
|
||||
def aes_decrypt(data, expanded_key):
|
||||
"""
|
||||
Decrypt one block with aes
|
||||
@@ -131,6 +138,7 @@ def aes_decrypt(data, expanded_key):
|
||||
|
||||
return data
|
||||
|
||||
|
||||
def aes_decrypt_text(data, password, key_size_bytes):
|
||||
"""
|
||||
Decrypt text
|
||||
@@ -157,6 +165,7 @@ def aes_decrypt_text(data, password, key_size_bytes):
|
||||
|
||||
class Counter:
|
||||
__value = nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)
|
||||
|
||||
def next_value(self):
|
||||
temp = self.__value
|
||||
self.__value = inc(self.__value)
|
||||
@@ -241,15 +250,19 @@ RIJNDAEL_LOG_TABLE = (0x00, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, 0x4b, 0xc7
|
||||
0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5,
|
||||
0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07)
|
||||
|
||||
|
||||
def sub_bytes(data):
    """Return a new list with every element of ``data`` looked up in ``SBOX``."""
    substituted = []
    for value in data:
        substituted.append(SBOX[value])
    return substituted
|
||||
|
||||
|
||||
def sub_bytes_inv(data):
    """Return a new list with every element of ``data`` looked up in ``SBOX_INV``."""
    restored = []
    for value in data:
        restored.append(SBOX_INV[value])
    return restored
|
||||
|
||||
|
||||
def rotate(data):
    """Return a copy of the list ``data`` with its first element moved to the end."""
    head = data[0]
    tail = data[1:]
    return tail + [head]
|
||||
|
||||
|
||||
def key_schedule_core(data, rcon_iteration):
|
||||
data = rotate(data)
|
||||
data = sub_bytes(data)
|
||||
@@ -257,14 +270,17 @@ def key_schedule_core(data, rcon_iteration):
|
||||
|
||||
return data
|
||||
|
||||
|
||||
def xor(data1, data2):
    """Return the element-wise XOR of two sequences as a list.

    Elements are paired with ``zip``, so the result is as long as the
    shorter of the two inputs.
    """
    combined = []
    for left, right in zip(data1, data2):
        combined.append(left ^ right)
    return combined
|
||||
|
||||
|
||||
def rijndael_mul(a, b):
    """Multiply ``a`` and ``b`` via the log/exp lookup tables.

    Returns 0 when either operand equals 0. Otherwise the two
    ``RIJNDAEL_LOG_TABLE`` entries are added, reduced modulo 0xFF and the
    result is used as the index into ``RIJNDAEL_EXP_TABLE``.
    """
    if a == 0 or b == 0:
        return 0
    log_sum = RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]
    return RIJNDAEL_EXP_TABLE[log_sum % 0xFF]
|
||||
|
||||
|
||||
def mix_column(data, matrix):
|
||||
data_mixed = []
|
||||
for row in range(4):
|
||||
@@ -275,6 +291,7 @@ def mix_column(data, matrix):
|
||||
data_mixed.append(mixed)
|
||||
return data_mixed
|
||||
|
||||
|
||||
def mix_columns(data, matrix=MIX_COLUMN_MATRIX):
|
||||
data_mixed = []
|
||||
for i in range(4):
|
||||
@@ -282,9 +299,11 @@ def mix_columns(data, matrix=MIX_COLUMN_MATRIX):
|
||||
data_mixed += mix_column(column, matrix)
|
||||
return data_mixed
|
||||
|
||||
|
||||
def mix_columns_inv(data):
    """Run ``mix_columns`` on ``data`` with ``MIX_COLUMN_MATRIX_INV`` as the matrix."""
    return mix_columns(data, matrix=MIX_COLUMN_MATRIX_INV)
|
||||
|
||||
|
||||
def shift_rows(data):
|
||||
data_shifted = []
|
||||
for column in range(4):
|
||||
@@ -292,6 +311,7 @@ def shift_rows(data):
|
||||
data_shifted.append(data[((column + row) & 0b11) * 4 + row])
|
||||
return data_shifted
|
||||
|
||||
|
||||
def shift_rows_inv(data):
|
||||
data_shifted = []
|
||||
for column in range(4):
|
||||
@@ -299,6 +319,7 @@ def shift_rows_inv(data):
|
||||
data_shifted.append(data[((column - row) & 0b11) * 4 + row])
|
||||
return data_shifted
|
||||
|
||||
|
||||
def inc(data):
|
||||
data = data[:] # copy
|
||||
for i in range(len(data) - 1, -1, -1):
|
||||
|
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||
import getpass
|
||||
import optparse
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
@@ -174,12 +175,17 @@ try:
|
||||
from shlex import quote as shlex_quote
|
||||
except ImportError: # Python < 3.3
|
||||
def shlex_quote(s):
|
||||
if re.match(r'^[-_\w./]+$', s):
|
||||
return s
|
||||
else:
|
||||
return "'" + s.replace("'", "'\"'\"'") + "'"
|
||||
|
||||
|
||||
def compat_ord(c):
    """Return the integer value of ``c``.

    Integers (including ``int`` subclasses) are returned unchanged, e.g. an
    element obtained by indexing ``bytes`` on Python 3. Any other value is
    passed to ``ord()``, which raises ``TypeError`` for anything that is not
    a single character.
    """
    # isinstance() rather than an exact type comparison, so int subclasses
    # are accepted as well instead of being handed to ord().
    if isinstance(c, int):
        return c
    return ord(c)
|
||||
|
||||
|
||||
if sys.version_info >= (3, 0):
|
||||
@@ -264,7 +270,7 @@ if sys.version_info < (3, 0):
|
||||
print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
|
||||
else:
|
||||
def compat_print(s):
|
||||
assert type(s) == type(u'')
|
||||
assert isinstance(s, compat_str)
|
||||
print(s)
|
||||
|
||||
|
||||
|
@@ -30,3 +30,8 @@ def get_suitable_downloader(info_dict):
|
||||
return F4mFD
|
||||
else:
|
||||
return HttpFD
|
||||
|
||||
__all__ = [
|
||||
'get_suitable_downloader',
|
||||
'FileDownloader',
|
||||
]
|
||||
|
@@ -225,13 +225,15 @@ class F4mFD(FileDownloader):
|
||||
self.to_screen('[download] Downloading f4m manifest')
|
||||
manifest = self.ydl.urlopen(man_url).read()
|
||||
self.report_destination(filename)
|
||||
http_dl = HttpQuietDownloader(self.ydl,
|
||||
http_dl = HttpQuietDownloader(
|
||||
self.ydl,
|
||||
{
|
||||
'continuedl': True,
|
||||
'quiet': True,
|
||||
'noprogress': True,
|
||||
'test': self.params.get('test', False),
|
||||
})
|
||||
}
|
||||
)
|
||||
|
||||
doc = etree.fromstring(manifest)
|
||||
formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
|
||||
|
@@ -28,14 +28,14 @@ class HlsFD(FileDownloader):
|
||||
if check_executable(program, ['-version']):
|
||||
break
|
||||
else:
|
||||
self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
|
||||
self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
|
||||
return False
|
||||
cmd = [program] + args
|
||||
|
||||
retval = subprocess.call(cmd)
|
||||
if retval == 0:
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen(u'\r[%s] %s bytes' % (cmd[0], fsize))
|
||||
self.to_screen('\r[%s] %s bytes' % (cmd[0], fsize))
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
@@ -45,8 +45,8 @@ class HlsFD(FileDownloader):
|
||||
})
|
||||
return True
|
||||
else:
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'%s exited with code %d' % (program, retval))
|
||||
self.to_stderr('\n')
|
||||
self.report_error('%s exited with code %d' % (program, retval))
|
||||
return False
|
||||
|
||||
|
||||
@@ -101,4 +101,3 @@ class NativeHlsFD(FileDownloader):
|
||||
})
|
||||
self.try_rename(tmpfilename, filename)
|
||||
return True
|
||||
|
||||
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import time
|
||||
|
||||
@@ -106,7 +108,7 @@ class HttpFD(FileDownloader):
|
||||
self.report_retry(count, retries)
|
||||
|
||||
if count > retries:
|
||||
self.report_error(u'giving up after %s retries' % retries)
|
||||
self.report_error('giving up after %s retries' % retries)
|
||||
return False
|
||||
|
||||
data_len = data.info().get('Content-length', None)
|
||||
@@ -124,10 +126,10 @@ class HttpFD(FileDownloader):
|
||||
min_data_len = self.params.get("min_filesize", None)
|
||||
max_data_len = self.params.get("max_filesize", None)
|
||||
if min_data_len is not None and data_len < min_data_len:
|
||||
self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
|
||||
self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
|
||||
return False
|
||||
if max_data_len is not None and data_len > max_data_len:
|
||||
self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
|
||||
self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
|
||||
return False
|
||||
|
||||
data_len_str = format_bytes(data_len)
|
||||
@@ -151,13 +153,13 @@ class HttpFD(FileDownloader):
|
||||
filename = self.undo_temp_name(tmpfilename)
|
||||
self.report_destination(filename)
|
||||
except (OSError, IOError) as err:
|
||||
self.report_error(u'unable to open for writing: %s' % str(err))
|
||||
self.report_error('unable to open for writing: %s' % str(err))
|
||||
return False
|
||||
try:
|
||||
stream.write(data_block)
|
||||
except (IOError, OSError) as err:
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'unable to write data: %s' % str(err))
|
||||
self.to_stderr('\n')
|
||||
self.report_error('unable to write data: %s' % str(err))
|
||||
return False
|
||||
if not self.params.get('noresizebuffer', False):
|
||||
block_size = self.best_block_size(after - before, len(data_block))
|
||||
@@ -188,10 +190,10 @@ class HttpFD(FileDownloader):
|
||||
self.slow_down(start, byte_counter - resume_len)
|
||||
|
||||
if stream is None:
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'Did not get any data blocks')
|
||||
self.to_stderr('\n')
|
||||
self.report_error('Did not get any data blocks')
|
||||
return False
|
||||
if tmpfilename != u'-':
|
||||
if tmpfilename != '-':
|
||||
stream.close()
|
||||
self.report_finish(data_len_str, (time.time() - start))
|
||||
if data_len is not None and byte_counter != data_len:
|
||||
|
@@ -1,7 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..compat import compat_subprocess_get_DEVNULL
|
||||
from ..utils import (
|
||||
encodeFilename,
|
||||
)
|
||||
@@ -13,19 +16,23 @@ class MplayerFD(FileDownloader):
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
|
||||
args = ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url]
|
||||
args = [
|
||||
'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',
|
||||
'-dumpstream', '-dumpfile', tmpfilename, url]
|
||||
# Check for mplayer first
|
||||
try:
|
||||
subprocess.call(['mplayer', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
|
||||
subprocess.call(
|
||||
['mplayer', '-h'],
|
||||
stdout=compat_subprocess_get_DEVNULL(), stderr=subprocess.STDOUT)
|
||||
except (OSError, IOError):
|
||||
self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0])
|
||||
self.report_error('MMS or RTSP download detected but "%s" could not be run' % args[0])
|
||||
return False
|
||||
|
||||
# Download using mplayer.
|
||||
retval = subprocess.call(args)
|
||||
if retval == 0:
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
|
||||
self.to_screen('\r[%s] %s bytes' % (args[0], fsize))
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
@@ -35,6 +42,6 @@ class MplayerFD(FileDownloader):
|
||||
})
|
||||
return True
|
||||
else:
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'mplayer exited with code %d' % retval)
|
||||
self.to_stderr('\n')
|
||||
self.report_error('mplayer exited with code %d' % retval)
|
||||
return False
|
||||
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .abc import ABCIE
|
||||
from .academicearth import AcademicEarthCourseIE
|
||||
from .addanime import AddAnimeIE
|
||||
@@ -22,6 +24,7 @@ from .arte import (
|
||||
)
|
||||
from .audiomack import AudiomackIE
|
||||
from .auengine import AUEngineIE
|
||||
from .azubu import AzubuIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||
from .bbccouk import BBCCoUkIE
|
||||
@@ -32,9 +35,11 @@ from .bilibili import BiliBiliIE
|
||||
from .blinkx import BlinkxIE
|
||||
from .bliptv import BlipTVIE, BlipTVUserIE
|
||||
from .bloomberg import BloombergIE
|
||||
from .bpb import BpbIE
|
||||
from .br import BRIE
|
||||
from .breakcom import BreakIE
|
||||
from .brightcove import BrightcoveIE
|
||||
from .buzzfeed import BuzzFeedIE
|
||||
from .byutv import BYUtvIE
|
||||
from .c56 import C56IE
|
||||
from .canal13cl import Canal13clIE
|
||||
@@ -117,6 +122,8 @@ from .fktv import (
|
||||
from .flickr import FlickrIE
|
||||
from .folketinget import FolketingetIE
|
||||
from .fourtube import FourTubeIE
|
||||
from .foxgay import FoxgayIE
|
||||
from .foxnews import FoxNewsIE
|
||||
from .franceculture import FranceCultureIE
|
||||
from .franceinter import FranceInterIE
|
||||
from .francetv import (
|
||||
@@ -212,6 +219,7 @@ from .mdr import MDRIE
|
||||
from .metacafe import MetacafeIE
|
||||
from .metacritic import MetacriticIE
|
||||
from .mgoon import MgoonIE
|
||||
from .minhateca import MinhatecaIE
|
||||
from .ministrygrid import MinistryGridIE
|
||||
from .mit import TechTVMITIE, MITIE, OCWMITIE
|
||||
from .mitele import MiTeleIE
|
||||
@@ -238,9 +246,10 @@ from .muenchentv import MuenchenTVIE
|
||||
from .musicplayon import MusicPlayOnIE
|
||||
from .musicvault import MusicVaultIE
|
||||
from .muzu import MuzuTVIE
|
||||
from .myspace import MySpaceIE
|
||||
from .myspace import MySpaceIE, MySpaceAlbumIE
|
||||
from .myspass import MySpassIE
|
||||
from .myvideo import MyVideoIE
|
||||
from .myvidster import MyVidsterIE
|
||||
from .naver import NaverIE
|
||||
from .nba import NBAIE
|
||||
from .nbc import (
|
||||
@@ -372,6 +381,7 @@ from .syfy import SyfyIE
|
||||
from .sztvhu import SztvHuIE
|
||||
from .tagesschau import TagesschauIE
|
||||
from .tapely import TapelyIE
|
||||
from .tass import TassIE
|
||||
from .teachertube import (
|
||||
TeacherTubeIE,
|
||||
TeacherTubeUserIE,
|
||||
@@ -380,6 +390,7 @@ from .teachingchannel import TeachingChannelIE
|
||||
from .teamcoco import TeamcocoIE
|
||||
from .techtalks import TechTalksIE
|
||||
from .ted import TEDIE
|
||||
from .telebruxelles import TeleBruxellesIE
|
||||
from .telecinco import TelecincoIE
|
||||
from .telemb import TeleMBIE
|
||||
from .tenplay import TenPlayIE
|
||||
@@ -391,6 +402,7 @@ from .thesixtyone import TheSixtyOneIE
|
||||
from .thisav import ThisAVIE
|
||||
from .tinypic import TinyPicIE
|
||||
from .tlc import TlcIE, TlcDeIE
|
||||
from .tmz import TMZIE
|
||||
from .tnaflix import TNAFlixIE
|
||||
from .thvideo import (
|
||||
THVideoIE,
|
||||
@@ -404,11 +416,13 @@ from .trutube import TruTubeIE
|
||||
from .tube8 import Tube8IE
|
||||
from .tudou import TudouIE
|
||||
from .tumblr import TumblrIE
|
||||
from .tunein import TuneInIE
|
||||
from .turbo import TurboIE
|
||||
from .tutv import TutvIE
|
||||
from .tvigle import TvigleIE
|
||||
from .tvp import TvpIE
|
||||
from .tvplay import TVPlayIE
|
||||
from .twentyfourvideo import TwentyFourVideoIE
|
||||
from .twitch import TwitchIE
|
||||
from .ubu import UbuIE
|
||||
from .udemy import (
|
||||
@@ -453,7 +467,10 @@ from .vine import (
|
||||
VineUserIE,
|
||||
)
|
||||
from .viki import VikiIE
|
||||
from .vk import VKIE
|
||||
from .vk import (
|
||||
VKIE,
|
||||
VKUserVideosIE,
|
||||
)
|
||||
from .vodlocker import VodlockerIE
|
||||
from .vporn import VpornIE
|
||||
from .vrt import VRTIE
|
||||
@@ -477,6 +494,7 @@ from .wrzuta import WrzutaIE
|
||||
from .xbef import XBefIE
|
||||
from .xboxclips import XboxClipsIE
|
||||
from .xhamster import XHamsterIE
|
||||
from .xminus import XMinusIE
|
||||
from .xnxx import XNXXIE
|
||||
from .xvideos import XVideosIE
|
||||
from .xtube import XTubeUserIE, XTubeIE
|
||||
@@ -507,6 +525,10 @@ from .youtube import (
|
||||
YoutubeWatchLaterIE,
|
||||
)
|
||||
from .zdf import ZDFIE
|
||||
from .zingmp3 import (
|
||||
ZingMp3SongIE,
|
||||
ZingMp3AlbumIE,
|
||||
)
|
||||
|
||||
_ALL_CLASSES = [
|
||||
klass
|
||||
|
@@ -1,4 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -18,15 +19,14 @@ class AcademicEarthCourseIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = re.match(self._VALID_URL, url)
|
||||
playlist_id = m.group('id')
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
title = self._html_search_regex(
|
||||
r'<h1 class="playlist-name"[^>]*?>(.*?)</h1>', webpage, u'title')
|
||||
r'<h1 class="playlist-name"[^>]*?>(.*?)</h1>', webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'<p class="excerpt"[^>]*?>(.*?)</p>',
|
||||
webpage, u'description', fatal=False)
|
||||
webpage, 'description', fatal=False)
|
||||
urls = re.findall(
|
||||
r'<li class="lecture-preview">\s*?<a target="_blank" href="([^"]+)">',
|
||||
webpage)
|
||||
|
@@ -15,8 +15,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class AddAnimeIE(InfoExtractor):
|
||||
|
||||
_VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
|
||||
_VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<id>[\w_]+)(?:.*)'
|
||||
_TEST = {
|
||||
'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
|
||||
'md5': '72954ea10bc979ab5e2eb288b21425a0',
|
||||
@@ -29,9 +28,9 @@ class AddAnimeIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
try:
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('video_id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
except ExtractorError as ee:
|
||||
if not isinstance(ee.cause, compat_HTTPError) or \
|
||||
@@ -49,7 +48,7 @@ class AddAnimeIE(InfoExtractor):
|
||||
r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);',
|
||||
redir_webpage)
|
||||
if av is None:
|
||||
raise ExtractorError(u'Cannot find redirect math task')
|
||||
raise ExtractorError('Cannot find redirect math task')
|
||||
av_res = int(av.group(1)) + int(av.group(2)) * int(av.group(3))
|
||||
|
||||
parsed_url = compat_urllib_parse_urlparse(url)
|
||||
|
@@ -5,6 +5,7 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class AdultSwimIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://video\.adultswim\.com/(?P<path>.+?)(?:\.html)?(?:\?.*)?(?:#.*)?$'
|
||||
_TEST = {
|
||||
|
@@ -1,5 +1,4 @@
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@@ -26,8 +25,7 @@ class AparatIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = re.match(self._VALID_URL, url)
|
||||
video_id = m.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
# Note: There is an easier-to-parse configuration at
|
||||
# http://www.aparat.com/video/video/config/videohash/%video_id
|
||||
@@ -40,15 +38,15 @@ class AparatIE(InfoExtractor):
|
||||
for i, video_url in enumerate(video_urls):
|
||||
req = HEADRequest(video_url)
|
||||
res = self._request_webpage(
|
||||
req, video_id, note=u'Testing video URL %d' % i, errnote=False)
|
||||
req, video_id, note='Testing video URL %d' % i, errnote=False)
|
||||
if res:
|
||||
break
|
||||
else:
|
||||
raise ExtractorError(u'No working video URLs found')
|
||||
raise ExtractorError('No working video URLs found')
|
||||
|
||||
title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, u'title')
|
||||
title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title')
|
||||
thumbnail = self._search_regex(
|
||||
r'\s+image:\s*"([^"]+)"', webpage, u'thumbnail', fatal=False)
|
||||
r'\s+image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -70,15 +70,17 @@ class AppleTrailersIE(InfoExtractor):
|
||||
uploader_id = mobj.group('company')
|
||||
|
||||
playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
|
||||
|
||||
def fix_html(s):
|
||||
s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
|
||||
s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s)
|
||||
# The ' in the onClick attributes are not escaped, it couldn't be parsed
|
||||
# like: http://trailers.apple.com/trailers/wb/gravity/
|
||||
|
||||
def _clean_json(m):
|
||||
return 'iTunes.playURL(%s);' % m.group(1).replace('\'', ''')
|
||||
s = re.sub(self._JSON_RE, _clean_json, s)
|
||||
s = '<html>' + s + u'</html>'
|
||||
s = '<html>%s</html>' % s
|
||||
return s
|
||||
doc = self._download_xml(playlist_url, movie, transform_source=fix_html)
|
||||
|
||||
|
@@ -192,4 +192,3 @@ class ARDIE(InfoExtractor):
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
|
@@ -24,17 +24,17 @@ class AudiomackIE(InfoExtractor):
|
||||
},
|
||||
# hosted on soundcloud via audiomack
|
||||
{
|
||||
'add_ie': ['Soundcloud'],
|
||||
'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare',
|
||||
'file': '172419696.mp3',
|
||||
'info_dict':
|
||||
{
|
||||
'info_dict': {
|
||||
'id': '172419696',
|
||||
'ext': 'mp3',
|
||||
'description': 'md5:1fc3272ed7a635cce5be1568c2822997',
|
||||
'title': 'Young Thug ft Lil Wayne - Take Kare',
|
||||
"upload_date": "20141016",
|
||||
"description": "New track produced by London On Da Track called “Take Kare\"\n\nhttp://instagram.com/theyoungthugworld\nhttps://www.facebook.com/ThuggerThuggerCashMoney\n",
|
||||
"uploader": "Young Thug World"
|
||||
}
|
||||
'uploader': 'Young Thug World',
|
||||
'upload_date': '20141016',
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
93
youtube_dl/extractor/azubu.py
Normal file
93
youtube_dl/extractor/azubu.py
Normal file
@@ -0,0 +1,93 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import float_or_none
|
||||
|
||||
|
||||
class AzubuIE(InfoExtractor):
    """Information extractor for azubu.tv video pages (``<channel>#!/play/<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?azubu\.tv/[^/]+#!/play/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'http://www.azubu.tv/GSL#!/play/15575/2014-hot6-cup-last-big-match-ro8-day-1',
            'md5': 'a88b42fcf844f29ad6035054bd9ecaf4',
            'info_dict': {
                'id': '15575',
                'ext': 'mp4',
                'title': '2014 HOT6 CUP LAST BIG MATCH Ro8 Day 1',
                'description': 'md5:d06bdea27b8cc4388a90ad35b5c66c01',
                # Raw string: "\." is not a valid escape in a normal literal.
                'thumbnail': r're:^https?://.*\.jpe?g',
                'timestamp': 1417523507.334,
                'upload_date': '20141202',
                'duration': 9988.7,
                'uploader': 'GSL',
                'uploader_id': 414310,
                'view_count': int,
            },
        },
        {
            'url': 'http://www.azubu.tv/FnaticTV#!/play/9344/-fnatic-at-worlds-2014:-toyz---%22i-love-rekkles,-he-has-amazing-mechanics%22-',
            'md5': 'b72a871fe1d9f70bd7673769cdb3b925',
            'info_dict': {
                'id': '9344',
                'ext': 'mp4',
                'title': 'Fnatic at Worlds 2014: Toyz - "I love Rekkles, he has amazing mechanics"',
                'description': 'md5:4a649737b5f6c8b5c5be543e88dc62af',
                'thumbnail': r're:^https?://.*\.jpe?g',
                'timestamp': 1410530893.320,
                'upload_date': '20140912',
                'duration': 172.385,
                'uploader': 'FnaticTV',
                'uploader_id': 272749,
                'view_count': int,
            },
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for the video referenced by ``url``.

        Downloads ``http://www.azubu.tv/api/video/<id>`` as JSON, reads the
        metadata from its ``data`` object and the renditions from the JSON
        document stored (as a string) under ``data['stream_params']``.

        Only ``title`` and ``stream_params`` are treated as mandatory; every
        other field is looked up leniently and ends up as ``None`` when the
        API response does not carry it.
        """
        video_id = self._match_id(url)

        data = self._download_json(
            'http://www.azubu.tv/api/video/%s' % video_id, video_id)['data']

        title = data['title'].strip()
        user = data.get('user') or {}

        # stream_params is itself a JSON document embedded as a string.
        stream_params = json.loads(data['stream_params'])

        # Both values are divided by 1000 by float_or_none.
        # NOTE(review): presumably milliseconds in the API - confirm.
        timestamp = float_or_none(stream_params.get('creationDate'), 1000)
        duration = float_or_none(stream_params.get('length'), 1000)

        # Copy so that appending the full-length rendition does not modify
        # the parsed stream_params structure.
        renditions = list(stream_params.get('renditions') or [])
        video = stream_params.get('FLVFullLength') or stream_params.get('videoFullLength')
        if video:
            renditions.append(video)

        # Renditions without a usable URL are skipped rather than raising.
        formats = [{
            'url': fmt['url'],
            'width': fmt.get('frameWidth'),
            'height': fmt.get('frameHeight'),
            'vbr': float_or_none(fmt.get('encodingRate'), 1000),
            'filesize': fmt.get('size'),
            'vcodec': fmt.get('videoCodec'),
            'container': fmt.get('videoContainer'),
        } for fmt in renditions if fmt.get('url')]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': data.get('description'),
            'thumbnail': data.get('thumbnail'),
            'timestamp': timestamp,
            'duration': duration,
            'uploader': user.get('username'),
            'uploader_id': user.get('id'),
            'view_count': data.get('view_count'),
            'formats': formats,
        }
|
@@ -18,7 +18,7 @@ class BambuserIE(InfoExtractor):
|
||||
_TEST = {
|
||||
'url': 'http://bambuser.com/v/4050584',
|
||||
# MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388
|
||||
#u'md5': 'fba8f7693e48fd4e8641b3fd5539a641',
|
||||
# 'md5': 'fba8f7693e48fd4e8641b3fd5539a641',
|
||||
'info_dict': {
|
||||
'id': '4050584',
|
||||
'ext': 'flv',
|
||||
@@ -73,7 +73,8 @@ class BambuserChannelIE(InfoExtractor):
|
||||
urls = []
|
||||
last_id = ''
|
||||
for i in itertools.count(1):
|
||||
req_url = ('http://bambuser.com/xhr-api/index.php?username={user}'
|
||||
req_url = (
|
||||
'http://bambuser.com/xhr-api/index.php?username={user}'
|
||||
'&sort=created&access_mode=0%2C1%2C2&limit={count}'
|
||||
'&method=broadcast&format=json&vid_older_than={last}'
|
||||
).format(user=user, count=self._STEP, last=last_id)
|
||||
|
@@ -1,9 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
from ..compat import compat_HTTPError
|
||||
|
||||
|
||||
class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||
@@ -55,7 +56,22 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
|
||||
'info_dict': {
|
||||
'id': 'b03k3pb7',
|
||||
'ext': 'flv',
|
||||
'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
|
||||
'description': '2. Invasion',
|
||||
'duration': 3600,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
|
||||
},
|
||||
]
|
||||
|
||||
def _extract_asx_playlist(self, connection, programme_id):
|
||||
@@ -102,6 +118,10 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||
return playlist.findall('./{http://bbc.co.uk/2008/emp/playlist}item')
|
||||
|
||||
def _extract_medias(self, media_selection):
    """Return the ``media`` child elements of a media selection document.

    Raises ExtractorError (flagged as expected) when the document has an
    ``error`` child; the message carries that element's ``id`` attribute.
    """
    failure = media_selection.find('./{http://bbc.co.uk/2008/mp/mediaselection}error')
    if failure is None:
        return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media')
    message = '%s returned error: %s' % (self.IE_NAME, failure.get('id'))
    raise ExtractorError(message, expected=True)
|
||||
|
||||
def _extract_connections(self, media):
|
||||
@@ -158,45 +178,19 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||
subtitles[lang] = srt
|
||||
return subtitles
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
group_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, group_id, 'Downloading video page')
|
||||
if re.search(r'id="emp-error" class="notinuk">', webpage):
|
||||
raise ExtractorError('Currently BBC iPlayer TV programmes are available to play in the UK only',
|
||||
expected=True)
|
||||
|
||||
playlist = self._download_xml('http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id,
|
||||
'Downloading playlist XML')
|
||||
|
||||
no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
|
||||
if no_items is not None:
|
||||
reason = no_items.get('reason')
|
||||
if reason == 'preAvailability':
|
||||
msg = 'Episode %s is not yet available' % group_id
|
||||
elif reason == 'postAvailability':
|
||||
msg = 'Episode %s is no longer available' % group_id
|
||||
else:
|
||||
msg = 'Episode %s is not available: %s' % (group_id, reason)
|
||||
raise ExtractorError(msg, expected=True)
|
||||
|
||||
formats = []
|
||||
subtitles = None
|
||||
|
||||
for item in self._extract_items(playlist):
|
||||
kind = item.get('kind')
|
||||
if kind != 'programme' and kind != 'radioProgramme':
|
||||
continue
|
||||
title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
|
||||
description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
|
||||
|
||||
programme_id = item.get('identifier')
|
||||
duration = int(item.get('duration'))
|
||||
|
||||
def _download_media_selector(self, programme_id):
|
||||
try:
|
||||
media_selection = self._download_xml(
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id,
|
||||
programme_id, 'Downloading media selection XML')
|
||||
except ExtractorError as ee:
|
||||
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
|
||||
media_selection = xml.etree.ElementTree.fromstring(ee.cause.read().encode('utf-8'))
|
||||
else:
|
||||
raise
|
||||
|
||||
formats = []
|
||||
subtitles = None
|
||||
|
||||
for media in self._extract_medias(media_selection):
|
||||
kind = media.get('kind')
|
||||
@@ -207,6 +201,51 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||
elif kind == 'captions':
|
||||
subtitles = self._extract_captions(media, programme_id)
|
||||
|
||||
return formats, subtitles
|
||||
|
||||
def _real_extract(self, url):
|
||||
group_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, group_id, 'Downloading video page')
|
||||
|
||||
programme_id = self._search_regex(
|
||||
r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False)
|
||||
if programme_id:
|
||||
player = self._download_json(
|
||||
'http://www.bbc.co.uk/iplayer/episode/%s.json' % group_id,
|
||||
group_id)['jsConf']['player']
|
||||
title = player['title']
|
||||
description = player['subtitle']
|
||||
duration = player['duration']
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
else:
|
||||
playlist = self._download_xml(
|
||||
'http://www.bbc.co.uk/iplayer/playlist/%s' % group_id,
|
||||
group_id, 'Downloading playlist XML')
|
||||
|
||||
no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
|
||||
if no_items is not None:
|
||||
reason = no_items.get('reason')
|
||||
if reason == 'preAvailability':
|
||||
msg = 'Episode %s is not yet available' % group_id
|
||||
elif reason == 'postAvailability':
|
||||
msg = 'Episode %s is no longer available' % group_id
|
||||
elif reason == 'noMedia':
|
||||
msg = 'Episode %s is not currently available' % group_id
|
||||
else:
|
||||
msg = 'Episode %s is not available: %s' % (group_id, reason)
|
||||
raise ExtractorError(msg, expected=True)
|
||||
|
||||
for item in self._extract_items(playlist):
|
||||
kind = item.get('kind')
|
||||
if kind != 'programme' and kind != 'radioProgramme':
|
||||
continue
|
||||
title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
|
||||
description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
|
||||
programme_id = item.get('identifier')
|
||||
duration = int(item.get('duration'))
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(programme_id, subtitles)
|
||||
return
|
||||
|
@@ -64,6 +64,20 @@ class BlipTVIE(SubtitlesInfoExtractor):
|
||||
'uploader': 'redvsblue',
|
||||
'uploader_id': '792887',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://blip.tv/play/gbk766dkj4Yn',
|
||||
'md5': 'fe0a33f022d49399a241e84a8ea8b8e3',
|
||||
'info_dict': {
|
||||
'id': '1749452',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20090208',
|
||||
'description': 'Witness the first appearance of the Nostalgia Critic character, as Doug reviews the movie Transformers.',
|
||||
'title': 'Nostalgia Critic: Transformers',
|
||||
'timestamp': 1234068723,
|
||||
'uploader': 'NostalgiaCritic',
|
||||
'uploader_id': '246467',
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
@@ -74,10 +88,12 @@ class BlipTVIE(SubtitlesInfoExtractor):
|
||||
# See https://github.com/rg3/youtube-dl/issues/857 and
|
||||
# https://github.com/rg3/youtube-dl/issues/4197
|
||||
if lookup_id:
|
||||
info_page = self._download_webpage(
|
||||
'http://blip.tv/play/%s.x?p=1' % lookup_id, lookup_id, 'Resolving lookup id')
|
||||
video_id = self._search_regex(r'config\.id\s*=\s*"([0-9]+)', info_page, 'video_id')
|
||||
else:
|
||||
urlh = self._request_webpage(
|
||||
'http://blip.tv/play/%s' % lookup_id, lookup_id, 'Resolving lookup id')
|
||||
url = compat_urlparse.urlparse(urlh.geturl())
|
||||
qs = compat_urlparse.parse_qs(url.query)
|
||||
mobj = re.match(self._VALID_URL, qs['file'][0])
|
||||
|
||||
video_id = mobj.group('id')
|
||||
|
||||
rss = self._download_xml('http://blip.tv/rss/flash/%s' % video_id, video_id, 'Downloading video RSS')
|
||||
@@ -114,7 +130,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
|
||||
msg = self._download_webpage(
|
||||
url + '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url',
|
||||
video_id, 'Resolving URL for %s' % role)
|
||||
real_url = compat_urlparse.parse_qs(msg)['message'][0]
|
||||
real_url = compat_urlparse.parse_qs(msg.strip())['message'][0]
|
||||
|
||||
media_type = media_content.get('type')
|
||||
if media_type == 'text/srt' or url.endswith('.srt'):
|
||||
|
37
youtube_dl/extractor/bpb.py
Normal file
37
youtube_dl/extractor/bpb.py
Normal file
@@ -0,0 +1,37 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class BpbIE(InfoExtractor):
    """Extractor for videos hosted in the bpb.de Mediathek."""

    IE_DESC = 'Bundeszentrale für politische Bildung'
    _VALID_URL = r'http://www\.bpb\.de/mediathek/(?P<id>[0-9]+)/'

    _TEST = {
        'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
        'md5': '0792086e8e2bfbac9cdf27835d5f2093',
        'info_dict': {
            'id': '297',
            'ext': 'mp4',
            'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR',
            'description': 'Joachim Gauck, erster Beauftragter für die Stasi-Unterlagen, spricht auf dem Geschichtsforum über die friedliche Revolution 1989 und eine "gewisse Traurigkeit" im Umgang mit der DDR-Vergangenheit.'
        }
    }

    def _real_extract(self, url):
        """Download the Mediathek page and build the info dict from it.

        The numeric id comes from the URL; title, media URL and
        description are all scraped from the downloaded page.
        """
        mediathek_id = self._match_id(url)
        page = self._download_webpage(url, mediathek_id)

        # The title is taken from the page's <h2 class="white"> heading.
        heading = self._html_search_regex(
            r'<h2 class="white">(.*?)</h2>', page, 'title')
        # The page contains a direct link to the MP4 on film.bpb.de.
        media_url = self._html_search_regex(
            r'(http://film\.bpb\.de/player/dokument_[0-9]+\.mp4)',
            page, 'video URL')
        summary = self._og_search_description(page)

        return {
            'id': mediathek_id,
            'url': media_url,
            'title': heading,
            'description': summary,
        }
|
@@ -14,7 +14,6 @@ class BreakIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?break\.com/video/(?:[^/]+/)*.+-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056',
|
||||
'md5': '33aa4ff477ecd124d18d7b5d23b87ce5',
|
||||
'info_dict': {
|
||||
'id': '2468056',
|
||||
'ext': 'mp4',
|
||||
|
@@ -265,6 +265,7 @@ class BrightcoveIE(InfoExtractor):
|
||||
url = rend['defaultURL']
|
||||
if not url:
|
||||
continue
|
||||
ext = None
|
||||
if rend['remote']:
|
||||
url_comp = compat_urllib_parse_urlparse(url)
|
||||
if url_comp.path.endswith('.m3u8'):
|
||||
@@ -276,7 +277,7 @@ class BrightcoveIE(InfoExtractor):
|
||||
# akamaihd.net, but they don't use f4m manifests
|
||||
url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB'
|
||||
ext = 'flv'
|
||||
else:
|
||||
if ext is None:
|
||||
ext = determine_ext(url)
|
||||
size = rend.get('size')
|
||||
formats.append({
|
||||
|
74
youtube_dl/extractor/buzzfeed.py
Normal file
74
youtube_dl/extractor/buzzfeed.py
Normal file
@@ -0,0 +1,74 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class BuzzFeedIE(InfoExtractor):
    """Extractor that turns a BuzzFeed article into a playlist of its embedded videos."""

    _VALID_URL = r'https?://(?:www\.)?buzzfeed\.com/[^?#]*?/(?P<id>[^?#]+)'
    _TESTS = [{
        'url': 'http://www.buzzfeed.com/abagg/this-angry-ram-destroys-a-punching-bag-like-a-boss?utm_term=4ldqpia',
        'info_dict': {
            'id': 'this-angry-ram-destroys-a-punching-bag-like-a-boss',
            'title': 'This Angry Ram Destroys A Punching Bag Like A Boss',
            'description': 'Rambro!',
        },
        'playlist': [{
            'info_dict': {
                'id': 'aVCR29aE_OQ',
                'ext': 'mp4',
                'upload_date': '20141024',
                'uploader_id': 'Buddhanz1',
                'description': 'He likes to stay in shape with his heavy bag, he wont stop until its on the ground\n\nFollow Angry Ram on Facebook for regular updates -\nhttps://www.facebook.com/pages/Angry-Ram/1436897249899558?ref=hl',
                'uploader': 'Buddhanz',
                'title': 'Angry Ram destroys a punching bag',
            }
        }]
    }, {
        'url': 'http://www.buzzfeed.com/sheridanwatson/look-at-this-cute-dog-omg?utm_term=4ldqpia',
        'params': {
            'skip_download': True,  # Got enough YouTube download tests
        },
        'info_dict': {
            'description': 'Munchkin the Teddy Bear is back !',
            'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill',
        },
        'playlist': [{
            'info_dict': {
                'id': 'mVmBL8B-In0',
                'ext': 'mp4',
                'upload_date': '20141124',
                'uploader_id': 'CindysMunchkin',
                'description': '© 2014 Munchkin the Shih Tzu\nAll rights reserved\nFacebook: http://facebook.com/MunchkintheShihTzu',
                'uploader': 'Munchkin the Shih Tzu',
                'title': 'Munchkin the Teddy Bear gets her exercise',
            },
        }]
    }]

    def _iter_embedded_video_urls(self, webpage):
        """Yield the URL of each video found in the page's embed buckets.

        Each ``video-embed`` div carries a JSON blob in its
        ``rel:bf_bucket_data`` attribute. Buckets that have neither a
        ``video`` nor a ``progload_video`` entry are skipped.
        """
        bucket_payloads = re.findall(
            r'(?s)<div class="video-embed[^"]*"..*?rel:bf_bucket_data=\'([^\']+)\'',
            webpage)
        for payload in bucket_payloads:
            bucket = json.loads(payload)
            video = bucket.get('video') or bucket.get('progload_video')
            if video:
                yield video['url']

    def _real_extract(self, url):
        """Return a playlist result with one URL entry per embedded video."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        entries = [
            self.url_result(video_url)
            for video_url in self._iter_embedded_video_urls(webpage)
        ]

        # Playlist metadata comes from the article's Open Graph tags.
        playlist_title = self._og_search_title(webpage)
        playlist_description = self._og_search_description(webpage)

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist_title,
            'description': playlist_description,
            'entries': entries,
        }
|
@@ -45,4 +45,4 @@ class CBSIE(InfoExtractor):
|
||||
real_id = self._search_regex(
|
||||
r"video\.settings\.pid\s*=\s*'([^']+)';",
|
||||
webpage, 'real video ID')
|
||||
return self.url_result(u'theplatform:%s' % real_id)
|
||||
return self.url_result('theplatform:%s' % real_id)
|
||||
|
@@ -5,6 +5,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class Channel9IE(InfoExtractor):
|
||||
'''
|
||||
Common extractor for channel9.msdn.com.
|
||||
@@ -187,7 +188,8 @@ class Channel9IE(InfoExtractor):
|
||||
view_count = self._extract_view_count(html)
|
||||
comment_count = self._extract_comment_count(html)
|
||||
|
||||
common = {'_type': 'video',
|
||||
common = {
|
||||
'_type': 'video',
|
||||
'id': content_path,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
|
@@ -24,7 +24,7 @@ class ClipfishIE(InfoExtractor):
|
||||
'title': 'FIFA 14 - E3 2013 Trailer',
|
||||
'duration': 82,
|
||||
},
|
||||
u'skip': 'Blocked in the US'
|
||||
'skip': 'Blocked in the US'
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -34,7 +34,7 @@ class ClipfishIE(InfoExtractor):
|
||||
info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
|
||||
(video_id, int(time.time())))
|
||||
doc = self._download_xml(
|
||||
info_url, video_id, note=u'Downloading info page')
|
||||
info_url, video_id, note='Downloading info page')
|
||||
title = doc.find('title').text
|
||||
video_url = doc.find('filename').text
|
||||
if video_url is None:
|
||||
|
@@ -39,6 +39,7 @@ class ClipsyndicateIE(InfoExtractor):
|
||||
transform_source=fix_xml_ampersands)
|
||||
|
||||
track_doc = pdoc.find('trackList/track')
|
||||
|
||||
def find_param(name):
|
||||
node = find_xpath_attr(track_doc, './/param', 'name', name)
|
||||
if node is not None:
|
||||
|
@@ -25,8 +25,7 @@ class CNNIE(InfoExtractor):
|
||||
'duration': 135,
|
||||
'upload_date': '20130609',
|
||||
},
|
||||
},
|
||||
{
|
||||
}, {
|
||||
"url": "http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29",
|
||||
"md5": "b5cc60c60a3477d185af8f19a2a26f4e",
|
||||
"info_dict": {
|
||||
|
@@ -10,7 +10,8 @@ from ..utils import int_or_none
|
||||
class CollegeHumorIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'
|
||||
|
||||
_TESTS = [{
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
|
||||
'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd',
|
||||
'info_dict': {
|
||||
@@ -21,8 +22,7 @@ class CollegeHumorIE(InfoExtractor):
|
||||
'age_limit': 13,
|
||||
'duration': 187,
|
||||
},
|
||||
},
|
||||
{
|
||||
}, {
|
||||
'url': 'http://www.collegehumor.com/video/3505939/font-conference',
|
||||
'md5': '72fa701d8ef38664a4dbb9e2ab721816',
|
||||
'info_dict': {
|
||||
@@ -33,9 +33,8 @@ class CollegeHumorIE(InfoExtractor):
|
||||
'age_limit': 10,
|
||||
'duration': 179,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
# embedded youtube video
|
||||
{
|
||||
'url': 'http://www.collegehumor.com/embed/6950306',
|
||||
'info_dict': {
|
||||
'id': 'Z-bao9fg6Yc',
|
||||
|
@@ -13,6 +13,7 @@ import time
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from ..compat import (
|
||||
compat_cookiejar,
|
||||
compat_http_client,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse_urlparse,
|
||||
@@ -296,9 +297,11 @@ class InfoExtractor(object):
|
||||
content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal)
|
||||
return (content, urlh)
|
||||
|
||||
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True):
|
||||
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None):
|
||||
content_type = urlh.headers.get('Content-Type', '')
|
||||
webpage_bytes = urlh.read()
|
||||
if prefix is not None:
|
||||
webpage_bytes = prefix + webpage_bytes
|
||||
m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
|
||||
if m:
|
||||
encoding = m.group(1)
|
||||
@@ -434,6 +437,7 @@ class InfoExtractor(object):
|
||||
if video_id is not None:
|
||||
video_info['id'] = video_id
|
||||
return video_info
|
||||
|
||||
@staticmethod
|
||||
def playlist_result(entries, playlist_id=None, playlist_title=None):
|
||||
"""Returns a playlist"""
|
||||
@@ -814,6 +818,12 @@ class InfoExtractor(object):
|
||||
self._downloader.report_warning(msg)
|
||||
return res
|
||||
|
||||
def _set_cookie(self, domain, name, value, expire_time=None):
    """Store a cookie in the downloader's cookie jar.

    domain      -- domain the cookie is set for
    name, value -- cookie name and value
    expire_time -- passed through as the cookie's ``expires`` field;
                   None by default
    """
    # Keyword form of the Cookie constructor; the values are the same
    # ones the positional call passed (version 0, path '/', not secure).
    new_cookie = compat_cookiejar.Cookie(
        version=0, name=name, value=value,
        port=None, port_specified=None,
        domain=domain, domain_specified=None, domain_initial_dot=None,
        path='/', path_specified=True,
        secure=False, expires=expire_time, discard='',
        comment=None, comment_url=None, rest=None)
    self._downloader.cookiejar.set_cookie(new_cookie)
|
||||
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
|
@@ -69,11 +69,9 @@ class CrunchyrollIE(SubtitlesInfoExtractor):
|
||||
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
self._download_webpage(login_request, None, False, 'Wrong login info')
|
||||
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
|
||||
def _decrypt_subtitles(self, data, iv, id):
|
||||
data = bytes_to_intlist(data)
|
||||
iv = bytes_to_intlist(iv)
|
||||
@@ -99,8 +97,10 @@ class CrunchyrollIE(SubtitlesInfoExtractor):
|
||||
return shaHash + [0] * 12
|
||||
|
||||
key = obfuscate_key(id)
|
||||
|
||||
class Counter:
|
||||
__value = iv
|
||||
|
||||
def next_value(self):
|
||||
temp = self.__value
|
||||
self.__value = inc(self.__value)
|
||||
@@ -248,7 +248,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
subtitles = {}
|
||||
sub_format = self._downloader.params.get('subtitlesformat', 'srt')
|
||||
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
|
||||
sub_page = self._download_webpage('http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id='+sub_id,\
|
||||
sub_page = self._download_webpage(
|
||||
'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
|
||||
video_id, note='Downloading subtitles for ' + sub_name)
|
||||
id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
|
||||
iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
|
||||
|
@@ -18,6 +18,7 @@ from ..utils import (
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class DailymotionBaseInfoExtractor(InfoExtractor):
|
||||
@staticmethod
|
||||
def _build_request(url):
|
||||
@@ -27,6 +28,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
||||
request.add_header('Cookie', 'ff=off')
|
||||
return request
|
||||
|
||||
|
||||
class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
"""Information Extractor for Dailymotion"""
|
||||
|
||||
|
@@ -11,15 +11,15 @@ from ..utils import url_basename
|
||||
|
||||
class DropboxIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dropbox[.]com/sh?/(?P<id>[a-zA-Z0-9]{15})/.*'
|
||||
_TESTS = [{
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',
|
||||
'info_dict': {
|
||||
'id': 'nelirfsxnmcfbfh',
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl test video \'ä"BaW_jenozKc'
|
||||
}
|
||||
},
|
||||
{
|
||||
}, {
|
||||
'url': 'https://www.dropbox.com/sh/662glsejgzoj9sr/AAByil3FGH9KFNZ13e08eSa1a/Pregame%20Ceremony%20Program%20PA%2020140518.m4v',
|
||||
'only_matching': True,
|
||||
},
|
||||
|
@@ -125,7 +125,7 @@ class EightTracksIE(InfoExtractor):
|
||||
info = {
|
||||
'id': compat_str(track_data['id']),
|
||||
'url': track_data['track_file_stream_url'],
|
||||
'title': track_data['performer'] + u' - ' + track_data['name'],
|
||||
'title': track_data['performer'] + ' - ' + track_data['name'],
|
||||
'raw_title': track_data['name'],
|
||||
'uploader_id': data['user']['login'],
|
||||
'ext': 'm4a',
|
||||
|
48
youtube_dl/extractor/foxgay.py
Normal file
48
youtube_dl/extractor/foxgay.py
Normal file
@@ -0,0 +1,48 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class FoxgayIE(InfoExtractor):
    """Extractor for video pages on foxgay.com."""

    _VALID_URL = r'http://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P<id>\d+)\.shtml'
    _TEST = {
        'url': 'http://foxgay.com/videos/fuck-turkish-style-2582.shtml',
        'md5': '80d72beab5d04e1655a56ad37afe6841',
        'info_dict': {
            'id': '2582',
            'ext': 'mp4',
            'title': 'md5:6122f7ae0fc6b21ebdf59c5e083ce25a',
            'description': 'md5:5e51dc4405f1fd315f7927daed2ce5cf',
            'age_limit': 18,
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:https?://.*\.jpg$',
        },
    }

    def _real_extract(self, url):
        """Scrape the video page and its player iframe for the media URL.

        Title, description and thumbnail are looked up non-fatally; the
        iframe URL and the video URL are looked up with the default
        (fatal) behaviour.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'<title>(?P<title>.*?)</title>',
            webpage, 'title', fatal=False)
        description = self._html_search_regex(
            r'<div class="ico_desc"><h2>(?P<description>.*?)</h2>',
            webpage, 'description', fatal=False)

        # Find the URL for the iFrame which contains the actual video.
        iframe_url = self._html_search_regex(
            r'iframe src="(?P<frame>.*?)"', webpage, 'video frame')
        iframe = self._download_webpage(iframe_url, video_id)

        # The iframe page assigns the media and thumbnail URLs to
        # v_path / t_path.
        video_url = self._html_search_regex(
            r"v_path = '(?P<vid>http://.*?)'", iframe, 'url')
        thumb_url = self._html_search_regex(
            r"t_path = '(?P<thumb>http://.*?)'", iframe, 'thumbnail', fatal=False)

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'description': description,
            'thumbnail': thumb_url,
            'age_limit': 18,
        }
|
94
youtube_dl/extractor/foxnews.py
Normal file
94
youtube_dl/extractor/foxnews.py
Normal file
@@ -0,0 +1,94 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class FoxNewsIE(InfoExtractor):
    """Extractor for video.foxnews.com clips, including video-embed.html URLs."""

    _VALID_URL = r'https?://video\.foxnews\.com/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips',
            'md5': '32aaded6ba3ef0d1c04e238d01031e5e',
            'info_dict': {
                'id': '3937480',
                'ext': 'flv',
                'title': 'Frozen in Time',
                'description': 'Doctors baffled by 16-year-old girl that is the size of a toddler',
                'duration': 265,
                'timestamp': 1304411491,
                'upload_date': '20110503',
                # Raw string: '\.' is not a valid escape in a normal literal.
                'thumbnail': r're:^https?://.*\.jpg$',
            },
        },
        {
            'url': 'http://video.foxnews.com/v/3922535568001/rep-luis-gutierrez-on-if-obamas-immigration-plan-is-legal/#sp=show-clips',
            'md5': '5846c64a1ea05ec78175421b8323e2df',
            'info_dict': {
                'id': '3922535568001',
                'ext': 'mp4',
                'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal",
                'description': "Congressman discusses the president's executive action",
                'duration': 292,
                'timestamp': 1417662047,
                'upload_date': '20141204',
                'thumbnail': r're:^https?://.*\.jpg$',
            },
        },
        {
            'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Build the info dict from the JSON feed for the video id.

        Formats come from the feed's ``media-content`` entries: f4m and
        m3u8 manifests are expanded through the respective helpers,
        ``.smil`` entries are ignored, and anything else is treated as a
        direct media URL.
        """
        video_id = self._match_id(url)

        video = self._download_json(
            'http://video.foxnews.com/v/feed/video/%s.js?template=fox' % video_id, video_id)

        item = video['channel']['item']
        title = item['title']
        description = item['description']
        timestamp = parse_iso8601(item['dc-date'])

        media_group = item['media-group']
        duration = None
        formats = []
        for media in media_group['media-content']:
            attributes = media['@attributes']
            video_url = attributes['url']
            if video_url.endswith('.f4m'):
                formats.extend(self._extract_f4m_formats(
                    video_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124', video_id))
            elif video_url.endswith('.m3u8'):
                formats.extend(self._extract_m3u8_formats(video_url, video_id, 'flv'))
            elif not video_url.endswith('.smil'):
                # Duration is read only from direct-URL entries; the last
                # such entry wins.
                duration = int_or_none(attributes.get('duration'))
                formats.append({
                    'url': video_url,
                    'format_id': media['media-category']['@attributes']['label'],
                    'preference': 1,
                    'vbr': int_or_none(attributes.get('bitrate')),
                    'filesize': int_or_none(attributes.get('fileSize'))
                })
        self._sort_formats(formats)

        # Look the thumbnail up with .get() so a feed without one yields an
        # empty list instead of a KeyError; the plain subscript chain raised
        # before the "else []" fallback below could ever be reached.
        media_thumbnail = (media_group.get('media-thumbnail') or {}).get('@attributes')
        thumbnails = [{
            'url': media_thumbnail['url'],
            'width': int_or_none(media_thumbnail.get('width')),
            'height': int_or_none(media_thumbnail.get('height')),
        }] if media_thumbnail else []

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'timestamp': timestamp,
            'formats': formats,
            'thumbnails': thumbnails,
        }
|
@@ -26,6 +26,19 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
if info.get('status') == 'NOK':
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, info['message']), expected=True)
|
||||
allowed_countries = info['videos'][0].get('geoblocage')
|
||||
if allowed_countries:
|
||||
georestricted = True
|
||||
geo_info = self._download_json(
|
||||
'http://geo.francetv.fr/ws/edgescape.json', video_id,
|
||||
'Downloading geo restriction info')
|
||||
country = geo_info['reponse']['geo_info']['country_code']
|
||||
if country not in allowed_countries:
|
||||
raise ExtractorError(
|
||||
'The video is not available from your location',
|
||||
expected=True)
|
||||
else:
|
||||
georestricted = False
|
||||
|
||||
formats = []
|
||||
for video in info['videos']:
|
||||
@@ -36,6 +49,10 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
continue
|
||||
format_id = video['format']
|
||||
if video_url.endswith('.f4m'):
|
||||
if georestricted:
|
||||
# See https://github.com/rg3/youtube-dl/issues/3963
|
||||
# m3u8 urls work fine
|
||||
continue
|
||||
video_url_parsed = compat_urllib_parse_urlparse(video_url)
|
||||
f4m_url = self._download_webpage(
|
||||
'http://hdfauth.francetv.fr/esi/urltokengen2.html?url=%s' % video_url_parsed.path,
|
||||
|
@@ -11,7 +11,7 @@ class GamekingsIE(InfoExtractor):
|
||||
'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
|
||||
# MD5 is flaky, seems to change regularly
|
||||
# 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3',
|
||||
u'info_dict': {
|
||||
'info_dict': {
|
||||
'id': '20130811',
|
||||
'ext': 'mp4',
|
||||
'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review',
|
||||
|
@@ -445,6 +445,30 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'Rosetta #CometLanding webcast HL 10',
|
||||
}
|
||||
},
|
||||
# LazyYT
|
||||
{
|
||||
'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
|
||||
'info_dict': {
|
||||
'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
},
|
||||
# Direct link with incorrect MIME type
|
||||
{
|
||||
'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
|
||||
'md5': '4ccbebe5f36706d85221f204d7eb5913',
|
||||
'info_dict': {
|
||||
'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
|
||||
'id': '5_Lennart_Poettering_-_Systemd',
|
||||
'ext': 'webm',
|
||||
'title': '5_Lennart_Poettering_-_Systemd',
|
||||
'upload_date': '20141120',
|
||||
},
|
||||
'expected_warnings': [
|
||||
'URL could be a direct video link, returning it as such.'
|
||||
]
|
||||
}
|
||||
|
||||
]
|
||||
|
||||
def report_following_redirect(self, new_url):
|
||||
@@ -537,9 +561,9 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
if default_search in ('error', 'fixup_error'):
|
||||
raise ExtractorError(
|
||||
('%r is not a valid URL. '
|
||||
'%r is not a valid URL. '
|
||||
'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
|
||||
) % (url, url), expected=True)
|
||||
% (url, url), expected=True)
|
||||
else:
|
||||
if ':' not in default_search:
|
||||
default_search += ':'
|
||||
@@ -598,10 +622,28 @@ class GenericIE(InfoExtractor):
|
||||
if not self._downloader.params.get('test', False) and not is_intentional:
|
||||
self._downloader.report_warning('Falling back on generic information extractor.')
|
||||
|
||||
if full_response:
|
||||
webpage = self._webpage_read_content(full_response, url, video_id)
|
||||
else:
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
if not full_response:
|
||||
full_response = self._request_webpage(url, video_id)
|
||||
|
||||
# Maybe it's a direct link to a video?
|
||||
# Be careful not to download the whole thing!
|
||||
first_bytes = full_response.read(512)
|
||||
if not re.match(r'^\s*<', first_bytes.decode('utf-8', 'replace')):
|
||||
self._downloader.report_warning(
|
||||
'URL could be a direct video link, returning it as such.')
|
||||
upload_date = unified_strdate(
|
||||
head_response.headers.get('Last-Modified'))
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': os.path.splitext(url_basename(url))[0],
|
||||
'direct': True,
|
||||
'url': url,
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
|
||||
webpage = self._webpage_read_content(
|
||||
full_response, url, video_id, prefix=first_bytes)
|
||||
|
||||
self.report_extraction(video_id)
|
||||
|
||||
# Is it an RSS feed?
|
||||
@@ -702,6 +744,12 @@ class GenericIE(InfoExtractor):
|
||||
return _playlist_from_matches(
|
||||
matches, lambda m: unescapeHTML(m[1]))
|
||||
|
||||
# Look for lazyYT YouTube embed
|
||||
matches = re.findall(
|
||||
r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
|
||||
if matches:
|
||||
return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
|
||||
|
||||
# Look for embedded Dailymotion player
|
||||
matches = re.findall(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
|
||||
@@ -1025,4 +1073,3 @@ class GenericIE(InfoExtractor):
|
||||
'_type': 'playlist',
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
|
@@ -9,14 +9,15 @@ from ..utils import (
|
||||
determine_ext,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class GorillaVidIE(InfoExtractor):
|
||||
IE_DESC = 'GorillaVid.in, daclips.in and movpod.in'
|
||||
IE_DESC = 'GorillaVid.in, daclips.in, movpod.in and fastvideo.in'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?P<host>(?:www\.)?
|
||||
(?:daclips\.in|gorillavid\.in|movpod\.in))/
|
||||
(?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in))/
|
||||
(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?
|
||||
'''
|
||||
|
||||
@@ -49,6 +50,16 @@ class GorillaVidIE(InfoExtractor):
|
||||
'title': 'Micro Pig piglets ready on 16th July 2009-bG0PdrCdxUc',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
}
|
||||
}, {
|
||||
# video with countdown timeout
|
||||
'url': 'http://fastvideo.in/1qmdn1lmsmbw',
|
||||
'md5': '8b87ec3f6564a3108a0e8e66594842ba',
|
||||
'info_dict': {
|
||||
'id': '1qmdn1lmsmbw',
|
||||
'ext': 'mp4',
|
||||
'title': 'Man of Steel - Trailer',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://movpod.in/0wguyyxi1yca',
|
||||
'only_matching': True,
|
||||
@@ -71,6 +82,12 @@ class GorillaVidIE(InfoExtractor):
|
||||
''', webpage))
|
||||
|
||||
if fields['op'] == 'download1':
|
||||
countdown = int_or_none(self._search_regex(
|
||||
r'<span id="countdown_str">(?:[Ww]ait)?\s*<span id="cxc">(\d+)</span>\s*(?:seconds?)?</span>',
|
||||
webpage, 'countdown', default=None))
|
||||
if countdown:
|
||||
self._sleep(countdown, video_id)
|
||||
|
||||
post = compat_urllib_parse.urlencode(fields)
|
||||
|
||||
req = compat_urllib_request.Request(url, post)
|
||||
@@ -78,9 +95,13 @@ class GorillaVidIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(req, video_id, 'Downloading video page')
|
||||
|
||||
title = self._search_regex(r'style="z-index: [0-9]+;">([^<]+)</span>', webpage, 'title')
|
||||
video_url = self._search_regex(r'file\s*:\s*\'(http[^\']+)\',', webpage, 'file url')
|
||||
thumbnail = self._search_regex(r'image\s*:\s*\'(http[^\']+)\',', webpage, 'thumbnail', fatal=False)
|
||||
title = self._search_regex(
|
||||
r'style="z-index: [0-9]+;">([^<]+)</span>',
|
||||
webpage, 'title', default=None) or self._og_search_title(webpage)
|
||||
video_url = self._search_regex(
|
||||
r'file\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'file url')
|
||||
thumbnail = self._search_regex(
|
||||
r'image\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'thumbnail', fatal=False)
|
||||
|
||||
formats = [{
|
||||
'format_id': 'sd',
|
||||
|
@@ -1,12 +1,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
)
|
||||
@@ -16,25 +17,24 @@ class HotNewHipHopIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html',
|
||||
'file': '1435540.mp3',
|
||||
'md5': '2c2cd2f76ef11a9b3b581e8b232f3d96',
|
||||
'info_dict': {
|
||||
'id': '1435540',
|
||||
'ext': 'mp3',
|
||||
'title': 'Freddie Gibbs - Lay It Down'
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = re.match(self._VALID_URL, url)
|
||||
video_id = m.group('id')
|
||||
|
||||
webpage_src = self._download_webpage(url, video_id)
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url_base64 = self._search_regex(
|
||||
r'data-path="(.*?)"', webpage_src, u'video URL', fatal=False)
|
||||
r'data-path="(.*?)"', webpage, 'video URL', default=None)
|
||||
|
||||
if video_url_base64 is None:
|
||||
video_url = self._search_regex(
|
||||
r'"contentUrl" content="(.*?)"', webpage_src, u'video URL')
|
||||
r'"contentUrl" content="(.*?)"', webpage, 'content URL')
|
||||
return self.url_result(video_url, ie='Youtube')
|
||||
|
||||
reqdata = compat_urllib_parse.urlencode([
|
||||
@@ -59,11 +59,11 @@ class HotNewHipHopIE(InfoExtractor):
|
||||
if video_url.endswith('.html'):
|
||||
raise ExtractorError('Redirect failed')
|
||||
|
||||
video_title = self._og_search_title(webpage_src).strip()
|
||||
video_title = self._og_search_title(webpage).strip()
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': video_title,
|
||||
'thumbnail': self._og_search_thumbnail(webpage_src),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
}
|
||||
|
@@ -63,8 +63,10 @@ class IGNIE(InfoExtractor):
|
||||
'id': '078fdd005f6d3c02f63d795faa1b984f',
|
||||
'ext': 'mp4',
|
||||
'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
|
||||
'description': 'Giant skeletons, bloody hunts, and captivating'
|
||||
' natural beauty take our breath away.',
|
||||
'description': (
|
||||
'Giant skeletons, bloody hunts, and captivating'
|
||||
' natural beauty take our breath away.'
|
||||
),
|
||||
},
|
||||
},
|
||||
]
|
||||
|
@@ -58,9 +58,13 @@ class InternetVideoArchiveIE(InfoExtractor):
|
||||
item = info.find('channel/item')
|
||||
|
||||
def _bp(p):
|
||||
return xpath_with_ns(p,
|
||||
{'media': 'http://search.yahoo.com/mrss/',
|
||||
'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'})
|
||||
return xpath_with_ns(
|
||||
p,
|
||||
{
|
||||
'media': 'http://search.yahoo.com/mrss/',
|
||||
'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats',
|
||||
}
|
||||
)
|
||||
formats = []
|
||||
for content in item.findall(_bp('media:group/media:content')):
|
||||
attr = content.attrib
|
||||
|
@@ -45,4 +45,3 @@ class JadoreCettePubIE(InfoExtractor):
|
||||
'title': title,
|
||||
'description': description,
|
||||
}
|
||||
|
||||
|
@@ -13,8 +13,10 @@ class KickStarterIE(InfoExtractor):
|
||||
'id': '1404461844',
|
||||
'ext': 'mp4',
|
||||
'title': 'Intersection: The Story of Josh Grant by Kyle Cowling',
|
||||
'description': 'A unique motocross documentary that examines the '
|
||||
'life and mind of one of sports most elite athletes: Josh Grant.',
|
||||
'description': (
|
||||
'A unique motocross documentary that examines the '
|
||||
'life and mind of one of sports most elite athletes: Josh Grant.'
|
||||
),
|
||||
},
|
||||
}, {
|
||||
'note': 'Embedded video (not using the native kickstarter video service)',
|
||||
|
@@ -30,4 +30,3 @@ class Ku6IE(InfoExtractor):
|
||||
'title': title,
|
||||
'url': downloadUrl
|
||||
}
|
||||
|
||||
|
@@ -75,4 +75,3 @@ class Laola1TvIE(InfoExtractor):
|
||||
'categories': categories,
|
||||
'ext': 'mp4',
|
||||
}
|
||||
|
||||
|
@@ -19,8 +19,7 @@ class LiveLeakIE(InfoExtractor):
|
||||
'uploader': 'ljfriel2',
|
||||
'title': 'Most unlucky car accident'
|
||||
}
|
||||
},
|
||||
{
|
||||
}, {
|
||||
'url': 'http://www.liveleak.com/view?i=f93_1390833151',
|
||||
'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
|
||||
'info_dict': {
|
||||
@@ -30,8 +29,7 @@ class LiveLeakIE(InfoExtractor):
|
||||
'uploader': 'ARD_Stinkt',
|
||||
'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
|
||||
}
|
||||
},
|
||||
{
|
||||
}, {
|
||||
'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
|
||||
'md5': '42c6d97d54f1db107958760788c5f48f',
|
||||
'info_dict': {
|
||||
|
@@ -7,6 +7,7 @@ from ..utils import (
|
||||
compat_urllib_parse,
|
||||
)
|
||||
|
||||
|
||||
class MalemotionIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?malemotion\.com/video/(.+?)\.(?P<id>.+?)(#|$)'
|
||||
_TEST = {
|
||||
|
72
youtube_dl/extractor/minhateca.py
Normal file
72
youtube_dl/extractor/minhateca.py
Normal file
@@ -0,0 +1,72 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_filesize,
|
||||
)
|
||||
|
||||
|
||||
class MinhatecaIE(InfoExtractor):
    """Extractor for files hosted on minhateca.com.br.

    The numeric file id is taken from the URL. The media URL is obtained by
    POSTing that id together with the page's request-verification token to
    the site's License/Download action and reading ``redirectUrl`` from the
    JSON reply. Title, extension, size, duration and download counter are
    scraped from the file page itself.
    """
    _VALID_URL = r'https?://minhateca\.com\.br/[^?#]+,(?P<id>[0-9]+)\.'
    _TEST = {
        'url': 'http://minhateca.com.br/pereba/misc/youtube-dl+test+video,125848331.mp4(video)',
        'info_dict': {
            'id': '125848331',
            'ext': 'mp4',
            'title': 'youtube-dl test video',
            # Raw string: same value as before, without the invalid '\.' escape.
            'thumbnail': r're:^https?://.*\.jpg$',
            'filesize_approx': 1530000,
            'duration': 9,
            'view_count': int,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the file page at *url*.

        Raises whatever the download/regex helpers raise when the page, the
        verification token, the JSON reply or the <h1> title is missing; the
        size, duration and view-count lookups are non-fatal.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        token = self._html_search_regex(
            r'<input name="__RequestVerificationToken".*?value="([^"]+)"',
            webpage, 'request token')
        token_data = [
            ('fileId', video_id),
            ('__RequestVerificationToken', token),
        ]
        # urlencode() yields text on Python 3, but a request body must be
        # bytes there; the percent-encoded form is pure ASCII.
        post_body = compat_urllib_parse.urlencode(token_data).encode('ascii')
        req = compat_urllib_request.Request(
            'http://minhateca.com.br/action/License/Download',
            data=post_body)
        req.add_header('Content-Type', 'application/x-www-form-urlencoded')
        data = self._download_json(
            req, video_id, note='Downloading metadata')

        video_url = data['redirectUrl']
        title_str = self._html_search_regex(
            r'<h1.*?>(.*?)</h1>', webpage, 'title')
        # The heading is "<name>.<ext>"; split on the last dot.
        title, dot, ext = title_str.rpartition('.')
        if not dot:
            # No dot at all: rpartition would give an empty title and put
            # the whole heading into ext, so keep the heading as the title.
            title, ext = title_str, ''
        filesize_approx = parse_filesize(self._html_search_regex(
            r'<p class="fileSize">(.*?)</p>',
            webpage, 'file size approximation', fatal=False))
        duration = parse_duration(self._html_search_regex(
            r'(?s)<p class="fileLeng[ht][th]">.*?class="bold">(.*?)<',
            webpage, 'duration', fatal=False))
        view_count = int_or_none(self._html_search_regex(
            r'<p class="downloadsCounter">([0-9]+)</p>',
            webpage, 'view count', fatal=False))

        info = {
            'id': video_id,
            'url': video_url,
            'title': title,
            'filesize_approx': filesize_approx,
            'duration': duration,
            'view_count': view_count,
            'thumbnail': self._og_search_thumbnail(webpage),
        }
        if ext:
            info['ext'] = ext
        # NOTE(review): with no usable extension in the heading, 'ext' is
        # left unset on the assumption that the caller derives it from the
        # URL - confirm against the downloader core.
        return info
|
@@ -49,7 +49,7 @@ class MooshareIE(InfoExtractor):
|
||||
page = self._download_webpage(url, video_id, 'Downloading page')
|
||||
|
||||
if re.search(r'>Video Not Found or Deleted<', page) is not None:
|
||||
raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
|
||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
hash_key = self._html_search_regex(r'<input type="hidden" name="hash" value="([^"]+)">', page, 'hash')
|
||||
title = self._html_search_regex(r'(?m)<div class="blockTitle">\s*<h2>Watch ([^<]+)</h2>', page, 'title')
|
||||
|
@@ -164,7 +164,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
if mgid is None or ':' not in mgid:
|
||||
mgid = self._search_regex(
|
||||
[r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
|
||||
webpage, u'mgid')
|
||||
webpage, 'mgid')
|
||||
return self._get_videos_info(mgid)
|
||||
|
||||
|
||||
@@ -245,7 +245,7 @@ class MTVIE(MTVServicesInfoExtractor):
|
||||
m_vevo = re.search(r'isVevoVideo = true;.*?vevoVideoId = "(.*?)";',
|
||||
webpage, re.DOTALL)
|
||||
if m_vevo:
|
||||
vevo_id = m_vevo.group(1);
|
||||
vevo_id = m_vevo.group(1)
|
||||
self.to_screen('Vevo video detected: %s' % vevo_id)
|
||||
return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
|
||||
|
||||
|
@@ -73,4 +73,3 @@ class MuenchenTVIE(InfoExtractor):
|
||||
'is_live': True,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
|
@@ -1,47 +1,48 @@
|
||||
import re
|
||||
import json
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
class MuzuTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.muzu\.tv/(.+?)/(.+?)/(?P<id>\d+)'
|
||||
IE_NAME = u'muzu.tv'
|
||||
IE_NAME = 'muzu.tv'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.muzu.tv/defected/marcashken-featuring-sos-cat-walk-original-mix-music-video/1981454/',
|
||||
u'file': u'1981454.mp4',
|
||||
u'md5': u'98f8b2c7bc50578d6a0364fff2bfb000',
|
||||
u'info_dict': {
|
||||
u'title': u'Cat Walk (Original Mix)',
|
||||
u'description': u'md5:90e868994de201b2570e4e5854e19420',
|
||||
u'uploader': u'MarcAshken featuring SOS',
|
||||
'url': 'http://www.muzu.tv/defected/marcashken-featuring-sos-cat-walk-original-mix-music-video/1981454/',
|
||||
'md5': '98f8b2c7bc50578d6a0364fff2bfb000',
|
||||
'info_dict': {
|
||||
'id': '1981454',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cat Walk (Original Mix)',
|
||||
'description': 'md5:90e868994de201b2570e4e5854e19420',
|
||||
'uploader': 'MarcAshken featuring SOS',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
info_data = compat_urllib_parse.urlencode({'format': 'json',
|
||||
info_data = compat_urllib_parse.urlencode({
|
||||
'format': 'json',
|
||||
'url': url,
|
||||
})
|
||||
video_info_page = self._download_webpage('http://www.muzu.tv/api/oembed/?%s' % info_data,
|
||||
video_id, u'Downloading video info')
|
||||
info = json.loads(video_info_page)
|
||||
info = self._download_json(
|
||||
'http://www.muzu.tv/api/oembed/?%s' % info_data,
|
||||
video_id, 'Downloading video info')
|
||||
|
||||
player_info_page = self._download_webpage('http://player.muzu.tv/player/playerInit?ai=%s' % video_id,
|
||||
video_id, u'Downloading player info')
|
||||
video_info = json.loads(player_info_page)['videos'][0]
|
||||
player_info = self._download_json(
|
||||
'http://player.muzu.tv/player/playerInit?ai=%s' % video_id,
|
||||
video_id, 'Downloading player info')
|
||||
video_info = player_info['videos'][0]
|
||||
for quality in ['1080', '720', '480', '360']:
|
||||
if video_info.get('v%s' % quality):
|
||||
break
|
||||
|
||||
data = compat_urllib_parse.urlencode({'ai': video_id,
|
||||
data = compat_urllib_parse.urlencode({
|
||||
'ai': video_id,
|
||||
# Even if each time you watch a video the hash changes,
|
||||
# it seems to work for different videos, and it will work
|
||||
# even if you use any non empty string as a hash
|
||||
@@ -49,15 +50,15 @@ class MuzuTVIE(InfoExtractor):
|
||||
'device': 'web',
|
||||
'qv': quality,
|
||||
})
|
||||
video_url_page = self._download_webpage('http://player.muzu.tv/player/requestVideo?%s' % data,
|
||||
video_id, u'Downloading video url')
|
||||
video_url_info = json.loads(video_url_page)
|
||||
video_url_info = self._download_json(
|
||||
'http://player.muzu.tv/player/requestVideo?%s' % data,
|
||||
video_id, 'Downloading video url')
|
||||
video_url = video_url_info['url']
|
||||
|
||||
return {'id': video_id,
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info['title'],
|
||||
'url': video_url,
|
||||
'ext': determine_ext(video_url),
|
||||
'thumbnail': info['thumbnail_url'],
|
||||
'description': info['description'],
|
||||
'uploader': info['author_name'],
|
||||
|
@@ -1,12 +1,14 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class MySpaceIE(InfoExtractor):
|
||||
@@ -14,33 +16,58 @@ class MySpaceIE(InfoExtractor):
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://myspace.com/coldplay/video/viva-la-vida/100008689',
|
||||
'url': 'https://myspace.com/fiveminutestothestage/video/little-big-town/109594919',
|
||||
'info_dict': {
|
||||
'id': '100008689',
|
||||
'id': '109594919',
|
||||
'ext': 'flv',
|
||||
'title': 'Viva La Vida',
|
||||
'description': 'The official Viva La Vida video, directed by Hype Williams',
|
||||
'uploader': 'Coldplay',
|
||||
'uploader_id': 'coldplay',
|
||||
'title': 'Little Big Town',
|
||||
'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.',
|
||||
'uploader': 'Five Minutes to the Stage',
|
||||
'uploader_id': 'fiveminutestothestage',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# song
|
||||
# songs
|
||||
{
|
||||
'url': 'https://myspace.com/spiderbags/music/song/darkness-in-my-heart-39008454-27041242',
|
||||
'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681',
|
||||
'info_dict': {
|
||||
'id': '39008454',
|
||||
'id': '93388656',
|
||||
'ext': 'flv',
|
||||
'title': 'Darkness In My Heart',
|
||||
'uploader_id': 'spiderbags',
|
||||
'title': 'Of weakened soul...',
|
||||
'uploader': 'Killsorrow',
|
||||
'uploader_id': 'killsorrow',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'add_ie': ['Vevo'],
|
||||
'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041',
|
||||
'info_dict': {
|
||||
'id': 'USZM20600099',
|
||||
'ext': 'mp4',
|
||||
'title': 'Animal I Have Become',
|
||||
'uploader': 'Three Days Grace',
|
||||
'timestamp': int,
|
||||
'upload_date': '20060502',
|
||||
},
|
||||
'skip': 'VEVO is only available in some countries',
|
||||
}, {
|
||||
'add_ie': ['Youtube'],
|
||||
'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426',
|
||||
'info_dict': {
|
||||
'id': 'ypWvQgnJrSU',
|
||||
'ext': 'mp4',
|
||||
'title': 'Starset - First Light',
|
||||
'description': 'md5:2d5db6c9d11d527683bcda818d332414',
|
||||
'uploader': 'Jacob Soren',
|
||||
'uploader_id': 'SorenPromotions',
|
||||
'upload_date': '20140725',
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
@@ -48,22 +75,47 @@ class MySpaceIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
player_url = self._search_regex(
|
||||
r'playerSwf":"([^"?]*)', webpage, 'player URL')
|
||||
|
||||
if mobj.group('mediatype').startswith('music/song'):
|
||||
# songs don't store any useful info in the 'context' variable
|
||||
song_data = self._search_regex(
|
||||
r'''<button.*data-song-id=(["\'])%s\1.*''' % video_id,
|
||||
webpage, 'song_data', default=None, group=0)
|
||||
if song_data is None:
|
||||
# some songs in an album are not playable
|
||||
self.report_warning(
|
||||
'%s: No downloadable song on this page' % video_id)
|
||||
return
|
||||
|
||||
def search_data(name):
|
||||
return self._search_regex(r'data-%s="(.*?)"' % name, webpage,
|
||||
name)
|
||||
return self._search_regex(
|
||||
r'''data-%s=([\'"])(?P<data>.*?)\1''' % name,
|
||||
song_data, name, default='', group='data')
|
||||
streamUrl = search_data('stream-url')
|
||||
if not streamUrl:
|
||||
vevo_id = search_data('vevo-id')
|
||||
youtube_id = search_data('youtube-id')
|
||||
if vevo_id:
|
||||
self.to_screen('Vevo video detected: %s' % vevo_id)
|
||||
return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
|
||||
elif youtube_id:
|
||||
self.to_screen('Youtube video detected: %s' % youtube_id)
|
||||
return self.url_result(youtube_id, ie='Youtube')
|
||||
else:
|
||||
raise ExtractorError(
|
||||
'Found song but don\'t know how to download it')
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'uploader': search_data('artist-name'),
|
||||
'uploader_id': search_data('artist-username'),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
}
|
||||
else:
|
||||
context = json.loads(self._search_regex(r'context = ({.*?});', webpage,
|
||||
u'context'))
|
||||
context = json.loads(self._search_regex(
|
||||
r'context = ({.*?});', webpage, 'context'))
|
||||
video = context['video']
|
||||
streamUrl = video['streamUrl']
|
||||
info = {
|
||||
@@ -79,6 +131,50 @@ class MySpaceIE(InfoExtractor):
|
||||
info.update({
|
||||
'url': rtmp_url,
|
||||
'play_path': play_path,
|
||||
'player_url': player_url,
|
||||
'ext': 'flv',
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class MySpaceAlbumIE(InfoExtractor):
    """Playlist extractor for MySpace album pages.

    Collects every ``music:song`` meta entry of the album page and hands
    each one to MySpaceIE as a playlist entry.
    """
    IE_NAME = 'MySpace:album'
    _VALID_URL = r'https?://myspace\.com/([^/]+)/music/album/(?P<title>.*-)(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://myspace.com/starset2/music/album/transmissions-19455773',
        'info_dict': {
            'title': 'Transmissions',
            'id': '19455773',
        },
        'playlist_count': 14,
        'skip': 'this album is only available in some countries',
    }, {
        'url': 'https://myspace.com/killsorrow/music/album/the-demo-18596029',
        'info_dict': {
            'title': 'The Demo',
            'id': '18596029',
        },
        'playlist_count': 5,
    }]

    def _real_extract(self, url):
        """Build the playlist result for the album at *url*.

        Raises an expected ExtractorError when the page lists no songs.
        """
        match = re.match(self._VALID_URL, url)
        album_id = match.group('id')
        # The 'title' group keeps its trailing hyphen, so plain
        # concatenation rebuilds the "<slug>-<id>" part of the URL.
        album_slug = match.group('title') + album_id
        page = self._download_webpage(url, album_slug)

        song_refs = re.findall(r'"music:song" content="(.*?)"', page)
        if len(song_refs) == 0:
            raise ExtractorError(
                '%s: No songs found, try using proxy' % album_slug,
                expected=True)

        entries = []
        for song_ref in song_refs:
            entries.append(self.url_result(song_ref, ie=MySpaceIE.ie_key()))

        album_title = self._og_search_title(page)
        return {
            '_type': 'playlist',
            'id': album_id,
            'display_id': album_slug,
            'title': album_title,
            'entries': entries,
        }
|
||||
|
@@ -173,4 +173,3 @@ class MyVideoIE(InfoExtractor):
|
||||
'play_path': video_playpath,
|
||||
'player_url': video_swfobj,
|
||||
}
|
||||
|
||||
|
29
youtube_dl/extractor/myvidster.py
Normal file
29
youtube_dl/extractor/myvidster.py
Normal file
@@ -0,0 +1,29 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class MyVidsterIE(InfoExtractor):
    """Extractor for myvidster.com video pages.

    The page is only a pointer: the URL found in its ``rel="videolink"``
    anchor is returned as a url_result (the test entry expects XHamster
    to handle it).
    """
    _VALID_URL = r'http://(?:www\.)?myvidster\.com/video/(?P<id>\d+)/'

    _TEST = {
        'url': 'http://www.myvidster.com/video/32059805/Hot_chemistry_with_raw_love_making',
        'md5': '95296d0231c1363222c3441af62dc4ca',
        'info_dict': {
            'id': '3685814',
            'title': 'md5:7d8427d6d02c4fbcef50fe269980c749',
            'upload_date': '20141027',
            'uploader_id': 'utkualp',
            'ext': 'mp4',
            'age_limit': 18,
        },
        'add_ie': ['XHamster'],
    }

    def _real_extract(self, url):
        """Download the page for *url* and return a url_result for its video link."""
        page_id = self._match_id(url)
        page = self._download_webpage(url, page_id)

        # The anchor marked rel="videolink" carries the real video address.
        linked_url = self._html_search_regex(
            r'rel="videolink" href="(?P<real_url>.*)">',
            page, 'real video url')
        return self.url_result(linked_url)
|
@@ -39,7 +39,6 @@ class NBAIE(InfoExtractor):
|
||||
duration = parse_duration(
|
||||
self._html_search_meta('duration', webpage, 'duration', fatal=False))
|
||||
|
||||
|
||||
return {
|
||||
'id': shortened_video_id,
|
||||
'url': video_url,
|
||||
|
@@ -4,9 +4,12 @@ import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_urlparse
|
||||
)
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
@@ -22,9 +25,11 @@ class NHLBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
initial_video_url = info['publishPoint']
|
||||
if info['formats'] == '1':
|
||||
parsed_url = compat_urllib_parse_urlparse(initial_video_url)
|
||||
path = parsed_url.path.replace('.', '_sd.', 1)
|
||||
data = compat_urllib_parse.urlencode({
|
||||
'type': 'fvod',
|
||||
'path': initial_video_url.replace('.mp4', '_sd.mp4'),
|
||||
'path': compat_urlparse.urlunparse(parsed_url[:2] + (path,) + parsed_url[3:])
|
||||
})
|
||||
path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
|
||||
path_doc = self._download_xml(
|
||||
@@ -71,6 +76,17 @@ class NHLIE(NHLBaseInfoExtractor):
|
||||
'duration': 0,
|
||||
'upload_date': '20141011',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://video.mapleleafs.nhl.com/videocenter/console?id=58665&catid=802',
|
||||
'md5': 'c78fc64ea01777e426cfc202b746c825',
|
||||
'info_dict': {
|
||||
'id': '58665',
|
||||
'ext': 'flv',
|
||||
'title': 'Classic Game In Six - April 22, 1979',
|
||||
'description': 'It was the last playoff game for the Leafs in the decade, and the last time the Leafs and Habs played in the playoffs. Great game, not a great ending.',
|
||||
'duration': 400,
|
||||
'upload_date': '20100129'
|
||||
},
|
||||
}, {
|
||||
'url': 'http://video.flames.nhl.com/videocenter/console?id=630616',
|
||||
'only_matching': True,
|
||||
@@ -88,7 +104,7 @@ class NHLIE(NHLBaseInfoExtractor):
|
||||
class NHLVideocenterIE(NHLBaseInfoExtractor):
|
||||
IE_NAME = 'nhl.com:videocenter'
|
||||
IE_DESC = 'NHL videocenter category'
|
||||
_VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[0-9]+)(?![&?]id=).*?)?$'
|
||||
_VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?[^(id=)]*catid=(?P<catid>[0-9]+)(?![&?]id=).*?)?$'
|
||||
_TEST = {
|
||||
'url': 'http://video.canucks.nhl.com/videocenter/console?catid=999',
|
||||
'info_dict': {
|
||||
@@ -122,7 +138,7 @@ class NHLVideocenterIE(NHLBaseInfoExtractor):
|
||||
response = self._download_webpage(request_url, playlist_title)
|
||||
response = self._fix_json(response)
|
||||
if not response.strip():
|
||||
self._downloader.report_warning(u'Got an empty reponse, trying '
|
||||
self._downloader.report_warning('Got an empty reponse, trying '
|
||||
'adding the "newvideos" parameter')
|
||||
response = self._download_webpage(request_url + '&newvideos=true',
|
||||
playlist_title)
|
||||
|
@@ -27,8 +27,7 @@ class NineGagIE(InfoExtractor):
|
||||
"thumbnail": "re:^https?://",
|
||||
},
|
||||
'add_ie': ['Youtube']
|
||||
},
|
||||
{
|
||||
}, {
|
||||
'url': 'http://9gag.tv/p/KklwM/alternate-banned-opening-scene-of-gravity?ref=fsidebar',
|
||||
'info_dict': {
|
||||
'id': 'KklwM',
|
||||
|
@@ -20,6 +20,7 @@ class NocoIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
|
||||
_LOGIN_URL = 'http://noco.tv/do.php'
|
||||
_API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s'
|
||||
_SUB_LANG_TEMPLATE = '&sub_lang=%s'
|
||||
_NETRC_MACHINE = 'noco'
|
||||
|
||||
_TEST = {
|
||||
@@ -60,10 +61,12 @@ class NocoIE(InfoExtractor):
|
||||
if 'erreur' in login:
|
||||
raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
|
||||
|
||||
def _call_api(self, path, video_id, note):
|
||||
def _call_api(self, path, video_id, note, sub_lang=None):
|
||||
ts = compat_str(int(time.time() * 1000))
|
||||
tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest()
|
||||
url = self._API_URL_TEMPLATE % (path, ts, tk)
|
||||
if sub_lang:
|
||||
url += self._SUB_LANG_TEMPLATE % sub_lang
|
||||
|
||||
resp = self._download_json(url, video_id, note)
|
||||
|
||||
@@ -91,11 +94,14 @@ class NocoIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
|
||||
for format_id, fmt in medias['fr']['video_list']['none']['quality_list'].items():
|
||||
for lang, lang_dict in medias['fr']['video_list'].items():
|
||||
for format_id, fmt in lang_dict['quality_list'].items():
|
||||
format_id_extended = '%s-%s' % (lang, format_id) if lang != 'none' else format_id
|
||||
|
||||
video = self._call_api(
|
||||
'shows/%s/video/%s/fr' % (video_id, format_id.lower()),
|
||||
video_id, 'Downloading %s video JSON' % format_id)
|
||||
video_id, 'Downloading %s video JSON' % format_id_extended,
|
||||
lang if lang != 'none' else None)
|
||||
|
||||
file_url = video['file']
|
||||
if not file_url:
|
||||
@@ -107,7 +113,7 @@ class NocoIE(InfoExtractor):
|
||||
|
||||
formats.append({
|
||||
'url': file_url,
|
||||
'format_id': format_id,
|
||||
'format_id': format_id_extended,
|
||||
'width': fmt['res_width'],
|
||||
'height': fmt['res_lines'],
|
||||
'abr': fmt['audiobitrate'],
|
||||
|
@@ -97,4 +97,3 @@ class OoyalaIE(InfoExtractor):
|
||||
}
|
||||
else:
|
||||
return self._extract_result(videos_info[0], videos_more_info)
|
||||
|
||||
|
@@ -4,6 +4,8 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
|
||||
@@ -28,6 +30,11 @@ class PlayvidIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
m_error = re.search(
|
||||
r'<div class="block-error">\s*<div class="heading">\s*<div>(?P<msg>.+?)</div>\s*</div>', webpage)
|
||||
if m_error:
|
||||
raise ExtractorError(clean_html(m_error.group('msg')), expected=True)
|
||||
|
||||
video_title = None
|
||||
duration = None
|
||||
video_thumbnail = None
|
||||
|
@@ -6,6 +6,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class PodomaticIE(InfoExtractor):
|
||||
IE_NAME = 'podomatic'
|
||||
_VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)'
|
||||
|
@@ -1,7 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
@@ -9,32 +7,23 @@ class RedTubeIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.redtube.com/66418',
|
||||
'file': '66418.mp4',
|
||||
# md5 varies from time to time, as in
|
||||
# https://travis-ci.org/rg3/youtube-dl/jobs/14052463#L295
|
||||
#'md5': u'7b8c22b5e7098a3e1c09709df1126d2d',
|
||||
'info_dict': {
|
||||
'id': '66418',
|
||||
'ext': 'mp4',
|
||||
"title": "Sucked on a toilet",
|
||||
"age_limit": 18,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
video_id = mobj.group('id')
|
||||
video_extension = 'mp4'
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
self.report_extraction(video_id)
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'<source src="(.+?)" type="video/mp4">', webpage, u'video URL')
|
||||
|
||||
r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
|
||||
video_title = self._html_search_regex(
|
||||
r'<h1 class="videoTitle[^"]*">(.+?)</h1>',
|
||||
webpage, u'title')
|
||||
|
||||
webpage, 'title')
|
||||
video_thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
# No self-labeling, but they describe themselves as
|
||||
@@ -44,7 +33,7 @@ class RedTubeIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': video_extension,
|
||||
'ext': 'mp4',
|
||||
'title': video_title,
|
||||
'thumbnail': video_thumbnail,
|
||||
'age_limit': age_limit,
|
||||
|
@@ -41,4 +41,3 @@ class RingTVIE(InfoExtractor):
|
||||
'thumbnail': thumbnail_url,
|
||||
'description': description,
|
||||
}
|
||||
|
||||
|
@@ -28,9 +28,8 @@ class RtlXlIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
uuid = mobj.group('uuid')
|
||||
|
||||
# Use m3u8 streams (see https://github.com/rg3/youtube-dl/issues/4118)
|
||||
info = self._download_json(
|
||||
'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/d=pc/fmt=adaptive/' % uuid,
|
||||
'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
|
||||
uuid)
|
||||
|
||||
material = info['material'][0]
|
||||
@@ -39,12 +38,13 @@ class RtlXlIE(InfoExtractor):
|
||||
progname = info['abstracts'][0]['name']
|
||||
subtitle = material['title'] or info['episodes'][0]['name']
|
||||
|
||||
videopath = material['videopath']
|
||||
# Use unencrypted m3u8 streams (See https://github.com/rg3/youtube-dl/issues/4118)
|
||||
videopath = material['videopath'].replace('.f4m', '.m3u8')
|
||||
m3u8_url = 'http://manifest.us.rtl.nl' + videopath
|
||||
|
||||
formats = self._extract_m3u8_formats(m3u8_url, uuid, ext='mp4')
|
||||
|
||||
video_urlpart = videopath.split('/adaptive/')[1][:-4]
|
||||
video_urlpart = videopath.split('/flash/')[1][:-5]
|
||||
PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4'
|
||||
|
||||
formats.extend([
|
||||
|
@@ -15,7 +15,7 @@ from ..utils import (
|
||||
|
||||
class RTSIE(InfoExtractor):
|
||||
IE_DESC = 'RTS.ch'
|
||||
_VALID_URL = r'^https?://(?:www\.)?rts\.ch/(?:[^/]+/){2,}(?P<id>[0-9]+)-.*?\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?rts\.ch/(?:(?:[^/]+/){2,}(?P<id>[0-9]+)-(?P<display_id>.+?)\.html|play/tv/[^/]+/video/(?P<display_id_new>.+?)\?id=(?P<id_new>[0-9]+))'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -23,6 +23,7 @@ class RTSIE(InfoExtractor):
|
||||
'md5': '753b877968ad8afaeddccc374d4256a5',
|
||||
'info_dict': {
|
||||
'id': '3449373',
|
||||
'display_id': 'les-enfants-terribles',
|
||||
'ext': 'mp4',
|
||||
'duration': 1488,
|
||||
'title': 'Les Enfants Terribles',
|
||||
@@ -30,7 +31,8 @@ class RTSIE(InfoExtractor):
|
||||
'uploader': 'Divers',
|
||||
'upload_date': '19680921',
|
||||
'timestamp': -40280400,
|
||||
'thumbnail': 're:^https?://.*\.image'
|
||||
'thumbnail': 're:^https?://.*\.image',
|
||||
'view_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -38,6 +40,7 @@ class RTSIE(InfoExtractor):
|
||||
'md5': 'c148457a27bdc9e5b1ffe081a7a8337b',
|
||||
'info_dict': {
|
||||
'id': '5624067',
|
||||
'display_id': 'entre-ciel-et-mer',
|
||||
'ext': 'mp4',
|
||||
'duration': 3720,
|
||||
'title': 'Les yeux dans les cieux - Mon homard au Canada',
|
||||
@@ -45,7 +48,8 @@ class RTSIE(InfoExtractor):
|
||||
'uploader': 'Passe-moi les jumelles',
|
||||
'upload_date': '20140404',
|
||||
'timestamp': 1396635300,
|
||||
'thumbnail': 're:^https?://.*\.image'
|
||||
'thumbnail': 're:^https?://.*\.image',
|
||||
'view_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -53,6 +57,7 @@ class RTSIE(InfoExtractor):
|
||||
'md5': 'b4326fecd3eb64a458ba73c73e91299d',
|
||||
'info_dict': {
|
||||
'id': '5745975',
|
||||
'display_id': '1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski',
|
||||
'ext': 'mp4',
|
||||
'duration': 48,
|
||||
'title': '1/2, Kloten - Fribourg (5-2): second but pour Gottéron par Kwiatowski',
|
||||
@@ -60,7 +65,8 @@ class RTSIE(InfoExtractor):
|
||||
'uploader': 'Hockey',
|
||||
'upload_date': '20140403',
|
||||
'timestamp': 1396556882,
|
||||
'thumbnail': 're:^https?://.*\.image'
|
||||
'thumbnail': 're:^https?://.*\.image',
|
||||
'view_count': int,
|
||||
},
|
||||
'skip': 'Blocked outside Switzerland',
|
||||
},
|
||||
@@ -69,6 +75,7 @@ class RTSIE(InfoExtractor):
|
||||
'md5': '9bb06503773c07ce83d3cbd793cebb91',
|
||||
'info_dict': {
|
||||
'id': '5745356',
|
||||
'display_id': 'londres-cachee-par-un-epais-smog',
|
||||
'ext': 'mp4',
|
||||
'duration': 33,
|
||||
'title': 'Londres cachée par un épais smog',
|
||||
@@ -76,7 +83,8 @@ class RTSIE(InfoExtractor):
|
||||
'uploader': 'Le Journal en continu',
|
||||
'upload_date': '20140403',
|
||||
'timestamp': 1396537322,
|
||||
'thumbnail': 're:^https?://.*\.image'
|
||||
'thumbnail': 're:^https?://.*\.image',
|
||||
'view_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -84,6 +92,7 @@ class RTSIE(InfoExtractor):
|
||||
'md5': 'dd8ef6a22dff163d063e2a52bc8adcae',
|
||||
'info_dict': {
|
||||
'id': '5706148',
|
||||
'display_id': 'urban-hippie-de-damien-krisl-03-04-2014',
|
||||
'ext': 'mp3',
|
||||
'duration': 123,
|
||||
'title': '"Urban Hippie", de Damien Krisl',
|
||||
@@ -92,22 +101,44 @@ class RTSIE(InfoExtractor):
|
||||
'timestamp': 1396551600,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.rts.ch/play/tv/-/video/le-19h30?id=6348260',
|
||||
'md5': '968777c8779e5aa2434be96c54e19743',
|
||||
'info_dict': {
|
||||
'id': '6348260',
|
||||
'display_id': 'le-19h30',
|
||||
'ext': 'mp4',
|
||||
'duration': 1796,
|
||||
'title': 'Le 19h30',
|
||||
'description': '',
|
||||
'uploader': 'Le 19h30',
|
||||
'upload_date': '20141201',
|
||||
'timestamp': 1417458600,
|
||||
'thumbnail': 're:^https?://.*\.image',
|
||||
'view_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.rts.ch/play/tv/le-19h30/video/le-chantier-du-nouveau-parlement-vaudois-a-permis-une-trouvaille-historique?id=6348280',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = re.match(self._VALID_URL, url)
|
||||
video_id = m.group('id')
|
||||
video_id = m.group('id') or m.group('id_new')
|
||||
display_id = m.group('display_id') or m.group('display_id_new')
|
||||
|
||||
def download_json(internal_id):
|
||||
return self._download_json(
|
||||
'http://www.rts.ch/a/%s.html?f=json/article' % internal_id,
|
||||
video_id)
|
||||
display_id)
|
||||
|
||||
all_info = download_json(video_id)
|
||||
|
||||
# video_id extracted out of URL is not always a real id
|
||||
if 'video' not in all_info and 'audio' not in all_info:
|
||||
page = self._download_webpage(url, video_id)
|
||||
page = self._download_webpage(url, display_id)
|
||||
internal_id = self._html_search_regex(
|
||||
r'<(?:video|audio) data-id="([0-9]+)"', page,
|
||||
'internal video id')
|
||||
@@ -143,6 +174,7 @@ class RTSIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'formats': formats,
|
||||
'title': info['title'],
|
||||
'description': info.get('intro'),
|
||||
|
@@ -54,7 +54,6 @@ def _decrypt_url(png):
|
||||
return url
|
||||
|
||||
|
||||
|
||||
class RTVEALaCartaIE(InfoExtractor):
|
||||
IE_NAME = 'rtve.es:alacarta'
|
||||
IE_DESC = 'RTVE a la carta'
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user