Compare commits

..

78 Commits

Author SHA1 Message Date
Philipp Hagemeister
727d2930f2 release 2014.07.20.2 2014-07-20 23:23:01 +02:00
Philipp Hagemeister
c13bf7c836 [swfinterp] Use helper function struct_unpack for old Python 2.x releases (#3270) 2014-07-20 23:20:15 +02:00
Philipp Hagemeister
f3308e138d release 2014.07.20.1 2014-07-20 21:38:29 +02:00
Philipp Hagemeister
29546b345b [ard] Add support for NDR-style videos (fixes #3281) 2014-07-20 21:38:02 +02:00
Jaime Marquínez Ferrándiz
2c57c7fa5a [youtube] Fix extraction of age gate videos (closes #3270)
Setting the correct value of the 'sts' parameter in the 'get_video_info' url gives the correct urls.
Removed parameters that are not needed.
2014-07-20 21:05:02 +02:00
Philipp Hagemeister
b6ea11b967 [youtube] Add swf signature test case (#3270) 2014-07-20 20:45:36 +02:00
Philipp Hagemeister
b8c74d606a [youtube] fix display of swf player id 2014-07-20 20:20:42 +02:00
Sergey M․
a5d524ef46 [allocine] Update tests 2014-07-21 00:28:55 +07:00
Philipp Hagemeister
cceb5ec237 release 2014.07.20 2014-07-20 18:47:03 +02:00
Philipp Hagemeister
71a6eaff83 Merge remote-tracking branch 'origin/master' 2014-07-20 18:32:59 +02:00
Philipp Hagemeister
7fd48d0413 [youtube] Correct signature testcase 2014-07-20 18:30:27 +02:00
Philipp Hagemeister
1b38b5be86 [swfinterp] Remove debugging code 2014-07-20 18:29:09 +02:00
Philipp Hagemeister
decf2ae400 [swfinterp] Correct array access 2014-07-20 18:28:49 +02:00
Philipp Hagemeister
0d989011ff [swfinterp] Add support for calling methods on objects 2014-07-20 14:49:10 +02:00
Philipp Hagemeister
01b4b74574 [swfinterp] Add support for calls to instance methods 2014-07-20 12:47:15 +02:00
Philipp Hagemeister
70f767dc65 [swfinterp] Add support for multiple classes 2014-07-20 00:25:58 +02:00
Philipp Hagemeister
e75c24e889 [swfinterp] Extend tests and fix parsing 2014-07-20 00:03:54 +02:00
Philipp Hagemeister
0cb2056304 [swfinterp] Start working on basic tests 2014-07-19 23:05:07 +02:00
Sergey M․
604f292ab7 [sapo] Add extractor (Closes #2816) 2014-07-20 00:00:20 +07:00
Sergey M․
23d3c422ab [francetv] Add support for mobile URLs (Closes #3275) 2014-07-19 17:47:50 +07:00
Sergey M․
0c1ffe980d [mlb] Fix _VALID_URL 2014-07-18 21:43:01 +07:00
Sergey M․
5e95cb27d6 Credit @hassaanaliw for cracked (#3274) 2014-07-18 21:41:34 +07:00
Sergey M․
3b86f936c5 Merge branch 'hassaanaliw-cracked' 2014-07-18 21:39:38 +07:00
Sergey M․
e0942e37aa [crackled] Improve, fix invalid regexes and extract more metadata 2014-07-18 21:39:21 +07:00
Sergey M․
c45a6caa95 [utils] Add None check in str_to_int 2014-07-18 21:37:40 +07:00
Sergey M․
61bbddbaa6 Merge branch 'cracked' of https://github.com/hassaanaliw/youtube-dl 2014-07-18 20:29:35 +07:00
Philipp Hagemeister
5425626790 [youtube] Move swfinterp into its own file 2014-07-18 10:24:28 +02:00
Philipp Hagemeister
5dc3552d85 [youtube] Add support for classes in swf parser 2014-07-18 00:54:17 +02:00
Philipp Hagemeister
3fbd27f73e [youtube] SWF parser: Add opcode 86
Yes, I know we need 96, but an implementation of 86 could help avoid a similar issue.
2014-07-17 23:22:49 +02:00
Philipp Hagemeister
0382ecb78d Merge pull request #3289 from Reventl0v/patch-1
Fix the url in the INSTALLATION section
2014-07-17 22:54:24 +02:00
Philipp Hagemeister
72edb6fc8c Merge remote-tracking branch 'origin/master' 2014-07-17 22:32:54 +02:00
Jaime Marquínez Ferrándiz
66149e3f2b [npo] Fix the json extraction (fixes #3282)
The comment in the javascript file is not always the same.
2014-07-17 22:29:03 +02:00
Reventl0v
6e74521d98 Fix the url in the INSTALLATION section 2014-07-17 21:08:43 +02:00
Philipp Hagemeister
cf01013161 [youtube] Find more swf players (Closes #3270, refer #3271) 2014-07-17 16:28:36 +02:00
Jaime Marquínez Ferrándiz
1e179c7528 Merge pull request #3283 from MikeCol/redtube_thumb_new
Redtube changed player config, new place to get thumb URL
2014-07-17 12:44:21 +02:00
MikeCol
530ed178b7 Redtube changed player config, new place to get thumb URL 2014-07-17 11:17:27 +02:00
Jaime Marquínez Ferrándiz
74aa18f68f [dfb] Add extractor (closes #3280) 2014-07-17 10:07:51 +02:00
Jaime Marquínez Ferrándiz
d9222264a8 [adultswim] The bitrate must be an integer or None (reported in #2952) 2014-07-17 09:31:48 +02:00
Jaime Marquínez Ferrándiz
ca14211e93 [adultswim] Simplify (closes #2952) 2014-07-17 09:27:06 +02:00
Jaime Marquínez Ferrándiz
b1d65c3369 Merge remote-tracking branch 'adammw/adultswim' 2014-07-17 09:21:43 +02:00
Jaime Marquínez Ferrándiz
b4c538b02b [comedycentral] Only recognize the cc.com domain
The old comedycentral.com urls redirect to the new urls.
2014-07-16 23:05:56 +02:00
Jaime Marquínez Ferrándiz
13059bceb2 [comedycentral] Recognize 'full-episodes' urls (fixes #3277) 2014-07-16 23:05:56 +02:00
Sergey M․
d8894e24a4 [rtbf] Fix data video regex 2014-07-17 01:57:38 +07:00
Sergey M․
3b09757bac Credit @chaochichen for mlb (#3252) 2014-07-16 21:03:30 +07:00
Sergey M․
2f97f76877 Merge branch 'cracked' of https://github.com/hassaanaliw/youtube-dl into hassaanaliw-cracked 2014-07-16 20:55:38 +07:00
hassaanaliw
43f0537c06 [cracked] Add new extractor 2014-07-16 18:45:42 +05:00
Sergey M․
a816da0dc3 Merge branch 'chaochichen-MLB' 2014-07-16 20:42:01 +07:00
Sergey M․
7bb49d1057 [mlb] Extract more metadata and all formats, provide more tests 2014-07-16 20:40:28 +07:00
Sergey M․
1aa42fedee Merge branch 'MLB' of https://github.com/chaochichen/youtube-dl into chaochichen-MLB 2014-07-16 19:13:35 +07:00
Philipp Hagemeister
ee90ddab94 release 2014.07.15 2014-07-15 22:59:12 +02:00
Charles Chen
172240c0a4 Switched to use media detail XML to extract video URL 2014-07-15 13:55:23 -07:00
Jaime Marquínez Ferrándiz
ad25aee245 [youtube & jsinterp] Fix signature extraction (fixes #3255)
Some functions are defined now inside an object, the jsinterp will search its definition if the variable is not defined in the local namespace.
2014-07-15 22:46:39 +02:00
Sergey M․
bd1f325b42 [tutv] Replace 404 test and modernize 2014-07-15 19:32:42 +07:00
Sergey M․
00a82ea805 [soundcloud] Replace 404 test 2014-07-15 19:18:06 +07:00
Charles Chen
b1b01841af [MLB] Add new extractor 2014-07-14 11:00:55 -07:00
Filippo Valsorda
816930c485 Fix utils.strip_jsonp 2014-07-14 00:41:23 +02:00
Sergey M․
76233cda34 [pyvideo] Fix title extraction 2014-07-14 00:38:10 +07:00
Jaime Marquínez Ferrándiz
9dcea39985 [tlc.de] If the url contains a fragment, use it in the iframe url (reported in #2748)
The fragment is used in the webpage for selecting different videos.
2014-07-13 14:38:26 +02:00
Jaime Marquínez Ferrándiz
10d00a756a rename southparkstudios.py to southpark.py
And make the extractor only recognize southpark.cc.com urls, the old urls are redirected.
2014-07-13 14:08:23 +02:00
Jaime Marquínez Ferrándiz
eb50741129 Merge remote-tracking branch 'adammw/southpark' 2014-07-13 14:01:09 +02:00
Adam Malcontenti-Wilson
3804b01276 Update test 2014-07-13 21:29:04 +10:00
Adam Malcontenti-Wilson
b1298d8e06 Test for colon in mgid 2014-07-13 21:15:18 +10:00
Adam Malcontenti-Wilson
6a46dc8db7 Add southpark.cc.com to southpark IE 2014-07-13 12:48:30 +10:00
Filippo Valsorda
36cb99f958 [ReverbNation] Add new IE - closes #2250 2014-07-13 00:47:20 +02:00
Sergey M․
81650f95e2 [ruhd] Add extractor 2014-07-13 04:03:22 +07:00
Sergey M․
34dbcb8505 [ndr] Replace 404 test 2014-07-12 22:08:33 +07:00
Philipp Hagemeister
c993c829e2 [firedrive] Simplify 2014-07-12 14:27:14 +02:00
Philipp Hagemeister
0d90e0f067 Credit @naglis for firedrive (#3242) 2014-07-12 14:23:54 +02:00
Naglis Jonaitis
678f58de4b [firedrive] Add new extractor. Addresses #3095 2014-07-12 00:42:42 +03:00
Sergey M․
c961a0e63e [screencast] Add one more format and improve title extraction 2014-07-11 22:52:48 +07:00
Sergey M․
aaefb347c0 [gorillavid] Fix embedded videos extraction 2014-07-11 22:23:00 +07:00
Philipp Hagemeister
09018e19a5 release 2014.07.11.3 2014-07-11 17:21:16 +02:00
Sergey M․
345e37831c [youtube] Update nosubtitles test 2014-07-11 22:08:04 +07:00
Sergey M․
00ac799b68 [vine:user] Update test 2014-07-11 22:04:24 +07:00
Jaime Marquínez Ferrándiz
133af9385b Update supported formats for the --recode-video option (#3228) 2014-07-11 16:16:30 +02:00
Philipp Hagemeister
40c696e5c6 [screencast] Add support for more video types (#3236) 2014-07-11 15:39:24 +02:00
Adam Malcontenti-Wilson
d415299a80 [adultswim] Fix tests 2014-05-19 22:32:45 +10:00
Adam Malcontenti-Wilson
48fbb1003d [adultswim] Add new extractor 2014-05-19 22:05:46 +10:00
45 changed files with 1760 additions and 548 deletions

View File

@@ -12,7 +12,7 @@ To install it right away for all UNIX users (Linux, OS X, etc.), type:
If you do not have curl, you can alternatively use a recent wget: If you do not have curl, you can alternatively use a recent wget:
sudo wget https://yt-dl.org/downloads/2014.05.13/youtube-dl -O /usr/local/bin/youtube-dl sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
sudo chmod a+x /usr/local/bin/youtube-dl sudo chmod a+x /usr/local/bin/youtube-dl
Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29). Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29).
@@ -255,7 +255,7 @@ which means you can modify it, redistribute it or use it however you like.
128K (default 5) 128K (default 5)
--recode-video FORMAT Encode the video to another format if --recode-video FORMAT Encode the video to another format if
necessary (currently supported: necessary (currently supported:
mp4|flv|ogg|webm) mp4|flv|ogg|webm|mkv)
-k, --keep-video keeps the video file on disk after the -k, --keep-video keeps the video file on disk after the
post-processing; the video is erased by post-processing; the video is erased by
default default

1
test/swftests/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
*.swf

View File

@@ -0,0 +1,19 @@
// input: [["a", "b", "c", "d"]]
// output: ["c", "b", "a", "d"]
// Fixture: main() creates an ArrayAccess instance and calls its private
// instance method f() with index 2, returning whatever f() returns.
package {
public class ArrayAccess {
public static function main(ar:Array):Array {
var aa:ArrayAccess = new ArrayAccess();
return aa.f(ar, 2);
}
// Exchanges element 0 with the element at position num and returns the
// same array. The read uses num % ar.length while the write uses ar[num]
// directly, so both array reads and array writes by index are exercised.
private function f(ar:Array, num:Number):Array{
var x:String = ar[0];
var y:String = ar[num % ar.length];
ar[0] = y;
ar[num] = x;
return ar;
}
}
}

View File

@@ -0,0 +1,17 @@
// input: []
// output: 121
// Fixture: main() instantiates OtherClass (declared outside the package
// block below) and returns the result of calling its public method with
// the constant arguments 100 and 20.
package {
public class ClassCall {
public static function main():int{
var f:OtherClass = new OtherClass();
return f.func(100,20);
}
}
}
// Helper class living in the same file as the public class.
class OtherClass {
// Returns the sum of both arguments plus one.
public function func(x: int, y: int):int {
return x+y+1;
}
}

View File

@@ -0,0 +1,15 @@
// input: []
// output: 0
// Fixture: main() only constructs an instance of the empty class Foo and
// then returns the constant 0; the created object is never used.
package {
public class ClassConstruction {
public static function main():int{
var f:Foo = new Foo();
return 0;
}
}
}
// Deliberately empty class; it has no members of its own.
class Foo {
}

View File

@@ -0,0 +1,13 @@
// input: [1, 2]
// output: 3
// Fixture: main() does integer arithmetic through three local variables
// and returns the last one.
package {
public class LocalVars {
public static function main(a:int, b:int):int{
// c = a + 2*b, d = c - b = a + b, e is a plain copy of d.
var c:int = a + b + b;
var d:int = c - b;
var e:int = d;
return e;
}
}
}

View File

@@ -0,0 +1,21 @@
// input: []
// output: 9
// Fixture: main() instantiates OtherClass and calls its public method
// func(), which in turn calls the private method pf() through `this`.
package {
public class PrivateCall {
public static function main():int{
var f:OtherClass = new OtherClass();
return f.func();
}
}
}
class OtherClass {
// Private helper; always returns the constant 9.
private function pf():int {
return 9;
}
// Public entry point that forwards to the private helper.
public function func():int {
return this.pf();
}
}

View File

@@ -0,0 +1,13 @@
// input: [1]
// output: 1
// Fixture: main() stores its argument in the static variable v and then
// returns the value read back from v.
package {
public class StaticAssignment {
public static var v:int;
public static function main(a:int):int{
v = a;
return v;
}
}
}

View File

@@ -0,0 +1,16 @@
// input: []
// output: 1
// Fixture: main() reads the static variable v, which is never assigned in
// this file, and returns 0 if it is truthy and 1 otherwise. The expected
// output declared for this fixture is 1, i.e. the else branch.
package {
public class StaticRetrieval {
public static var v:int;
public static function main():int{
if (v) {
return 0;
} else {
return 1;
}
}
}
}

View File

@@ -111,7 +111,7 @@ class TestPlaylists(unittest.TestCase):
ie = VineUserIE(dl) ie = VineUserIE(dl)
result = ie.extract('https://vine.co/Visa') result = ie.extract('https://vine.co/Visa')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertTrue(len(result['entries']) >= 50) self.assertTrue(len(result['entries']) >= 47)
def test_ustream_channel(self): def test_ustream_channel(self):
dl = FakeYDL() dl = FakeYDL()

View File

@@ -87,7 +87,7 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
def test_youtube_nosubtitles(self): def test_youtube_nosubtitles(self):
self.DL.expect_warning(u'video doesn\'t have subtitles') self.DL.expect_warning(u'video doesn\'t have subtitles')
self.url = 'sAjKT8FhjI8' self.url = 'n5BB19UTcdA'
self.DL.params['writesubtitles'] = True self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles() subtitles = self.getSubtitles()

76
test/test_swfinterp.py Normal file
View File

@@ -0,0 +1,76 @@
#!/usr/bin/env python
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import io
import json
import re
import subprocess
from youtube_dl.swfinterp import SWFInterpreter
# Fixture directory: the 'swftests' folder that sits next to this module.
_THIS_DIR = os.path.dirname(os.path.abspath(__file__))
TEST_DIR = os.path.join(_THIS_DIR, 'swftests')
class TestSWFInterpreter(unittest.TestCase):
    """Empty test case; test methods are attached to it with setattr()."""
def _make_testfunc(testfile):
    """Attach a test method for one ActionScript fixture to TestSWFInterpreter.

    ``testfile`` is a file name inside TEST_DIR. Names that do not end in
    ``.as`` are ignored (the function returns None without registering
    anything). For ``<id>.as`` a method named ``test_swf_<id>`` is registered
    which compiles the fixture with ``mxmlc`` when the ``.swf`` is missing or
    older than the source, runs the ``main`` function of class ``<id>``
    through SWFInterpreter, and compares the result with the value given in
    the fixture's ``// output:`` line.
    """
    # errno is not among this module's top-level imports; without it the
    # ENOENT check below fails with a NameError exactly when mxmlc is missing.
    import errno

    m = re.match(r'^(.*)\.(as)$', testfile)
    if not m:
        return
    test_id = m.group(1)

    def test_func(self):
        as_file = os.path.join(TEST_DIR, testfile)
        swf_file = os.path.join(TEST_DIR, test_id + '.swf')
        if ((not os.path.exists(swf_file))
                or os.path.getmtime(swf_file) < os.path.getmtime(as_file)):
            # Recompile
            try:
                subprocess.check_call(['mxmlc', '-output', swf_file, as_file])
            except OSError as ose:
                # Only a missing compiler is tolerated; any other OS error
                # (permissions, ...) is a real failure and is re-raised.
                if ose.errno == errno.ENOENT:
                    print('mxmlc not found! Skipping test.')
                    return
                raise

        with open(swf_file, 'rb') as swf_f:
            swf_content = swf_f.read()
        swfi = SWFInterpreter(swf_content)

        with io.open(as_file, 'r', encoding='utf-8') as as_f:
            as_content = as_f.read()

        def _find_spec(key):
            # Fixture metadata lives in '// <key>: <json>' comment lines.
            m = re.search(
                r'(?m)^//\s*%s:\s*(.*?)\n' % re.escape(key), as_content)
            if not m:
                raise ValueError('Cannot find %s in %s' % (key, testfile))
            return json.loads(m.group(1))

        input_args = _find_spec('input')
        output = _find_spec('output')

        swf_class = swfi.extract_class(test_id)
        func = swfi.extract_function(swf_class, 'main')
        res = func(input_args)
        self.assertEqual(res, output)

    # str() keeps the method name a native string on both Python 2 and 3.
    test_func.__name__ = str('test_swf_' + test_id)
    setattr(TestSWFInterpreter, test_func.__name__, test_func)
# Generate one test method per entry of the fixture directory; entries that
# are not .as files are ignored by _make_testfunc itself.
for fixture_name in os.listdir(TEST_DIR):
    _make_testfunc(fixture_name)


if __name__ == '__main__':
    unittest.main()

View File

@@ -33,12 +33,30 @@ _TESTS = [
90, 90,
u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876', u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
), ),
(
u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js',
u'js',
84,
u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=',
),
( (
u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
u'js', u'js',
u'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA', u'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
u'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2', u'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
), ),
(
u'http://s.ytimg.com/yts/swfbin/player-vfl5vIhK2/watch_as3.swf',
u'swf',
86,
u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVWXY\\!"#$%&\'()*+,-./:;<=>?'
),
(
u'http://s.ytimg.com/yts/swfbin/player-vflmDyk47/watch_as3.swf',
u'swf',
u'F375F75BF2AFDAAF2666E43868D46816F83F13E81C46.3725A8218E446A0DECD33F79DC282994D6AA92C92C9',
u'9C29AA6D499282CD97F33DCED0A644E8128A5273.64C18E31F38361864D86834E6662FAADFA2FB57F'
),
] ]
@@ -51,12 +69,12 @@ class TestSignature(unittest.TestCase):
def make_tfunc(url, stype, sig_input, expected_sig): def make_tfunc(url, stype, sig_input, expected_sig):
basename = url.rpartition('/')[2] m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3)?\.[a-z]+$', url)
m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename) assert m, '%r should follow URL format' % url
assert m, '%r should follow URL format' % basename
test_id = m.group(1) test_id = m.group(1)
def test_func(self): def test_func(self):
basename = 'player-%s.%s' % (test_id, stype)
fn = os.path.join(self.TESTDATA_DIR, basename) fn = os.path.join(self.TESTDATA_DIR, basename)
if not os.path.exists(fn): if not os.path.exists(fn):

View File

@@ -63,6 +63,9 @@ __authors__ = (
'Ariset Llerena', 'Ariset Llerena',
'Adam Malcontenti-Wilson', 'Adam Malcontenti-Wilson',
'Tobias Bell', 'Tobias Bell',
'Naglis Jonaitis',
'Charles Chen',
'Hassaan Ali',
) )
__license__ = 'Public Domain' __license__ = 'Public Domain'
@@ -509,7 +512,7 @@ def parseOpts(overrideArguments=None):
postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='5', postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='5',
help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)') help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)')
postproc.add_option('--recode-video', metavar='FORMAT', dest='recodevideo', default=None, postproc.add_option('--recode-video', metavar='FORMAT', dest='recodevideo', default=None,
help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm)') help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)')
postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False, postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
help='keeps the video file on disk after the post-processing; the video is erased by default') help='keeps the video file on disk after the post-processing; the video is erased by default')
postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False, postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False,

View File

@@ -1,5 +1,6 @@
from .academicearth import AcademicEarthCourseIE from .academicearth import AcademicEarthCourseIE
from .addanime import AddAnimeIE from .addanime import AddAnimeIE
from .adultswim import AdultSwimIE
from .aftonbladet import AftonbladetIE from .aftonbladet import AftonbladetIE
from .anitube import AnitubeIE from .anitube import AnitubeIE
from .aol import AolIE from .aol import AolIE
@@ -52,6 +53,7 @@ from .cnn import (
from .collegehumor import CollegeHumorIE from .collegehumor import CollegeHumorIE
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
from .condenast import CondeNastIE from .condenast import CondeNastIE
from .cracked import CrackedIE
from .criterion import CriterionIE from .criterion import CriterionIE
from .crunchyroll import CrunchyrollIE from .crunchyroll import CrunchyrollIE
from .cspan import CSpanIE from .cspan import CSpanIE
@@ -62,6 +64,7 @@ from .dailymotion import (
DailymotionUserIE, DailymotionUserIE,
) )
from .daum import DaumIE from .daum import DaumIE
from .dfb import DFBIE
from .dotsub import DotsubIE from .dotsub import DotsubIE
from .dreisat import DreiSatIE from .dreisat import DreiSatIE
from .drtv import DRTVIE from .drtv import DRTVIE
@@ -83,6 +86,7 @@ from .extremetube import ExtremeTubeIE
from .facebook import FacebookIE from .facebook import FacebookIE
from .faz import FazIE from .faz import FazIE
from .fc2 import FC2IE from .fc2 import FC2IE
from .firedrive import FiredriveIE
from .firstpost import FirstpostIE from .firstpost import FirstpostIE
from .firsttv import FirstTVIE from .firsttv import FirstTVIE
from .fivemin import FiveMinIE from .fivemin import FiveMinIE
@@ -169,6 +173,7 @@ from .metacafe import MetacafeIE
from .metacritic import MetacriticIE from .metacritic import MetacriticIE
from .mit import TechTVMITIE, MITIE, OCWMITIE from .mit import TechTVMITIE, MITIE, OCWMITIE
from .mixcloud import MixcloudIE from .mixcloud import MixcloudIE
from .mlb import MLBIE
from .mpora import MporaIE from .mpora import MporaIE
from .mofosex import MofosexIE from .mofosex import MofosexIE
from .mooshare import MooshareIE from .mooshare import MooshareIE
@@ -231,6 +236,7 @@ from .radiofrance import RadioFranceIE
from .rai import RaiIE from .rai import RaiIE
from .rbmaradio import RBMARadioIE from .rbmaradio import RBMARadioIE
from .redtube import RedTubeIE from .redtube import RedTubeIE
from .reverbnation import ReverbNationIE
from .ringtv import RingTVIE from .ringtv import RingTVIE
from .ro220 import Ro220IE from .ro220 import Ro220IE
from .rottentomatoes import RottenTomatoesIE from .rottentomatoes import RottenTomatoesIE
@@ -239,6 +245,7 @@ from .rtbf import RTBFIE
from .rtlnow import RTLnowIE from .rtlnow import RTLnowIE
from .rts import RTSIE from .rts import RTSIE
from .rtve import RTVEALaCartaIE from .rtve import RTVEALaCartaIE
from .ruhd import RUHDIE
from .rutube import ( from .rutube import (
RutubeIE, RutubeIE,
RutubeChannelIE, RutubeChannelIE,
@@ -246,6 +253,7 @@ from .rutube import (
RutubePersonIE, RutubePersonIE,
) )
from .rutv import RUTVIE from .rutv import RUTVIE
from .sapo import SapoIE
from .savefrom import SaveFromIE from .savefrom import SaveFromIE
from .scivee import SciVeeIE from .scivee import SciVeeIE
from .screencast import ScreencastIE from .screencast import ScreencastIE
@@ -267,8 +275,8 @@ from .soundcloud import (
SoundcloudPlaylistIE SoundcloudPlaylistIE
) )
from .soundgasm import SoundgasmIE from .soundgasm import SoundgasmIE
from .southparkstudios import ( from .southpark import (
SouthParkStudiosIE, SouthParkIE,
SouthparkDeIE, SouthparkDeIE,
) )
from .space import SpaceIE from .space import SpaceIE
@@ -393,6 +401,7 @@ from .youtube import (
YoutubeUserIE, YoutubeUserIE,
YoutubeWatchLaterIE, YoutubeWatchLaterIE,
) )
from .zdf import ZDFIE from .zdf import ZDFIE

View File

@@ -0,0 +1,139 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class AdultSwimIE(InfoExtractor):
    """Extractor for episode pages on video.adultswim.com.

    An episode is returned as a playlist with one entry per ``segment``
    element found in the episode index XML.
    """
    _VALID_URL = r'https?://video\.adultswim\.com/(?P<path>.+?)(?:\.html)?(?:\?.*)?(?:#.*)?$'
    _TEST = {
        'url': 'http://video.adultswim.com/rick-and-morty/close-rick-counters-of-the-rick-kind.html?x=y#title',
        'playlist': [
            {
                'md5': '4da359ec73b58df4575cd01a610ba5dc',
                'info_dict': {
                    'id': '8a250ba1450996e901453d7f02ca02f5',
                    'ext': 'flv',
                    'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 1',
                    'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
                    'uploader': 'Rick and Morty',
                    'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
                }
            },
            {
                'md5': 'ffbdf55af9331c509d95350bd0cc1819',
                'info_dict': {
                    'id': '8a250ba1450996e901453d7f4bd102f6',
                    'ext': 'flv',
                    'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 2',
                    'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
                    'uploader': 'Rick and Morty',
                    'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
                }
            },
            {
                'md5': 'b92409635540304280b4b6c36bd14a0a',
                'info_dict': {
                    'id': '8a250ba1450996e901453d7fa73c02f7',
                    'ext': 'flv',
                    'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 3',
                    'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
                    'uploader': 'Rick and Morty',
                    'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
                }
            },
            {
                'md5': 'e8818891d60e47b29cd89d7b0278156d',
                'info_dict': {
                    'id': '8a250ba1450996e901453d7fc8ba02f8',
                    'ext': 'flv',
                    'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 4',
                    'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
                    'uploader': 'Rick and Morty',
                    'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
                }
            }
        ]
    }

    # File extension reported for each value of a <file> element's 'bitrate'
    # attribute; values not listed here fall back to 'mp4'.
    _video_extensions = {
        '3500': 'flv',
        '640': 'mp4',
        '150': 'mp4',
        'ipad': 'm3u8',
        'iphone': 'm3u8'
    }
    # (width, height) reported per 'bitrate' value; unknown values yield
    # (None, None).
    _video_dimensions = {
        '3500': (1280, 720),
        '640': (480, 270),
        '150': (320, 180)
    }

    def _real_extract(self, url):
        """Return a playlist dict with one entry per segment of the episode."""
        mobj = re.match(self._VALID_URL, url)
        video_path = mobj.group('path')

        webpage = self._download_webpage(url, video_path)
        episode_id = self._html_search_regex(r'<link rel="video_src" href="http://i\.adultswim\.com/adultswim/adultswimtv/tools/swf/viralplayer.swf\?id=([0-9a-f]+?)"\s*/?\s*>', webpage, 'episode_id')
        title = self._og_search_title(webpage)

        index_url = 'http://asfix.adultswim.com/asfix-svc/episodeSearch/getEpisodesByIDs?networkName=AS&ids=%s' % episode_id
        index_doc = self._download_xml(index_url, title, 'Downloading episode index', 'Unable to download episode index')

        episode_el = index_doc.find('.//episode')
        show_title = episode_el.attrib.get('collectionTitle')
        episode_title = episode_el.attrib.get('title')
        thumbnail = episode_el.attrib.get('thumbnailUrl')
        # The description is optional metadata: a missing or empty element
        # must not abort the whole extraction.
        description_el = episode_el.find('./description')
        if description_el is not None and description_el.text:
            description = description_el.text.strip()
        else:
            description = None

        entries = []
        segment_els = episode_el.findall('./segments/segment')

        for part_num, segment_el in enumerate(segment_els):
            segment_id = segment_el.attrib.get('id')
            segment_title = '%s %s part %d' % (show_title, episode_title, part_num + 1)
            # Kept separate from the episode-level thumbnail so that the
            # playlist does not end up with the last segment's image.
            segment_thumbnail = segment_el.attrib.get('thumbnailUrl')
            duration = segment_el.attrib.get('duration')

            segment_url = 'http://asfix.adultswim.com/asfix-svc/episodeservices/getCvpPlaylist?networkName=AS&id=%s' % segment_id
            segment_doc = self._download_xml(segment_url, segment_title, 'Downloading segment information', 'Unable to download segment information')

            formats = []
            file_els = segment_doc.findall('.//files/file')

            for file_el in file_els:
                bitrate = file_el.attrib.get('bitrate')
                file_type = file_el.attrib.get('type')
                width, height = self._video_dimensions.get(bitrate, (None, None))
                formats.append({
                    'format_id': '%s-%s' % (bitrate, file_type),
                    'url': file_el.text,
                    'ext': self._video_extensions.get(bitrate, 'mp4'),
                    # The bitrate may be absent or not a number
                    # (for example: 'iphone')
                    'tbr': int(bitrate) if bitrate and bitrate.isdigit() else None,
                    'height': height,
                    'width': width
                })

            self._sort_formats(formats)

            entries.append({
                'id': segment_id,
                'title': segment_title,
                'formats': formats,
                'uploader': show_title,
                'thumbnail': segment_thumbnail,
                'duration': duration,
                'description': description
            })

        return {
            '_type': 'playlist',
            'id': episode_id,
            'display_id': video_path,
            'entries': entries,
            'title': '%s %s' % (show_title, episode_title),
            'description': description,
            'thumbnail': thumbnail
        }

View File

@@ -32,7 +32,7 @@ class AllocineIE(InfoExtractor):
'id': '19540403', 'id': '19540403',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Planes 2 Bande-annonce VF', 'title': 'Planes 2 Bande-annonce VF',
'description': 'md5:c4b1f7bd682a91de6491ada267ec0f4d', 'description': 'md5:eeaffe7c2d634525e21159b93acf3b1e',
'thumbnail': 're:http://.*\.jpg', 'thumbnail': 're:http://.*\.jpg',
}, },
}, { }, {
@@ -42,7 +42,7 @@ class AllocineIE(InfoExtractor):
'id': '19544709', 'id': '19544709',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Dragons 2 - Bande annonce finale VF', 'title': 'Dragons 2 - Bande annonce finale VF',
'description': 'md5:e74a4dc750894bac300ece46c7036490', 'description': 'md5:71742e3a74b0d692c7fce0dd2017a4ac',
'thumbnail': 're:http://.*\.jpg', 'thumbnail': 're:http://.*\.jpg',
}, },
}] }]

View File

@@ -7,23 +7,32 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
ExtractorError, ExtractorError,
qualities,
) )
class ARDIE(InfoExtractor): class ARDIE(InfoExtractor):
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[^/\?]+)(?:\?.*)?' _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
_TEST = { _TESTS = [{
'url': 'http://www.ardmediathek.de/das-erste/guenther-jauch/edward-snowden-im-interview-held-oder-verraeter?documentId=19288786', 'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
'file': '19288786.mp4', 'file': '22429276.mp4',
'md5': '515bf47ce209fb3f5a61b7aad364634c', 'md5': '469751912f1de0816a9fc9df8336476c',
'info_dict': { 'info_dict': {
'title': 'Edward Snowden im Interview - Held oder Verräter?', 'title': 'Vertrauen ist gut, Spionieren ist besser - Geht so deutsch-amerikanische Freundschaft?',
'description': 'Edward Snowden hat alles aufs Spiel gesetzt, um die weltweite \xdcberwachung durch die Geheimdienste zu enttarnen. Nun stellt sich der ehemalige NSA-Mitarbeiter erstmals weltweit in einem TV-Interview den Fragen eines NDR-Journalisten. Die Sendung vom Sonntagabend.', 'description': 'Das Erste Mediathek [ARD]: Vertrauen ist gut, Spionieren ist besser - Geht so deutsch-amerikanische Freundschaft?, Anne Will, Über die Spionage-Affäre diskutieren Clemens Binninger, Katrin Göring-Eckardt, Georg Mascolo, Andrew B. Denison und Constanze Kurz.. Das Video zur Sendung Anne Will am Mittwoch, 16.07.2014',
'thumbnail': 'http://www.ardmediathek.de/ard/servlet/contentblob/19/28/87/90/19288790/bild/2250037',
}, },
'skip': 'Blocked outside of Germany', 'skip': 'Blocked outside of Germany',
} }, {
'url': 'http://www.ardmediathek.de/tv/Tatort/Das-Wunder-von-Wolbeck-Video-tgl-ab-20/Das-Erste/Video?documentId=22490580&bcastId=602916',
'info_dict': {
'id': '22490580',
'ext': 'mp4',
'title': 'Das Wunder von Wolbeck (Video tgl. ab 20 Uhr)',
'description': 'Auf einem restaurierten Hof bei Wolbeck wird der Heilpraktiker Raffael Lembeck eines morgens von seiner Frau Stella tot aufgefunden. Das Opfer war offensichtlich in seiner Praxis zu Fall gekommen und ist dann verblutet, erklärt Prof. Boerne am Tatort.',
},
'skip': 'Blocked outside of Germany',
}]
def _real_extract(self, url): def _real_extract(self, url):
# determine video id from url # determine video id from url
@@ -43,40 +52,64 @@ class ARDIE(InfoExtractor):
r'<h4 class="headline">(.*?)</h4>'], r'<h4 class="headline">(.*?)</h4>'],
webpage, 'title') webpage, 'title')
description = self._html_search_meta( description = self._html_search_meta(
'dcterms.abstract', webpage, 'description') 'dcterms.abstract', webpage, 'description', default=None)
thumbnail = self._og_search_thumbnail(webpage) if description is None:
description = self._html_search_meta(
'description', webpage, 'meta description')
# Thumbnail is sometimes not present.
# It is in the mobile version, but that seems to use a different URL
# structure altogether.
thumbnail = self._og_search_thumbnail(webpage, default=None)
media_info = self._download_json( media_streams = re.findall(r'''(?x)
'http://www.ardmediathek.de/play/media/%s' % video_id, video_id) mediaCollection\.addMediaStream\([0-9]+,\s*[0-9]+,\s*"[^"]*",\s*
# The second element of the _mediaArray contains the standard http urls "([^"]+)"''', webpage)
streams = media_info['_mediaArray'][1]['_mediaStreamArray']
if not streams:
if '"fsk"' in webpage:
raise ExtractorError('This video is only available after 20:00')
formats = [] if media_streams:
QUALITIES = qualities(['lo', 'hi', 'hq'])
formats = []
for furl in set(media_streams):
if furl.endswith('.f4m'):
fid = 'f4m'
else:
fid_m = re.match(r'.*\.([^.]+)\.[^.]+$', furl)
fid = fid_m.group(1) if fid_m else None
formats.append({
'quality': QUALITIES(fid),
'format_id': fid,
'url': furl,
})
else: # request JSON file
media_info = self._download_json(
'http://www.ardmediathek.de/play/media/%s' % video_id, video_id)
# The second element of the _mediaArray contains the standard http urls
streams = media_info['_mediaArray'][1]['_mediaStreamArray']
if not streams:
if '"fsk"' in webpage:
raise ExtractorError('This video is only available after 20:00')
for s in streams: formats = []
if type(s['_stream']) == list: for s in streams:
for index, url in enumerate(s['_stream'][::-1]): if type(s['_stream']) == list:
quality = s['_quality'] + index for index, url in enumerate(s['_stream'][::-1]):
formats.append({ quality = s['_quality'] + index
'quality': quality, formats.append({
'url': url, 'quality': quality,
'format_id': '%s-%s' % (determine_ext(url), quality) 'url': url,
'format_id': '%s-%s' % (determine_ext(url), quality)
}) })
continue continue
format = { format = {
'quality': s['_quality'], 'quality': s['_quality'],
'url': s['_stream'], 'url': s['_stream'],
} }
format['format_id'] = '%s-%s' % ( format['format_id'] = '%s-%s' % (
determine_ext(format['url']), format['quality']) determine_ext(format['url']), format['quality'])
formats.append(format) formats.append(format)
self._sort_formats(formats) self._sort_formats(formats)

View File

@@ -14,13 +14,13 @@ from ..utils import (
class ComedyCentralIE(MTVServicesInfoExtractor): class ComedyCentralIE(MTVServicesInfoExtractor):
_VALID_URL = r'''(?x)https?://(?:www\.)?(comedycentral|cc)\.com/ _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
(video-clips|episodes|cc-studios|video-collections) (video-clips|episodes|cc-studios|video-collections|full-episodes)
/(?P<title>.*)''' /(?P<title>.*)'''
_FEED_URL = 'http://comedycentral.com/feeds/mrss/' _FEED_URL = 'http://comedycentral.com/feeds/mrss/'
_TEST = { _TEST = {
'url': 'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother', 'url': 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
'md5': 'c4f48e9eda1b16dd10add0744344b6d8', 'md5': 'c4f48e9eda1b16dd10add0744344b6d8',
'info_dict': { 'info_dict': {
'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354', 'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',

View File

@@ -463,14 +463,14 @@ class InfoExtractor(object):
def _og_search_url(self, html, **kargs): def _og_search_url(self, html, **kargs):
return self._og_search_property('url', html, **kargs) return self._og_search_property('url', html, **kargs)
def _html_search_meta(self, name, html, display_name=None, fatal=False): def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
if display_name is None: if display_name is None:
display_name = name display_name = name
return self._html_search_regex( return self._html_search_regex(
r'''(?ix)<meta r'''(?ix)<meta
(?=[^>]+(?:itemprop|name|property)=["\']%s["\']) (?=[^>]+(?:itemprop|name|property)=["\']%s["\'])
[^>]+content=["\']([^"\']+)["\']''' % re.escape(name), [^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
html, display_name, fatal=fatal) html, display_name, fatal=fatal, **kwargs)
def _dc_search_uploader(self, html): def _dc_search_uploader(self, html):
return self._html_search_meta('dc.creator', html, 'uploader') return self._html_search_meta('dc.creator', html, 'uploader')

View File

@@ -0,0 +1,65 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
parse_iso8601,
str_to_int,
)
class CrackedIE(InfoExtractor):
    """Information extractor for video pages on cracked.com."""

    _VALID_URL = r'https?://(?:www\.)?cracked\.com/video_(?P<id>\d+)_[\da-z-]+\.html'
    _TEST = {
        'url': 'http://www.cracked.com/video_19006_4-plot-holes-you-didnt-notice-in-your-favorite-movies.html',
        'md5': '4b29a5eeec292cd5eca6388c7558db9e',
        'info_dict': {
            'id': '19006',
            'ext': 'mp4',
            'title': '4 Plot Holes You Didn\'t Notice in Your Favorite Movies',
            'description': 'md5:3b909e752661db86007d10e5ec2df769',
            'timestamp': 1405659600,
            'upload_date': '20140718',
        }
    }

    def _real_extract(self, url):
        """Download the page at ``url`` and build the info dict from it.

        The media URL is mandatory; the timestamp and both counters are
        looked up non-fatally.  Width and height are taken from the media
        file name when it ends in ``_<W>X<H>.mp4`` and are None otherwise.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)

        # Two page layouts are tried: a JS variable first, then a <video> tag.
        video_url = self._html_search_regex(
            [r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'<video\s+src="([^"]+)"'],
            webpage, 'video URL')

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)

        raw_time = self._html_search_regex(
            r'<time datetime="([^"]+)"', webpage, 'upload date', fatal=False)
        # The last six characters of the attribute are dropped before parsing.
        timestamp = parse_iso8601(raw_time[:-6]) if raw_time else raw_time

        raw_views = self._html_search_regex(
            r'<span class="views" id="viewCounts">([\d,\.]+) Views</span>',
            webpage, 'view count', fatal=False)
        view_count = str_to_int(raw_views)

        raw_comments = self._html_search_regex(
            r'<span id="commentCounts">([\d,\.]+)</span>',
            webpage, 'comment count', fatal=False)
        comment_count = str_to_int(raw_comments)

        width = None
        height = None
        size_match = re.search(r'_(?P<width>\d+)X(?P<height>\d+)\.mp4$', video_url)
        if size_match:
            width = int(size_match.group('width'))
            height = int(size_match.group('height'))

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'view_count': view_count,
            'comment_count': comment_count,
            'height': height,
            'width': width,
        }

View File

@@ -0,0 +1,44 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class DFBIE(InfoExtractor):
    """Information extractor for video pages on tv.dfb.de."""

    IE_NAME = 'tv.dfb.de'
    _VALID_URL = r'https?://tv\.dfb\.de/video/[^/]+/(?P<id>\d+)'

    _TEST = {
        'url': 'http://tv.dfb.de/video/highlights-des-empfangs-in-berlin/9070/',
        # The md5 is different each time
        'info_dict': {
            'id': '9070',
            'ext': 'flv',
            'title': 'Highlights des Empfangs in Berlin',
            'upload_date': '20140716',
        },
    }

    def _real_extract(self, url):
        """Resolve the manifest URL for the video page at ``url``.

        Three documents are fetched: the HTML page (used for the thumbnail),
        the player XML for the video id, and the XML document that the
        player XML points to, whose ``token`` element carries the manifest
        URL and its auth value.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)

        player_xml = self._download_xml(
            'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id,
            video_id)
        video_el = player_xml.find('video')

        token_doc = self._download_xml(video_el.find('url').text, video_id)
        token = token_doc.find('token')
        manifest_url = (
            token.attrib['url'] + '?' + 'hdnea=' + token.attrib['auth']
            + '&hdcore=3.2.0')

        title = video_el.find('title').text
        thumbnail = self._og_search_thumbnail(webpage)
        # time_date is dot-separated; reversing the pieces and joining them
        # yields the upload_date string (the test expects '20140716').
        date_parts = video_el.find('time_date').text.split('.')
        upload_date = ''.join(reversed(date_parts))

        return {
            'id': video_id,
            'title': title,
            'url': manifest_url,
            'ext': 'flv',
            'thumbnail': thumbnail,
            'upload_date': upload_date,
        }

View File

@@ -0,0 +1,83 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
compat_urllib_parse,
compat_urllib_request,
determine_ext,
)
class FiredriveIE(InfoExtractor):
    """Information extractor for firedrive.com file and embed pages."""

    _VALID_URL = r'https?://(?:www\.)?firedrive\.com/' + \
                 '(?:file|embed)/(?P<id>[0-9a-zA-Z]+)'
    _FILE_DELETED_REGEX = r'<div class="removed_file_image">'

    _TESTS = [{
        'url': 'https://www.firedrive.com/file/FEB892FA160EBD01',
        'md5': 'd5d4252f80ebeab4dc2d5ceaed1b7970',
        'info_dict': {
            'id': 'FEB892FA160EBD01',
            'ext': 'flv',
            'title': 'bbb_theora_486kbit.flv',
            'thumbnail': 're:^http://.*\.jpg$',
        },
    }]

    def _real_extract(self, url):
        """Submit the page's hidden form and scrape the resulting video page.

        Raises ExtractorError (expected) when the first page contains the
        removed-file marker.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        # Both the file and embed URL forms are fetched through the file URL.
        file_url = 'http://firedrive.com/file/%s' % video_id
        landing_page = self._download_webpage(file_url, video_id)

        if re.search(self._FILE_DELETED_REGEX, landing_page):
            raise ExtractorError('Video %s does not exist' % video_id,
                                 expected=True)

        # Collect every hidden <input> name/value pair and post them back.
        hidden_inputs = re.findall(r'''(?x)<input\s+
            type="hidden"\s+
            name="([^"]+)"\s+
            (?:id="[^"]+"\s+)?
            value="([^"]*)"
            ''', landing_page)
        form_data = compat_urllib_parse.urlencode(dict(hidden_inputs))

        request = compat_urllib_request.Request(file_url, form_data)
        request.add_header('Content-type', 'application/x-www-form-urlencoded')
        # Apparently, this header is required for confirmation to work.
        request.add_header('Host', 'www.firedrive.com')

        video_page = self._download_webpage(
            request, video_id, 'Downloading video page')

        title = self._search_regex(
            r'class="external_title_left">(.+)</div>', video_page, 'title')

        thumbnail = self._search_regex(
            r'image:\s?"(//[^\"]+)', video_page, 'thumbnail', fatal=False)
        if thumbnail is not None:
            # The pattern only matches URLs starting with '//'.
            thumbnail = 'http:' + thumbnail

        ext = self._search_regex(
            r'type:\s?\'([^\']+)\',', video_page, 'extension', fatal=False)
        video_url = self._search_regex(
            r'file:\s?\'(http[^\']+)\',', video_page, 'file url')

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': [{
                'format_id': 'sd',
                'url': video_url,
                'ext': ext,
            }],
        }

View File

@@ -48,7 +48,7 @@ class PluzzIE(FranceTVBaseInfoExtractor):
class FranceTvInfoIE(FranceTVBaseInfoExtractor): class FranceTvInfoIE(FranceTVBaseInfoExtractor):
IE_NAME = 'francetvinfo.fr' IE_NAME = 'francetvinfo.fr'
_VALID_URL = r'https?://www\.francetvinfo\.fr/.*/(?P<title>.+)\.html' _VALID_URL = r'https?://(?:www|mobile)\.francetvinfo\.fr/.*/(?P<title>.+)\.html'
_TESTS = [{ _TESTS = [{
'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html', 'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
@@ -211,7 +211,7 @@ class GenerationQuoiIE(InfoExtractor):
class CultureboxIE(FranceTVBaseInfoExtractor): class CultureboxIE(FranceTVBaseInfoExtractor):
IE_NAME = 'culturebox.francetvinfo.fr' IE_NAME = 'culturebox.francetvinfo.fr'
_VALID_URL = r'https?://culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)' _VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
_TEST = { _TEST = {
'url': 'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813', 'url': 'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813',

View File

@@ -14,8 +14,8 @@ from ..utils import (
class GorillaVidIE(InfoExtractor): class GorillaVidIE(InfoExtractor):
IE_DESC = 'GorillaVid.in and daclips.in' IE_DESC = 'GorillaVid.in and daclips.in'
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?://(?:www\.)? https?://(?P<host>(?:www\.)?
(?:daclips\.in|gorillavid\.in)/ (?:daclips\.in|gorillavid\.in))/
(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)? (?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?
''' '''
@@ -39,6 +39,7 @@ class GorillaVidIE(InfoExtractor):
}, },
}, { }, {
'url': 'http://daclips.in/3rso4kdn6f9m', 'url': 'http://daclips.in/3rso4kdn6f9m',
'md5': '1ad8fd39bb976eeb66004d3a4895f106',
'info_dict': { 'info_dict': {
'id': '3rso4kdn6f9m', 'id': '3rso4kdn6f9m',
'ext': 'mp4', 'ext': 'mp4',
@@ -51,7 +52,7 @@ class GorillaVidIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage('http://%s/%s' % (mobj.group('host'), video_id), video_id)
fields = dict(re.findall(r'''(?x)<input\s+ fields = dict(re.findall(r'''(?x)<input\s+
type="hidden"\s+ type="hidden"\s+

102
youtube_dl/extractor/mlb.py Normal file
View File

@@ -0,0 +1,102 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
parse_duration,
parse_iso8601,
find_xpath_attr,
)
class MLBIE(InfoExtractor):
    """Information extractor for video pages on m.mlb.com."""

    _VALID_URL = r'https?://m\.mlb\.com/video/(?:topic/[\da-z_-]+/)?v(?P<id>n?\d+)'

    _TESTS = [
        {
            'url': 'http://m.mlb.com/video/topic/81536970/v34496663/mianym-stanton-practices-for-the-home-run-derby',
            'md5': 'd9c022c10d21f849f49c05ae12a8a7e9',
            'info_dict': {
                'id': '34496663',
                'ext': 'mp4',
                'title': 'Stanton prepares for Derby',
                'description': 'md5:d00ce1e5fd9c9069e9c13ab4faedfa57',
                'duration': 46,
                'timestamp': 1405105800,
                'upload_date': '20140711',
                'thumbnail': 're:^https?://.*\.jpg$',
            },
        },
        {
            'url': 'http://m.mlb.com/video/topic/vtp_hrd_sponsor/v34578115/hrd-cespedes-wins-2014-gillette-home-run-derby',
            'md5': '0e6e73d509321e142409b695eadd541f',
            'info_dict': {
                'id': '34578115',
                'ext': 'mp4',
                'title': 'Cespedes repeats as Derby champ',
                'description': 'md5:08df253ce265d4cf6fb09f581fafad07',
                'duration': 488,
                'timestamp': 1405399936,
                'upload_date': '20140715',
                'thumbnail': 're:^https?://.*\.jpg$',
            },
        },
        {
            'url': 'http://m.mlb.com/video/v34577915/bautista-on-derby-captaining-duties-his-performance',
            'md5': 'b8fd237347b844365d74ea61d4245967',
            'info_dict': {
                'id': '34577915',
                'ext': 'mp4',
                'title': 'Bautista on Home Run Derby',
                'description': 'md5:b80b34031143d0986dddc64a8839f0fb',
                'duration': 52,
                'timestamp': 1405390722,
                'upload_date': '20140715',
                'thumbnail': 're:^https?://.*\.jpg$',
            },
        },
    ]

    def _real_extract(self, url):
        """Fetch the detail XML for the video in ``url`` and build its info dict.

        Every ``url`` child of the detail document becomes one format; when
        its ``playback_scenario`` attribute contains ``<n>K_<w>X<h>``, the
        bitrate (scaled by 1000), width and height are filled in from it.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        # The detail path is keyed on the last three characters of the id.
        detail_url = (
            'http://m.mlb.com/gen/multimedia/detail/%s/%s/%s/%s.xml'
            % (video_id[-3], video_id[-2], video_id[-1], video_id))
        detail = self._download_xml(detail_url, video_id)

        title = detail.find('./headline').text
        description = detail.find('./big-blurb').text
        duration = parse_duration(detail.find('./duration').text)
        # The trailing five characters of the date attribute are dropped
        # before parsing.
        timestamp = parse_iso8601(detail.attrib['date'][:-5])
        thumbnail_el = find_xpath_attr(
            detail, './thumbnailScenarios/thumbnailScenario', 'type', '45')
        thumbnail = thumbnail_el.text

        formats = []
        for media_el in detail.findall('./url'):
            scenario = media_el.attrib['playback_scenario']
            entry = {
                'url': media_el.text,
                'format_id': scenario,
            }
            dims = re.search(
                r'(?P<vbr>\d+)K_(?P<width>\d+)X(?P<height>\d+)', scenario)
            if dims is not None:
                entry['vbr'] = int(dims.group('vbr')) * 1000
                entry['width'] = int(dims.group('width'))
                entry['height'] = int(dims.group('height'))
            formats.append(entry)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'timestamp': timestamp,
            'formats': formats,
            'thumbnail': thumbnail,
        }

View File

@@ -158,6 +158,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
if mgid.endswith('.swf'): if mgid.endswith('.swf'):
mgid = mgid[:-4] mgid = mgid[:-4]
except RegexNotFoundError: except RegexNotFoundError:
mgid = None
if mgid is None or ':' not in mgid:
mgid = self._search_regex( mgid = self._search_regex(
[r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'], [r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
webpage, u'mgid') webpage, u'mgid')

View File

@@ -18,15 +18,15 @@ class NDRIE(InfoExtractor):
_TESTS = [ _TESTS = [
{ {
'url': 'http://www.ndr.de/fernsehen/sendungen/markt/markt7959.html', 'url': 'http://www.ndr.de/fernsehen/media/dienordreportage325.html',
'md5': 'e7a6079ca39d3568f4996cb858dd6708', 'md5': '4a4eeafd17c3058b65f0c8f091355855',
'note': 'Video file', 'note': 'Video file',
'info_dict': { 'info_dict': {
'id': '7959', 'id': '325',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Markt - die ganze Sendung', 'title': 'Blaue Bohnen aus Blocken',
'description': 'md5:af9179cf07f67c5c12dc6d9997e05725', 'description': 'md5:190d71ba2ccddc805ed01547718963bc',
'duration': 2655, 'duration': 1715,
}, },
}, },
{ {

View File

@@ -32,7 +32,7 @@ class NPOIE(InfoExtractor):
'http://e.omroep.nl/metadata/aflevering/%s' % video_id, 'http://e.omroep.nl/metadata/aflevering/%s' % video_id,
video_id, video_id,
# We have to remove the javascript callback # We have to remove the javascript callback
transform_source=lambda j: re.sub(r'parseMetadata\((.*?)\);\n//epc', r'\1', j) transform_source=lambda j: re.sub(r'parseMetadata\((.*?)\);\n//.*$', r'\1', j)
) )
token_page = self._download_webpage( token_page = self._download_webpage(
'http://ida.omroep.nl/npoplayer/i.js', 'http://ida.omroep.nl/npoplayer/i.js',

View File

@@ -46,7 +46,7 @@ class PyvideoIE(InfoExtractor):
return self.url_result(m_youtube.group(1), 'Youtube') return self.url_result(m_youtube.group(1), 'Youtube')
title = self._html_search_regex( title = self._html_search_regex(
r'<div class="section">.*?<h3(?:\s+class="[^"]*")?>([^>]+?)</h3>', r'<div class="section">\s*<h3(?:\s+class="[^"]*"[^>]*)?>([^>]+?)</h3>',
webpage, 'title', flags=re.DOTALL) webpage, 'title', flags=re.DOTALL)
video_url = self._search_regex( video_url = self._search_regex(
[r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'], [r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'],

View File

@@ -35,9 +35,7 @@ class RedTubeIE(InfoExtractor):
r'<h1 class="videoTitle[^"]*">(.+?)</h1>', r'<h1 class="videoTitle[^"]*">(.+?)</h1>',
webpage, u'title') webpage, u'title')
video_thumbnail = self._html_search_regex( video_thumbnail = self._og_search_thumbnail(webpage)
r'playerInnerHTML.+?<img\s+src="(.+?)"',
webpage, u'thumbnail', fatal=False)
# No self-labeling, but they describe themselves as # No self-labeling, but they describe themselves as
# "Home of Videos Porno" # "Home of Videos Porno"

View File

@@ -0,0 +1,45 @@
from __future__ import unicode_literals
import re
import time
from .common import InfoExtractor
from ..utils import strip_jsonp
class ReverbNationIE(InfoExtractor):
    """Information extractor for song pages on reverbnation.com."""

    _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
    _TESTS = [{
        'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
        'md5': '3da12ebca28c67c111a7f8b262d3f7a7',
        'info_dict': {
            'id': '16965047',
            'ext': 'mp3',
            "title": "MONA LISA",
            "uploader": "ALKILADOS",
            "uploader_id": 216429,
            "thumbnail": "http://gp1.wac.edgecastcdn.net/802892/production_public/Photo/13761700/image/1366002176_AVATAR_MONA_LISA.jpg"
        },
    }]

    def _real_extract(self, url):
        """Query the song API for the id in ``url`` and return the info dict.

        The API is called with a JSONP callback parameter, so the response
        is unwrapped with ``strip_jsonp`` before it is parsed as JSON.
        """
        mobj = re.match(self._VALID_URL, url)
        song_id = mobj.group('id')

        api_res = self._download_json(
            'https://api.reverbnation.com/song/%s?callback=api_response_5&_=%d'
            % (song_id, int(time.time() * 1000)),
            song_id,
            transform_source=strip_jsonp,
            note='Downloading information of song %s' % song_id
        )

        # Look the artist up once; 'or {}' also covers an explicit JSON null.
        artist = api_res.get('artist') or {}

        thumbnail = api_res.get('image', api_res.get('thumbnail'))
        if thumbnail and thumbnail.startswith('//'):
            # The API hands back a protocol-relative URL, which has no scheme
            # once it leaves the page context; make it absolute.
            thumbnail = 'http:' + thumbnail

        return {
            'id': song_id,
            'title': api_res.get('name'),
            'url': api_res.get('url'),
            'uploader': artist.get('name'),
            'uploader_id': artist.get('id'),
            'thumbnail': thumbnail,
            'ext': 'mp3',
            'vcodec': 'none',
        }

View File

@@ -30,7 +30,7 @@ class RTBFIE(InfoExtractor):
page = self._download_webpage('https://www.rtbf.be/video/embed?id=%s' % video_id, video_id) page = self._download_webpage('https://www.rtbf.be/video/embed?id=%s' % video_id, video_id)
data = json.loads(self._html_search_regex( data = json.loads(self._html_search_regex(
r'<div class="js-player-embed" data-video="([^"]+)"', page, 'data video'))['data'] r'<div class="js-player-embed(?: player-embed)?" data-video="([^"]+)"', page, 'data video'))['data']
video_url = data.get('downloadUrl') or data.get('url') video_url = data.get('downloadUrl') or data.get('url')

View File

@@ -0,0 +1,46 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class RUHDIE(InfoExtractor):
    """Information extractor for ruhd.ru player pages."""

    _VALID_URL = r'http://(?:www\.)?ruhd\.ru/play\.php\?vid=(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.ruhd.ru/play.php?vid=207',
        'md5': 'd1a9ec4edf8598e3fbd92bb16072ba83',
        'info_dict': {
            'id': '207',
            'ext': 'divx',
            'title': 'КОТ бааааам',
            'description': 'классный кот)',
            'thumbnail': 're:^http://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Scrape the player page at ``url`` and return its info dict.

        The media URL and title are mandatory; description and thumbnail
        are looked up non-fatally.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)

        video_url = self._html_search_regex(
            r'<param name="src" value="([^"]+)"', webpage, 'video url')
        # The title is whatever precedes the fixed site suffix in <title>.
        title = self._html_search_regex(
            r'<title>([^<]+)&nbsp;&nbsp; RUHD.ru - Видео Высокого качества №1 в России!</title>',
            webpage, 'title')
        description = self._html_search_regex(
            r'(?s)<div id="longdesc">(.+?)<span id="showlink">',
            webpage, 'description', fatal=False)

        thumbnail_path = self._html_search_regex(
            r'<param name="previewImage" value="([^"]+)"',
            webpage, 'thumbnail', fatal=False)
        # The site root is prepended to whatever value the page supplies.
        if thumbnail_path:
            thumbnail = 'http://www.ruhd.ru' + thumbnail_path
        else:
            thumbnail = thumbnail_path

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
        }

View File

@@ -0,0 +1,119 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
parse_duration,
unified_strdate,
)
class SapoIE(InfoExtractor):
    """Information extractor for SAPO video pages (videos.sapo.*)."""

    IE_DESC = 'SAPO Vídeos'
    _VALID_URL = r'https?://(?:(?:v2|www)\.)?videos\.sapo\.(?:pt|cv|ao|mz|tl)/(?P<id>[\da-zA-Z]{20})'

    _TESTS = [
        {
            'url': 'http://videos.sapo.pt/UBz95kOtiWYUMTA5Ghfi',
            'md5': '79ee523f6ecb9233ac25075dee0eda83',
            'note': 'SD video',
            'info_dict': {
                'id': 'UBz95kOtiWYUMTA5Ghfi',
                'ext': 'mp4',
                'title': 'Benfica - Marcas na Hitória',
                'description': 'md5:c9082000a128c3fd57bf0299e1367f22',
                'duration': 264,
                'uploader': 'tiago_1988',
                'upload_date': '20080229',
                'categories': ['benfica', 'cabral', 'desporto', 'futebol', 'geovanni', 'hooijdonk', 'joao', 'karel', 'lisboa', 'miccoli'],
            },
        },
        {
            'url': 'http://videos.sapo.pt/IyusNAZ791ZdoCY5H5IF',
            'md5': '90a2f283cfb49193fe06e861613a72aa',
            'note': 'HD video',
            'info_dict': {
                'id': 'IyusNAZ791ZdoCY5H5IF',
                'ext': 'mp4',
                'title': 'Codebits VII - Report',
                'description': 'md5:6448d6fd81ce86feac05321f354dbdc8',
                'duration': 144,
                'uploader': 'codebits',
                'upload_date': '20140427',
                'categories': ['codebits', 'codebits2014'],
            },
        },
        {
            'url': 'http://v2.videos.sapo.pt/yLqjzPtbTimsn2wWBKHz',
            'md5': 'e5aa7cc0bdc6db9b33df1a48e49a15ac',
            'note': 'v2 video',
            'info_dict': {
                'id': 'yLqjzPtbTimsn2wWBKHz',
                'ext': 'mp4',
                'title': 'Hipnose Condicionativa 4',
                'description': 'md5:ef0481abf8fb4ae6f525088a6dadbc40',
                'duration': 692,
                'uploader': 'sapozen',
                'upload_date': '20090609',
                'categories': ['condicionativa', 'heloisa', 'hipnose', 'miranda', 'sapo', 'zen'],
            },
        },
    ]

    def _real_extract(self, url):
        """Read the RSS item for the video in ``url`` and build its info dict.

        An 'sd' format is always produced from the item's video file and
        size; when the item's HD flag is 'true', a second 'hd' format of
        1280x720 is added with the trailing '/mov/1' of the video URL
        replaced by '/mov/39'.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        feed = self._download_xml(
            'http://rd3.videos.sapo.pt/%s/rss2' % video_id, video_id)
        item = feed.find('./channel/item')

        title = item.find('./title').text
        description = item.find('./{http://videos.sapo.pt/mrss/}synopse').text
        thumbnail = item.find('./{http://search.yahoo.com/mrss/}content').get('url')
        duration = parse_duration(item.find('./{http://videos.sapo.pt/mrss/}time').text)
        uploader = item.find('./{http://videos.sapo.pt/mrss/}author').text
        upload_date = unified_strdate(item.find('./pubDate').text)
        view_count = int(item.find('./{http://videos.sapo.pt/mrss/}views').text)
        comment_count = int(item.find('./{http://videos.sapo.pt/mrss/}comment_count').text)

        # Tags are whitespace-separated; an empty element yields no categories.
        tags = item.find('./{http://videos.sapo.pt/mrss/}tags').text
        if tags:
            categories = tags.split()
        else:
            categories = []

        if item.find('./{http://videos.sapo.pt/mrss/}m18').text == 'true':
            age_limit = 18
        else:
            age_limit = 0

        video_url = item.find('./{http://videos.sapo.pt/mrss/}videoFile').text
        # videoSize is split on 'x' into width and height.
        dimensions = item.find('./{http://videos.sapo.pt/mrss/}videoSize').text.split('x')

        sd_format = {
            'url': video_url,
            'ext': 'mp4',
            'format_id': 'sd',
            'width': int(dimensions[0]),
            'height': int(dimensions[1]),
        }
        formats = [sd_format]

        has_hd = item.find('./{http://videos.sapo.pt/mrss/}HD').text == 'true'
        if has_hd:
            hd_format = {
                'url': re.sub(r'/mov/1$', '/mov/39', video_url),
                'ext': 'mp4',
                'format_id': 'hd',
                'width': 1280,
                'height': 720,
            }
            formats.append(hd_format)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'uploader': uploader,
            'upload_date': upload_date,
            'view_count': view_count,
            'comment_count': comment_count,
            'categories': categories,
            'age_limit': age_limit,
            'formats': formats,
        }

View File

@@ -5,6 +5,7 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError,
compat_parse_qs, compat_parse_qs,
compat_urllib_request, compat_urllib_request,
) )
@@ -12,7 +13,7 @@ from ..utils import (
class ScreencastIE(InfoExtractor): class ScreencastIE(InfoExtractor):
_VALID_URL = r'https?://www\.screencast\.com/t/(?P<id>[a-zA-Z0-9]+)' _VALID_URL = r'https?://www\.screencast\.com/t/(?P<id>[a-zA-Z0-9]+)'
_TEST = { _TESTS = [{
'url': 'http://www.screencast.com/t/3ZEjQXlT', 'url': 'http://www.screencast.com/t/3ZEjQXlT',
'md5': '917df1c13798a3e96211dd1561fded83', 'md5': '917df1c13798a3e96211dd1561fded83',
'info_dict': { 'info_dict': {
@@ -20,24 +21,87 @@ class ScreencastIE(InfoExtractor):
'ext': 'm4v', 'ext': 'm4v',
'title': 'Color Measurement with Ocean Optics Spectrometers', 'title': 'Color Measurement with Ocean Optics Spectrometers',
'description': 'md5:240369cde69d8bed61349a199c5fb153', 'description': 'md5:240369cde69d8bed61349a199c5fb153',
'thumbnail': 're:^https?://.*\.jpg$' 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$',
} }
} }, {
'url': 'http://www.screencast.com/t/V2uXehPJa1ZI',
'md5': 'e8e4b375a7660a9e7e35c33973410d34',
'info_dict': {
'id': 'V2uXehPJa1ZI',
'ext': 'mov',
'title': 'The Amadeus Spectrometer',
'description': 're:^In this video, our friends at.*To learn more about Amadeus, visit',
'thumbnail': 're:^https?://.*\.(?:gif|jpg)$',
}
}, {
'url': 'http://www.screencast.com/t/aAB3iowa',
'md5': 'dedb2734ed00c9755761ccaee88527cd',
'info_dict': {
'id': 'aAB3iowa',
'ext': 'mp4',
'title': 'Google Earth Export',
'description': 'Provides a demo of a CommunityViz export to Google Earth, one of the 3D viewing options.',
'thumbnail': 're:^https?://.*\.(?:gif|jpg)$',
}
}, {
'url': 'http://www.screencast.com/t/X3ddTrYh',
'md5': '669ee55ff9c51988b4ebc0877cc8b159',
'info_dict': {
'id': 'X3ddTrYh',
'ext': 'wmv',
'title': 'Toolkit 6 User Group Webinar (2014-03-04) - Default Judgment and First Impression',
'description': 'md5:7b9f393bc92af02326a5c5889639eab0',
'thumbnail': 're:^https?://.*\.(?:gif|jpg)$',
}
},
]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
flash_vars_s = self._html_search_regex( video_url = self._html_search_regex(
r'<param name="flashVars" value="([^"]+)"', webpage, 'flash vars') r'<embed name="Video".*?src="([^"]+)"', webpage,
flash_vars = compat_parse_qs(flash_vars_s) 'QuickTime embed', default=None)
thumbnail = flash_vars.get('thumb', [None])[0] if video_url is None:
video_url_raw = compat_urllib_request.quote(flash_vars['content'][0]) flash_vars_s = self._html_search_regex(
video_url = video_url_raw.replace('http%3A', 'http:') r'<param name="flashVars" value="([^"]+)"', webpage, 'flash vars',
title = self._og_search_title(webpage) default=None)
description = self._og_search_description(webpage) if not flash_vars_s:
flash_vars_s = self._html_search_regex(
r'<param name="initParams" value="([^"]+)"', webpage, 'flash vars',
default=None)
if flash_vars_s:
flash_vars_s = flash_vars_s.replace(',', '&')
if flash_vars_s:
flash_vars = compat_parse_qs(flash_vars_s)
video_url_raw = compat_urllib_request.quote(
flash_vars['content'][0])
video_url = video_url_raw.replace('http%3A', 'http:')
if video_url is None:
video_meta = self._html_search_meta(
'og:video', webpage, default=None)
if video_meta:
video_url = self._search_regex(
r'src=(.*?)(?:$|&)', video_meta,
'meta tag video URL', default=None)
if video_url is None:
raise ExtractorError('Cannot find video')
title = self._og_search_title(webpage, default=None)
if title is None:
title = self._html_search_regex(
[r'<b>Title:</b> ([^<]*)</div>',
r'class="tabSeperator">></span><span class="tabText">(.*?)<'],
webpage, 'title')
thumbnail = self._og_search_thumbnail(webpage)
description = self._og_search_description(webpage, default=None)
if description is None:
description = self._html_search_meta('description', webpage)
return { return {
'id': video_id, 'id': video_id,

View File

@@ -81,16 +81,16 @@ class SoundcloudIE(InfoExtractor):
}, },
# downloadable song # downloadable song
{ {
'url': 'https://soundcloud.com/simgretina/just-your-problem-baby-1', 'url': 'https://soundcloud.com/oddsamples/bus-brakes',
'md5': '56a8b69568acaa967b4c49f9d1d52d19', 'md5': 'fee7b8747b09bb755cefd4b853e7249a',
'info_dict': { 'info_dict': {
'id': '105614606', 'id': '128590877',
'ext': 'wav', 'ext': 'wav',
'title': 'Just Your Problem Baby (Acapella)', 'title': 'Bus Brakes',
'description': 'Vocals', 'description': 'md5:0170be75dd395c96025d210d261c784e',
'uploader': 'Sim Gretina', 'uploader': 'oddsamples',
'upload_date': '20130815', 'upload_date': '20140109',
#'duration': 42, 'duration': 17,
}, },
}, },
] ]

View File

@@ -3,24 +3,24 @@ from __future__ import unicode_literals
from .mtv import MTVServicesInfoExtractor from .mtv import MTVServicesInfoExtractor
class SouthParkStudiosIE(MTVServicesInfoExtractor): class SouthParkIE(MTVServicesInfoExtractor):
IE_NAME = 'southparkstudios.com' IE_NAME = 'southpark.cc.com'
_VALID_URL = r'https?://(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))' _VALID_URL = r'https?://(www\.)?(?P<url>southpark\.cc\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss' _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
_TESTS = [{ _TESTS = [{
'url': 'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured', 'url': 'http://southpark.cc.com/clips/104437/bat-daded#tab=featured',
'info_dict': { 'info_dict': {
'id': 'a7bff6c2-ed00-11e0-aca6-0026b9414f30', 'id': 'a7bff6c2-ed00-11e0-aca6-0026b9414f30',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Bat Daded', 'title': 'South Park|Bat Daded',
'description': 'Randy disqualifies South Park by getting into a fight with Bat Dad.', 'description': 'Randy disqualifies South Park by getting into a fight with Bat Dad.',
}, },
}] }]
class SouthparkDeIE(SouthParkStudiosIE): class SouthparkDeIE(SouthParkIE):
IE_NAME = 'southpark.de' IE_NAME = 'southpark.de'
_VALID_URL = r'https?://(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))' _VALID_URL = r'https?://(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
_FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/' _FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'

View File

@@ -5,6 +5,7 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from .brightcove import BrightcoveIE from .brightcove import BrightcoveIE
from .discovery import DiscoveryIE from .discovery import DiscoveryIE
from ..utils import compat_urlparse
class TlcIE(DiscoveryIE): class TlcIE(DiscoveryIE):
@@ -51,6 +52,10 @@ class TlcDeIE(InfoExtractor):
# Otherwise we don't get the correct 'BrightcoveExperience' element, # Otherwise we don't get the correct 'BrightcoveExperience' element,
# example: http://www.tlc.de/sendungen/cake-boss/videos/cake-boss-cannoli-drama/ # example: http://www.tlc.de/sendungen/cake-boss/videos/cake-boss-cannoli-drama/
iframe_url = iframe_url.replace('.htm?', '.php?') iframe_url = iframe_url.replace('.htm?', '.php?')
url_fragment = compat_urlparse.urlparse(url).fragment
if url_fragment:
# Since the fragment is not sent to the server, we always get the same iframe
iframe_url = re.sub(r'playlist=(\d+)', 'playlist=%s' % url_fragment, iframe_url)
iframe = self._download_webpage(iframe_url, title) iframe = self._download_webpage(iframe_url, title)
return { return {

View File

@@ -1,21 +1,21 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import base64 import base64
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import compat_parse_qs
compat_parse_qs,
)
class TutvIE(InfoExtractor): class TutvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tu\.tv/videos/(?P<id>[^/?]+)' _VALID_URL = r'https?://(?:www\.)?tu\.tv/videos/(?P<id>[^/?]+)'
_TEST = { _TEST = {
'url': 'http://tu.tv/videos/noah-en-pabellon-cuahutemoc', 'url': 'http://tu.tv/videos/robots-futbolistas',
'file': '2742556.flv', 'md5': '627c7c124ac2a9b5ab6addb94e0e65f7',
'md5': '5eb766671f69b82e528dc1e7769c5cb2',
'info_dict': { 'info_dict': {
'title': 'Noah en pabellon cuahutemoc', 'id': '2973058',
'ext': 'flv',
'title': 'Robots futbolistas',
}, },
} }
@@ -26,10 +26,9 @@ class TutvIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, 'internal video ID') internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, 'internal video ID')
data_url = 'http://tu.tv/flvurl.php?codVideo=' + str(internal_id) data_content = self._download_webpage(
data_content = self._download_webpage(data_url, video_id, note='Downloading video info') 'http://tu.tv/flvurl.php?codVideo=%s' % internal_id, video_id, 'Downloading video info')
data = compat_parse_qs(data_content) video_url = base64.b64decode(compat_parse_qs(data_content)['kpt'][0]).decode('utf-8')
video_url = base64.b64decode(data['kpt'][0]).decode('utf-8')
return { return {
'id': internal_id, 'id': internal_id,

View File

@@ -14,6 +14,7 @@ import zlib
from .common import InfoExtractor, SearchInfoExtractor from .common import InfoExtractor, SearchInfoExtractor
from .subtitles import SubtitlesInfoExtractor from .subtitles import SubtitlesInfoExtractor
from ..jsinterp import JSInterpreter from ..jsinterp import JSInterpreter
from ..swfinterp import SWFInterpreter
from ..utils import ( from ..utils import (
compat_chr, compat_chr,
compat_parse_qs, compat_parse_qs,
@@ -347,8 +348,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
self.to_screen(u'RTMP download detected') self.to_screen(u'RTMP download detected')
def _extract_signature_function(self, video_id, player_url, slen): def _extract_signature_function(self, video_id, player_url, slen):
id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$', id_m = re.match(
player_url) r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3)?\.(?P<ext>[a-z]+)$',
player_url)
player_type = id_m.group('ext') player_type = id_m.group('ext')
player_id = id_m.group('id') player_id = id_m.group('id')
@@ -449,417 +451,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
return lambda s: initial_function([s]) return lambda s: initial_function([s])
def _parse_sig_swf(self, file_contents): def _parse_sig_swf(self, file_contents):
if file_contents[1:3] != b'WS': swfi = SWFInterpreter(file_contents)
raise ExtractorError(
u'Not an SWF file; header is %r' % file_contents[:3])
if file_contents[:1] == b'C':
content = zlib.decompress(file_contents[8:])
else:
raise NotImplementedError(u'Unsupported compression format %r' %
file_contents[:1])
def extract_tags(content):
pos = 0
while pos < len(content):
header16 = struct.unpack('<H', content[pos:pos+2])[0]
pos += 2
tag_code = header16 >> 6
tag_len = header16 & 0x3f
if tag_len == 0x3f:
tag_len = struct.unpack('<I', content[pos:pos+4])[0]
pos += 4
assert pos+tag_len <= len(content)
yield (tag_code, content[pos:pos+tag_len])
pos += tag_len
code_tag = next(tag
for tag_code, tag in extract_tags(content)
if tag_code == 82)
p = code_tag.index(b'\0', 4) + 1
code_reader = io.BytesIO(code_tag[p:])
# Parse ABC (AVM2 ByteCode)
def read_int(reader=None):
if reader is None:
reader = code_reader
res = 0
shift = 0
for _ in range(5):
buf = reader.read(1)
assert len(buf) == 1
b = struct.unpack('<B', buf)[0]
res = res | ((b & 0x7f) << shift)
if b & 0x80 == 0:
break
shift += 7
return res
def u30(reader=None):
res = read_int(reader)
assert res & 0xf0000000 == 0
return res
u32 = read_int
def s32(reader=None):
v = read_int(reader)
if v & 0x80000000 != 0:
v = - ((v ^ 0xffffffff) + 1)
return v
def read_string(reader=None):
if reader is None:
reader = code_reader
slen = u30(reader)
resb = reader.read(slen)
assert len(resb) == slen
return resb.decode('utf-8')
def read_bytes(count, reader=None):
if reader is None:
reader = code_reader
resb = reader.read(count)
assert len(resb) == count
return resb
def read_byte(reader=None):
resb = read_bytes(1, reader=reader)
res = struct.unpack('<B', resb)[0]
return res
# minor_version + major_version
read_bytes(2 + 2)
# Constant pool
int_count = u30()
for _c in range(1, int_count):
s32()
uint_count = u30()
for _c in range(1, uint_count):
u32()
double_count = u30()
read_bytes((double_count-1) * 8)
string_count = u30()
constant_strings = [u'']
for _c in range(1, string_count):
s = read_string()
constant_strings.append(s)
namespace_count = u30()
for _c in range(1, namespace_count):
read_bytes(1) # kind
u30() # name
ns_set_count = u30()
for _c in range(1, ns_set_count):
count = u30()
for _c2 in range(count):
u30()
multiname_count = u30()
MULTINAME_SIZES = {
0x07: 2, # QName
0x0d: 2, # QNameA
0x0f: 1, # RTQName
0x10: 1, # RTQNameA
0x11: 0, # RTQNameL
0x12: 0, # RTQNameLA
0x09: 2, # Multiname
0x0e: 2, # MultinameA
0x1b: 1, # MultinameL
0x1c: 1, # MultinameLA
}
multinames = [u'']
for _c in range(1, multiname_count):
kind = u30()
assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
if kind == 0x07:
u30() # namespace_idx
name_idx = u30()
multinames.append(constant_strings[name_idx])
else:
multinames.append('[MULTINAME kind: %d]' % kind)
for _c2 in range(MULTINAME_SIZES[kind]):
u30()
# Methods
method_count = u30()
MethodInfo = collections.namedtuple(
'MethodInfo',
['NEED_ARGUMENTS', 'NEED_REST'])
method_infos = []
for method_id in range(method_count):
param_count = u30()
u30() # return type
for _ in range(param_count):
u30() # param type
u30() # name index (always 0 for youtube)
flags = read_byte()
if flags & 0x08 != 0:
# Options present
option_count = u30()
for c in range(option_count):
u30() # val
read_bytes(1) # kind
if flags & 0x80 != 0:
# Param names present
for _ in range(param_count):
u30() # param name
mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
method_infos.append(mi)
# Metadata
metadata_count = u30()
for _c in range(metadata_count):
u30() # name
item_count = u30()
for _c2 in range(item_count):
u30() # key
u30() # value
def parse_traits_info():
trait_name_idx = u30()
kind_full = read_byte()
kind = kind_full & 0x0f
attrs = kind_full >> 4
methods = {}
if kind in [0x00, 0x06]: # Slot or Const
u30() # Slot id
u30() # type_name_idx
vindex = u30()
if vindex != 0:
read_byte() # vkind
elif kind in [0x01, 0x02, 0x03]: # Method / Getter / Setter
u30() # disp_id
method_idx = u30()
methods[multinames[trait_name_idx]] = method_idx
elif kind == 0x04: # Class
u30() # slot_id
u30() # classi
elif kind == 0x05: # Function
u30() # slot_id
function_idx = u30()
methods[function_idx] = multinames[trait_name_idx]
else:
raise ExtractorError(u'Unsupported trait kind %d' % kind)
if attrs & 0x4 != 0: # Metadata present
metadata_count = u30()
for _c3 in range(metadata_count):
u30() # metadata index
return methods
# Classes
TARGET_CLASSNAME = u'SignatureDecipher' TARGET_CLASSNAME = u'SignatureDecipher'
searched_idx = multinames.index(TARGET_CLASSNAME) searched_class = swfi.extract_class(TARGET_CLASSNAME)
searched_class_id = None initial_function = swfi.extract_function(searched_class, u'decipher')
class_count = u30()
for class_id in range(class_count):
name_idx = u30()
if name_idx == searched_idx:
# We found the class we're looking for!
searched_class_id = class_id
u30() # super_name idx
flags = read_byte()
if flags & 0x08 != 0: # Protected namespace is present
u30() # protected_ns_idx
intrf_count = u30()
for _c2 in range(intrf_count):
u30()
u30() # iinit
trait_count = u30()
for _c2 in range(trait_count):
parse_traits_info()
if searched_class_id is None:
raise ExtractorError(u'Target class %r not found' %
TARGET_CLASSNAME)
method_names = {}
method_idxs = {}
for class_id in range(class_count):
u30() # cinit
trait_count = u30()
for _c2 in range(trait_count):
trait_methods = parse_traits_info()
if class_id == searched_class_id:
method_names.update(trait_methods.items())
method_idxs.update(dict(
(idx, name)
for name, idx in trait_methods.items()))
# Scripts
script_count = u30()
for _c in range(script_count):
u30() # init
trait_count = u30()
for _c2 in range(trait_count):
parse_traits_info()
# Method bodies
method_body_count = u30()
Method = collections.namedtuple('Method', ['code', 'local_count'])
methods = {}
for _c in range(method_body_count):
method_idx = u30()
u30() # max_stack
local_count = u30()
u30() # init_scope_depth
u30() # max_scope_depth
code_length = u30()
code = read_bytes(code_length)
if method_idx in method_idxs:
m = Method(code, local_count)
methods[method_idxs[method_idx]] = m
exception_count = u30()
for _c2 in range(exception_count):
u30() # from
u30() # to
u30() # target
u30() # exc_type
u30() # var_name
trait_count = u30()
for _c2 in range(trait_count):
parse_traits_info()
assert p + code_reader.tell() == len(code_tag)
assert len(methods) == len(method_idxs)
method_pyfunctions = {}
def extract_function(func_name):
if func_name in method_pyfunctions:
return method_pyfunctions[func_name]
if func_name not in methods:
raise ExtractorError(u'Cannot find function %r' % func_name)
m = methods[func_name]
def resfunc(args):
registers = ['(this)'] + list(args) + [None] * m.local_count
stack = []
coder = io.BytesIO(m.code)
while True:
opcode = struct.unpack('!B', coder.read(1))[0]
if opcode == 36: # pushbyte
v = struct.unpack('!B', coder.read(1))[0]
stack.append(v)
elif opcode == 44: # pushstring
idx = u30(coder)
stack.append(constant_strings[idx])
elif opcode == 48: # pushscope
# We don't implement the scope register, so we'll just
# ignore the popped value
stack.pop()
elif opcode == 70: # callproperty
index = u30(coder)
mname = multinames[index]
arg_count = u30(coder)
args = list(reversed(
[stack.pop() for _ in range(arg_count)]))
obj = stack.pop()
if mname == u'split':
assert len(args) == 1
assert isinstance(args[0], compat_str)
assert isinstance(obj, compat_str)
if args[0] == u'':
res = list(obj)
else:
res = obj.split(args[0])
stack.append(res)
elif mname == u'slice':
assert len(args) == 1
assert isinstance(args[0], int)
assert isinstance(obj, list)
res = obj[args[0]:]
stack.append(res)
elif mname == u'join':
assert len(args) == 1
assert isinstance(args[0], compat_str)
assert isinstance(obj, list)
res = args[0].join(obj)
stack.append(res)
elif mname in method_pyfunctions:
stack.append(method_pyfunctions[mname](args))
else:
raise NotImplementedError(
u'Unsupported property %r on %r'
% (mname, obj))
elif opcode == 72: # returnvalue
res = stack.pop()
return res
elif opcode == 79: # callpropvoid
index = u30(coder)
mname = multinames[index]
arg_count = u30(coder)
args = list(reversed(
[stack.pop() for _ in range(arg_count)]))
obj = stack.pop()
if mname == u'reverse':
assert isinstance(obj, list)
obj.reverse()
else:
raise NotImplementedError(
u'Unsupported (void) property %r on %r'
% (mname, obj))
elif opcode == 93: # findpropstrict
index = u30(coder)
mname = multinames[index]
res = extract_function(mname)
stack.append(res)
elif opcode == 97: # setproperty
index = u30(coder)
value = stack.pop()
idx = stack.pop()
obj = stack.pop()
assert isinstance(obj, list)
assert isinstance(idx, int)
obj[idx] = value
elif opcode == 98: # getlocal
index = u30(coder)
stack.append(registers[index])
elif opcode == 99: # setlocal
index = u30(coder)
value = stack.pop()
registers[index] = value
elif opcode == 102: # getproperty
index = u30(coder)
pname = multinames[index]
if pname == u'length':
obj = stack.pop()
assert isinstance(obj, list)
stack.append(len(obj))
else: # Assume attribute access
idx = stack.pop()
assert isinstance(idx, int)
obj = stack.pop()
assert isinstance(obj, list)
stack.append(obj[idx])
elif opcode == 128: # coerce
u30(coder)
elif opcode == 133: # coerce_s
assert isinstance(stack[-1], (type(None), compat_str))
elif opcode == 164: # modulo
value2 = stack.pop()
value1 = stack.pop()
res = value1 % value2
stack.append(res)
elif opcode == 208: # getlocal_0
stack.append(registers[0])
elif opcode == 209: # getlocal_1
stack.append(registers[1])
elif opcode == 210: # getlocal_2
stack.append(registers[2])
elif opcode == 211: # getlocal_3
stack.append(registers[3])
elif opcode == 214: # setlocal_2
registers[2] = stack.pop()
elif opcode == 215: # setlocal_3
registers[3] = stack.pop()
else:
raise NotImplementedError(
u'Unsupported opcode %d' % opcode)
method_pyfunctions[func_name] = resfunc
return resfunc
initial_function = extract_function(u'decipher')
return lambda s: initial_function([s]) return lambda s: initial_function([s])
def _decrypt_signature(self, s, video_id, player_url, age_gate=False): def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
@@ -1014,14 +609,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
age_gate = True age_gate = True
# We simulate the access to the video from www.youtube.com/v/{video_id} # We simulate the access to the video from www.youtube.com/v/{video_id}
# this can be viewed without login into Youtube # this can be viewed without login into Youtube
data = compat_urllib_parse.urlencode({'video_id': video_id, data = compat_urllib_parse.urlencode({
'el': 'player_embedded', 'video_id': video_id,
'gl': 'US', 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
'hl': 'en', 'sts':'16268',
'eurl': 'https://youtube.googleapis.com/v/' + video_id, })
'asv': 3,
'sts':'1588',
})
video_info_url = proto + '://www.youtube.com/get_video_info?' + data video_info_url = proto + '://www.youtube.com/get_video_info?' + data
video_info_webpage = self._download_webpage(video_info_url, video_id, video_info_webpage = self._download_webpage(video_info_url, video_id,
note=False, note=False,
@@ -1220,30 +812,37 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
url += '&signature=' + url_data['sig'][0] url += '&signature=' + url_data['sig'][0]
elif 's' in url_data: elif 's' in url_data:
encrypted_sig = url_data['s'][0] encrypted_sig = url_data['s'][0]
if self._downloader.params.get('verbose'):
if age_gate:
if player_url is None:
player_version = 'unknown'
else:
player_version = self._search_regex(
r'-(.+)\.swf$', player_url,
u'flash player', fatal=False)
player_desc = 'flash player %s' % player_version
else:
player_version = self._search_regex(
r'html5player-(.+?)\.js', video_webpage,
'html5 player', fatal=False)
player_desc = u'html5 player %s' % player_version
parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
(len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
if not age_gate: if not age_gate:
jsplayer_url_json = self._search_regex( jsplayer_url_json = self._search_regex(
r'"assets":.+?"js":\s*("[^"]+")', r'"assets":.+?"js":\s*("[^"]+")',
video_webpage, u'JS player URL') video_webpage, u'JS player URL')
player_url = json.loads(jsplayer_url_json) player_url = json.loads(jsplayer_url_json)
if player_url is None:
player_url_json = self._search_regex(
r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
video_webpage, u'age gate player URL')
player_url = json.loads(player_url_json)
if self._downloader.params.get('verbose'):
if player_url is None:
player_version = 'unknown'
player_desc = 'unknown'
else:
if player_url.endswith('swf'):
player_version = self._search_regex(
r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
u'flash player', fatal=False)
player_desc = 'flash player %s' % player_version
else:
player_version = self._search_regex(
r'html5player-(.+?)\.js', video_webpage,
'html5 player', fatal=False)
player_desc = u'html5 player %s' % player_version
parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
(len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
signature = self._decrypt_signature( signature = self._decrypt_signature(
encrypted_sig, video_id, player_url, age_gate) encrypted_sig, video_id, player_url, age_gate)

View File

@@ -11,6 +11,7 @@ class JSInterpreter(object):
def __init__(self, code): def __init__(self, code):
self.code = code self.code = code
self._functions = {} self._functions = {}
self._objects = {}
def interpret_statement(self, stmt, local_vars, allow_recursion=20): def interpret_statement(self, stmt, local_vars, allow_recursion=20):
if allow_recursion < 0: if allow_recursion < 0:
@@ -55,7 +56,19 @@ class JSInterpreter(object):
m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr) m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
if m: if m:
member = m.group('member') member = m.group('member')
val = local_vars[m.group('in')] variable = m.group('in')
if variable not in local_vars:
if variable not in self._objects:
self._objects[variable] = self.extract_object(variable)
obj = self._objects[variable]
key, args = member.split('(', 1)
args = args.strip(')')
argvals = [int(v) if v.isdigit() else local_vars[v]
for v in args.split(',')]
return obj[key](argvals)
val = local_vars[variable]
if member == 'split("")': if member == 'split("")':
return list(val) return list(val)
if member == 'join("")': if member == 'join("")':
@@ -97,6 +110,25 @@ class JSInterpreter(object):
return self._functions[fname](argvals) return self._functions[fname](argvals)
raise ExtractorError('Unsupported JS expression %r' % expr) raise ExtractorError('Unsupported JS expression %r' % expr)
def extract_object(self, objname):
    """Build Python callables for the function fields of a JS object literal.

    Searches ``self.code`` for ``[var ]<objname> = { ... };`` and returns a
    dict mapping each field name to the callable produced by
    ``self.build_function`` for that field's argument names and body.

    Raises ExtractorError when no such object literal is found (previously
    this surfaced as an AttributeError on ``None``).
    """
    obj = {}
    obj_m = re.search(
        (r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) +
        r'\s*(?P<fields>([a-zA-Z$]+\s*:\s*function\(.*?\)\s*\{.*?\})*)' +
        r'\}\s*;',
        self.code)
    if obj_m is None:
        raise ExtractorError('Could not find JS object %r' % objname)
    fields = obj_m.group('fields')
    # Currently, it only supports function definitions
    fields_m = re.finditer(
        r'(?P<key>[a-zA-Z$]+)\s*:\s*function'
        r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
        fields)
    for f in fields_m:
        argnames = f.group('args').split(',')
        obj[f.group('key')] = self.build_function(argnames, f.group('code'))

    return obj
def extract_function(self, funcname): def extract_function(self, funcname):
func_m = re.search( func_m = re.search(
(r'(?:function %s|[{;]%s\s*=\s*function)' % ( (r'(?:function %s|[{;]%s\s*=\s*function)' % (
@@ -107,10 +139,12 @@ class JSInterpreter(object):
raise ExtractorError('Could not find JS function %r' % funcname) raise ExtractorError('Could not find JS function %r' % funcname)
argnames = func_m.group('args').split(',') argnames = func_m.group('args').split(',')
return self.build_function(argnames, func_m.group('code'))
def build_function(self, argnames, code):
def resf(args): def resf(args):
local_vars = dict(zip(argnames, args)) local_vars = dict(zip(argnames, args))
for stmt in func_m.group('code').split(';'): for stmt in code.split(';'):
res = self.interpret_statement(stmt, local_vars) res = self.interpret_statement(stmt, local_vars)
return res return res
return resf return resf

610
youtube_dl/swfinterp.py Normal file
View File

@@ -0,0 +1,610 @@
from __future__ import unicode_literals
import collections
import io
import zlib
from .utils import (
compat_str,
ExtractorError,
struct_unpack,
)
def _extract_tags(file_contents):
    """Yield ``(tag_code, tag_body)`` for each tag of a compressed SWF file.

    Only files whose first byte is ``C`` (zlib-compressed body starting at
    offset 8) are handled.  Raises ExtractorError when bytes 1-2 are not
    ``WS`` and NotImplementedError for any other first byte.
    """
    if file_contents[1:3] != b'WS':
        raise ExtractorError(
            'Not an SWF file; header is %r' % file_contents[:3])
    if file_contents[:1] != b'C':
        raise NotImplementedError(
            'Unsupported compression format %r' %
            file_contents[:1])
    content = zlib.decompress(file_contents[8:])
    total = len(content)

    # Determine number of bits in framesize rectangle: the top 5 bits of the
    # first byte give the width of each of its 4 fields.
    rect_field_bits = struct_unpack('!B', content[:1])[0] >> 3
    rect_bytes = (5 + 4 * rect_field_bits + 7) // 8  # rounded up to bytes

    # Skip the rectangle plus two 2-byte header fields
    # (presumably frame rate and frame count -- TODO confirm).
    pos = rect_bytes + 2 + 2
    while pos < total:
        (header16,) = struct_unpack('<H', content[pos:pos + 2])
        pos += 2
        tag_code, tag_len = header16 >> 6, header16 & 0x3f
        if tag_len == 0x3f:
            # Long tag: the real length follows as a 32-bit integer
            (tag_len,) = struct_unpack('<I', content[pos:pos + 4])
            pos += 4
        tag_end = pos + tag_len
        assert tag_end <= total, \
            ("Tag %d ends at %d+%d - that's longer than the file (%d)"
                % (tag_code, pos, tag_len, total))
        yield (tag_code, content[pos:tag_end])
        pos = tag_end
class _AVMClass_Object(object):
def __init__(self, avm_class):
self.avm_class = avm_class
def __repr__(self):
return '%s#%x' % (self.avm_class.name, id(self))
class _ScopeDict(dict):
def __init__(self, avm_class):
super(_ScopeDict, self).__init__()
self.avm_class = avm_class
def __repr__(self):
return '%s__Scope(%s)' % (
self.avm_class.name,
super(_ScopeDict, self).__repr__())
class _AVMClass(object):
    """Bookkeeping for one class found in the ABC data.

    Holds the class name (and its multiname index), the name<->index tables
    of its methods, the parsed method bodies, the cache of compiled Python
    callables and the class-level variable scope.
    """

    def __init__(self, name_idx, name):
        self.name_idx = name_idx
        self.name = name
        self.method_names = {}
        self.method_idxs = {}
        self.methods = {}
        self.method_pyfunctions = {}
        self.variables = _ScopeDict(self)

    def make_object(self):
        """Return a fresh instance object of this class."""
        return _AVMClass_Object(self)

    def __repr__(self):
        return '_AVMClass(%s)' % (self.name)

    def register_methods(self, methods):
        """Record *methods* in both lookup directions."""
        for key, value in methods.items():
            self.method_names[key] = value
        for name, idx in methods.items():
            self.method_idxs[idx] = name
class _Multiname(object):
def __init__(self, kind):
self.kind = kind
def __repr__(self):
return '[MULTINAME kind: 0x%x]' % self.kind
def _read_int(reader):
    """Read a variable-length integer (7 payload bits per byte) from *reader*.

    At most five bytes are consumed; a byte whose high bit is clear ends the
    value.  Earlier bytes hold the less significant bits.
    """
    value = 0
    for shift in range(0, 5 * 7, 7):
        buf = reader.read(1)
        assert len(buf) == 1
        (b,) = struct_unpack('<B', buf)
        value |= (b & 0x7f) << shift
        if not b & 0x80:
            break
    return value
def _u30(reader):
    """Read a variable-length integer and assert its top four bits are clear."""
    value = _read_int(reader)
    assert not (value & 0xf0000000)
    return value
# Unsigned 32-bit reader.  Named with a leading underscore like its siblings
# (_u30, _s32, _s24); SWFInterpreter refers to it as ``_u32``.
_u32 = _read_int
# Backwards-compatible alias for the previous, un-prefixed name.
u32 = _u32
def _s32(reader):
    """Read a variable-length integer and interpret bit 31 as the sign bit."""
    raw = _read_int(reader)
    if raw & 0x80000000:
        # Two's complement: invert the low 32 bits, add one, negate
        return -((raw ^ 0xffffffff) + 1)
    return raw
def _s24(reader):
    """Read a signed little-endian 24-bit integer from *reader*."""
    bs = reader.read(3)
    assert len(bs) == 3
    # Sign-extend to four bytes so it can be unpacked as a 32-bit int
    is_negative = ord(bs[2:3]) >= 0x80
    if is_negative:
        padded = bs + b'\xff'
    else:
        padded = bs + b'\x00'
    return struct_unpack('<i', padded)[0]
def _read_string(reader):
    """Read a length-prefixed (u30) UTF-8 string from *reader*."""
    expected = _u30(reader)
    raw = reader.read(expected)
    assert len(raw) == expected
    return raw.decode('utf-8')
def _read_bytes(count, reader):
assert count >= 0
resb = reader.read(count)
assert len(resb) == count
return resb
def _read_byte(reader):
    """Read one byte from *reader* and return it as an integer."""
    (value,) = struct_unpack('<B', _read_bytes(1, reader=reader))
    return value
class SWFInterpreter(object):
    """Parse the ABC (AVM2 bytecode) of an SWF file and run its methods.

    The constructor parses the constant pool, classes and method bodies of
    the first tag with code 82.  ``extract_class`` looks a class up by name
    and ``extract_function`` turns one of its methods into a Python callable
    that interprets the subset of opcodes implemented below.
    """

    def __init__(self, file_contents):
        """Parse *file_contents* (the raw bytes of a compressed SWF file).

        Populates ``self.constant_strings``, ``self.multinames`` and
        ``self._classes_by_name``.  Malformed data trips the assertions in
        the reader helpers; an unknown trait kind raises ExtractorError.
        """
        code_tag = next(tag
                        for tag_code, tag in _extract_tags(file_contents)
                        if tag_code == 82)
        # The bytecode starts right after the first NUL found at offset >= 4
        p = code_tag.index(b'\0', 4) + 1
        code_reader = io.BytesIO(code_tag[p:])

        # Parse ABC (AVM2 ByteCode)

        # Define a couple convenience methods
        u30 = lambda *args: _u30(*args, reader=code_reader)
        s32 = lambda *args: _s32(*args, reader=code_reader)
        # Use _read_int directly: the module defines no ``_u32`` name, so the
        # previous ``_u32(...)`` call raised NameError as soon as the uint
        # pool held more than one entry.
        u32 = lambda *args: _read_int(*args, reader=code_reader)
        read_bytes = lambda *args: _read_bytes(*args, reader=code_reader)
        read_byte = lambda *args: _read_byte(*args, reader=code_reader)

        # minor_version + major_version
        read_bytes(2 + 2)

        # Constant pool
        # (each pool's count includes an implicit entry 0, hence range(1, n))
        int_count = u30()
        for _c in range(1, int_count):
            s32()
        uint_count = u30()
        for _c in range(1, uint_count):
            u32()
        double_count = u30()
        read_bytes(max(0, (double_count - 1)) * 8)
        string_count = u30()
        self.constant_strings = ['']
        for _c in range(1, string_count):
            s = _read_string(code_reader)
            self.constant_strings.append(s)
        namespace_count = u30()
        for _c in range(1, namespace_count):
            read_bytes(1)  # kind
            u30()  # name
        ns_set_count = u30()
        for _c in range(1, ns_set_count):
            count = u30()
            for _c2 in range(count):
                u30()
        multiname_count = u30()
        # Number of u30 fields that follow each multiname kind
        MULTINAME_SIZES = {
            0x07: 2,  # QName
            0x0d: 2,  # QNameA
            0x0f: 1,  # RTQName
            0x10: 1,  # RTQNameA
            0x11: 0,  # RTQNameL
            0x12: 0,  # RTQNameLA
            0x09: 2,  # Multiname
            0x0e: 2,  # MultinameA
            0x1b: 1,  # MultinameL
            0x1c: 1,  # MultinameLA
        }
        self.multinames = ['']
        for _c in range(1, multiname_count):
            kind = u30()
            assert kind in MULTINAME_SIZES, 'Invalid multiname kind %r' % kind
            if kind == 0x07:
                # Only plain QNames are resolved to their string
                u30()  # namespace_idx
                name_idx = u30()
                self.multinames.append(self.constant_strings[name_idx])
            else:
                self.multinames.append(_Multiname(kind))
                for _c2 in range(MULTINAME_SIZES[kind]):
                    u30()

        # Methods
        method_count = u30()
        MethodInfo = collections.namedtuple(
            'MethodInfo',
            ['NEED_ARGUMENTS', 'NEED_REST'])
        method_infos = []
        for method_id in range(method_count):
            param_count = u30()
            u30()  # return type
            for _ in range(param_count):
                u30()  # param type
            u30()  # name index (always 0 for youtube)
            flags = read_byte()
            if flags & 0x08 != 0:
                # Options present
                option_count = u30()
                for c in range(option_count):
                    u30()  # val
                    read_bytes(1)  # kind
            if flags & 0x80 != 0:
                # Param names present
                for _ in range(param_count):
                    u30()  # param name
            mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
            method_infos.append(mi)

        # Metadata
        metadata_count = u30()
        for _c in range(metadata_count):
            u30()  # name
            item_count = u30()
            for _c2 in range(item_count):
                u30()  # key
                u30()  # value

        def parse_traits_info():
            """Consume one traits_info entry; return the methods it declares."""
            trait_name_idx = u30()
            kind_full = read_byte()
            kind = kind_full & 0x0f
            attrs = kind_full >> 4
            methods = {}
            if kind in [0x00, 0x06]:  # Slot or Const
                u30()  # Slot id
                u30()  # type_name_idx
                vindex = u30()
                if vindex != 0:
                    read_byte()  # vkind
            elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
                u30()  # disp_id
                method_idx = u30()
                methods[self.multinames[trait_name_idx]] = method_idx
            elif kind == 0x04:  # Class
                u30()  # slot_id
                u30()  # classi
            elif kind == 0x05:  # Function
                u30()  # slot_id
                function_idx = u30()
                # NOTE(review): this maps index -> name, the reverse of the
                # Method/Getter/Setter branch above -- confirm it is intended.
                methods[function_idx] = self.multinames[trait_name_idx]
            else:
                raise ExtractorError('Unsupported trait kind %d' % kind)

            if attrs & 0x4 != 0:  # Metadata present
                metadata_count = u30()
                for _c3 in range(metadata_count):
                    u30()  # metadata index

            return methods

        # Classes
        class_count = u30()
        classes = []
        for class_id in range(class_count):
            name_idx = u30()

            cname = self.multinames[name_idx]
            avm_class = _AVMClass(name_idx, cname)
            classes.append(avm_class)

            u30()  # super_name idx
            flags = read_byte()
            if flags & 0x08 != 0:  # Protected namespace is present
                u30()  # protected_ns_idx
            intrf_count = u30()
            for _c2 in range(intrf_count):
                u30()
            u30()  # iinit
            trait_count = u30()
            for _c2 in range(trait_count):
                trait_methods = parse_traits_info()
                avm_class.register_methods(trait_methods)

        assert len(classes) == class_count
        self._classes_by_name = dict((c.name, c) for c in classes)

        # A second pass over the same classes: cinit plus further traits
        for avm_class in classes:
            u30()  # cinit
            trait_count = u30()
            for _c2 in range(trait_count):
                trait_methods = parse_traits_info()
                avm_class.register_methods(trait_methods)

        # Scripts
        script_count = u30()
        for _c in range(script_count):
            u30()  # init
            trait_count = u30()
            for _c2 in range(trait_count):
                parse_traits_info()

        # Method bodies
        method_body_count = u30()
        Method = collections.namedtuple('Method', ['code', 'local_count'])
        for _c in range(method_body_count):
            method_idx = u30()
            u30()  # max_stack
            local_count = u30()
            u30()  # init_scope_depth
            u30()  # max_scope_depth
            code_length = u30()
            code = read_bytes(code_length)
            # Attach the body to every class that registered this method index
            for avm_class in classes:
                if method_idx in avm_class.method_idxs:
                    m = Method(code, local_count)
                    avm_class.methods[avm_class.method_idxs[method_idx]] = m
            exception_count = u30()
            for _c2 in range(exception_count):
                u30()  # from
                u30()  # to
                u30()  # target
                u30()  # exc_type
                u30()  # var_name
            trait_count = u30()
            for _c2 in range(trait_count):
                parse_traits_info()

        # The whole tag must have been consumed
        assert p + code_reader.tell() == len(code_tag)

    def extract_class(self, class_name):
        """Return the parsed class named *class_name*.

        Raises ExtractorError if the file defines no such class.
        """
        try:
            return self._classes_by_name[class_name]
        except KeyError:
            raise ExtractorError('Class %r not found' % class_name)

    def extract_function(self, avm_class, func_name):
        """Return a Python callable interpreting ``avm_class.func_name``.

        The callable takes one argument, the list of call arguments, and
        returns the value left by the ``returnvalue`` opcode.  Callables are
        cached per class.  If *func_name* is the name of a known class, a new
        instance object of that class is returned instead of a callable.

        Raises ExtractorError if the class has no such method.  The callable
        raises NotImplementedError on an opcode or property that is not
        supported.
        """
        if func_name in avm_class.method_pyfunctions:
            return avm_class.method_pyfunctions[func_name]
        if func_name in self._classes_by_name:
            return self._classes_by_name[func_name].make_object()
        if func_name not in avm_class.methods:
            raise ExtractorError('Cannot find function %s.%s' % (
                avm_class.name, func_name))
        m = avm_class.methods[func_name]

        def resfunc(args):
            # Helper functions
            coder = io.BytesIO(m.code)
            s24 = lambda: _s24(coder)
            u30 = lambda: _u30(coder)

            # Register 0 is the class scope, then the call arguments,
            # then the method's locals.
            registers = [avm_class.variables] + list(args) + [None] * m.local_count
            stack = []
            scopes = collections.deque([
                self._classes_by_name, avm_class.variables])
            while True:
                opcode = _read_byte(coder)
                if opcode == 17:  # iftrue
                    offset = s24()
                    value = stack.pop()
                    if value:
                        coder.seek(coder.tell() + offset)
                elif opcode == 18:  # iffalse
                    offset = s24()
                    value = stack.pop()
                    if not value:
                        coder.seek(coder.tell() + offset)
                elif opcode == 36:  # pushbyte
                    v = _read_byte(coder)
                    stack.append(v)
                elif opcode == 42:  # dup
                    value = stack[-1]
                    stack.append(value)
                elif opcode == 44:  # pushstring
                    idx = u30()
                    stack.append(self.constant_strings[idx])
                elif opcode == 48:  # pushscope
                    new_scope = stack.pop()
                    scopes.append(new_scope)
                elif opcode == 66:  # construct
                    arg_count = u30()
                    # Arguments are popped to keep the stack balanced but
                    # are not passed to any constructor.
                    args = list(reversed(
                        [stack.pop() for _ in range(arg_count)]))
                    obj = stack.pop()
                    res = obj.avm_class.make_object()
                    stack.append(res)
                elif opcode == 70:  # callproperty
                    index = u30()
                    mname = self.multinames[index]
                    arg_count = u30()
                    args = list(reversed(
                        [stack.pop() for _ in range(arg_count)]))
                    obj = stack.pop()

                    if isinstance(obj, _AVMClass_Object):
                        func = self.extract_function(obj.avm_class, mname)
                        res = func(args)
                        stack.append(res)
                        continue
                    elif isinstance(obj, _ScopeDict):
                        if mname in obj.avm_class.method_names:
                            func = self.extract_function(obj.avm_class, mname)
                            res = func(args)
                        else:
                            res = obj[mname]
                        stack.append(res)
                        continue
                    elif isinstance(obj, compat_str):
                        if mname == 'split':
                            assert len(args) == 1
                            assert isinstance(args[0], compat_str)
                            if args[0] == '':
                                res = list(obj)
                            else:
                                res = obj.split(args[0])
                            stack.append(res)
                            continue
                    elif isinstance(obj, list):
                        if mname == 'slice':
                            assert len(args) == 1
                            assert isinstance(args[0], int)
                            res = obj[args[0]:]
                            stack.append(res)
                            continue
                        elif mname == 'join':
                            assert len(args) == 1
                            assert isinstance(args[0], compat_str)
                            res = args[0].join(obj)
                            stack.append(res)
                            continue
                    # Every handled case above ends in ``continue``
                    raise NotImplementedError(
                        'Unsupported property %r on %r'
                        % (mname, obj))
                elif opcode == 72:  # returnvalue
                    res = stack.pop()
                    return res
                elif opcode == 74:  # constructproperty
                    index = u30()
                    arg_count = u30()
                    args = list(reversed(
                        [stack.pop() for _ in range(arg_count)]))
                    obj = stack.pop()

                    mname = self.multinames[index]
                    assert isinstance(obj, _AVMClass)
                    # Look the constructor up (this raises if it cannot be
                    # found), but ...
                    self.extract_function(obj, mname)
                    # We do not actually call the constructor for now;
                    # we just pretend it does nothing
                    stack.append(obj.make_object())
                elif opcode == 79:  # callpropvoid
                    index = u30()
                    mname = self.multinames[index]
                    arg_count = u30()
                    args = list(reversed(
                        [stack.pop() for _ in range(arg_count)]))
                    obj = stack.pop()
                    if mname == 'reverse':
                        assert isinstance(obj, list)
                        obj.reverse()
                    else:
                        raise NotImplementedError(
                            'Unsupported (void) property %r on %r'
                            % (mname, obj))
                elif opcode == 86:  # newarray
                    arg_count = u30()
                    arr = []
                    for i in range(arg_count):
                        arr.append(stack.pop())
                    # Values were popped last-first; restore push order
                    arr = arr[::-1]
                    stack.append(arr)
                elif opcode == 93:  # findpropstrict
                    index = u30()
                    mname = self.multinames[index]
                    # Innermost scope first; fall back to the outermost one
                    for s in reversed(scopes):
                        if mname in s:
                            res = s
                            break
                    else:
                        res = scopes[0]
                    stack.append(res[mname])
                elif opcode == 94:  # findproperty
                    index = u30()
                    mname = self.multinames[index]
                    for s in reversed(scopes):
                        if mname in s:
                            res = s
                            break
                    else:
                        res = avm_class.variables
                    stack.append(res)
                elif opcode == 96:  # getlex
                    index = u30()
                    mname = self.multinames[index]
                    for s in reversed(scopes):
                        if mname in s:
                            scope = s
                            break
                    else:
                        scope = avm_class.variables
                    # I cannot find where static variables are initialized
                    # so let's just return None
                    res = scope.get(mname)
                    stack.append(res)
                elif opcode == 97:  # setproperty
                    index = u30()
                    value = stack.pop()
                    idx = self.multinames[index]
                    if isinstance(idx, _Multiname):
                        # Unresolved multiname: the key is taken from the stack
                        idx = stack.pop()
                    obj = stack.pop()
                    obj[idx] = value
                elif opcode == 98:  # getlocal
                    index = u30()
                    stack.append(registers[index])
                elif opcode == 99:  # setlocal
                    index = u30()
                    value = stack.pop()
                    registers[index] = value
                elif opcode == 102:  # getproperty
                    index = u30()
                    pname = self.multinames[index]
                    if pname == 'length':
                        obj = stack.pop()
                        assert isinstance(obj, list)
                        stack.append(len(obj))
                    else:  # Assume attribute access
                        idx = stack.pop()
                        assert isinstance(idx, int)
                        obj = stack.pop()
                        assert isinstance(obj, list)
                        stack.append(obj[idx])
                elif opcode == 115:  # convert_
                    value = stack.pop()
                    intvalue = int(value)
                    stack.append(intvalue)
                elif opcode == 128:  # coerce
                    u30()
                elif opcode == 133:  # coerce_s
                    assert isinstance(stack[-1], (type(None), compat_str))
                elif opcode == 160:  # add
                    value2 = stack.pop()
                    value1 = stack.pop()
                    res = value1 + value2
                    stack.append(res)
                elif opcode == 161:  # subtract
                    value2 = stack.pop()
                    value1 = stack.pop()
                    res = value1 - value2
                    stack.append(res)
                elif opcode == 164:  # modulo
                    value2 = stack.pop()
                    value1 = stack.pop()
                    res = value1 % value2
                    stack.append(res)
                elif opcode == 175:  # greaterequals
                    value2 = stack.pop()
                    value1 = stack.pop()
                    result = value1 >= value2
                    stack.append(result)
                elif opcode == 208:  # getlocal_0
                    stack.append(registers[0])
                elif opcode == 209:  # getlocal_1
                    stack.append(registers[1])
                elif opcode == 210:  # getlocal_2
                    stack.append(registers[2])
                elif opcode == 211:  # getlocal_3
                    stack.append(registers[3])
                elif opcode == 212:  # setlocal_0
                    registers[0] = stack.pop()
                elif opcode == 213:  # setlocal_1
                    registers[1] = stack.pop()
                elif opcode == 214:  # setlocal_2
                    registers[2] = stack.pop()
                elif opcode == 215:  # setlocal_3
                    registers[3] = stack.pop()
                else:
                    raise NotImplementedError(
                        'Unsupported opcode %d' % opcode)

        avm_class.method_pyfunctions[func_name] = resfunc
        return resfunc

View File

@@ -1194,6 +1194,8 @@ def format_bytes(bytes):
def str_to_int(int_str): def str_to_int(int_str):
if int_str is None:
return None
int_str = re.sub(r'[,\.]', u'', int_str) int_str = re.sub(r'[,\.]', u'', int_str)
return int(int_str) return int(int_str)
@@ -1428,7 +1430,7 @@ US_RATINGS = {
def strip_jsonp(code): def strip_jsonp(code):
return re.sub(r'(?s)^[a-zA-Z_]+\s*\(\s*(.*)\);\s*?\s*$', r'\1', code) return re.sub(r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?\s*$', r'\1', code)
def qualities(quality_ids): def qualities(quality_ids):

View File

@@ -1,2 +1,2 @@
__version__ = '2014.07.11.2' __version__ = '2014.07.20.2'