Compare commits

..

66 Commits

Author SHA1 Message Date
Philipp Hagemeister
561266641f release 2014.09.10.1 2014-09-10 16:39:23 +02:00
Sergey M․
6899f2fe9e [hostingbulk] Simplify 2014-09-10 19:13:22 +07:00
Sergey M.
136c8bd275 Merge pull request #3709 from naglis/hostingbulk
[hostingbulk] Add new extractor
2014-09-10 19:04:14 +07:00
Sergey M․
1bf5423e82 [generic/youtube] Recognize youtube nocookie embeds (Closes #3713) 2014-09-10 18:29:20 +07:00
Sergey M․
2c5c1f48e9 [izlesene] Simplify 2014-09-10 18:24:57 +07:00
Sergey M.
d8e487fbd9 Merge pull request #3710 from naglis/izlesene_fix
Izlesene fix
2014-09-10 18:20:57 +07:00
Philipp Hagemeister
bc7ff0a8dd release 2014.09.10 2014-09-10 12:26:39 +02:00
Naglis Jonaitis
8e9da53140 [izlesene] Remove API URL 2014-09-10 00:24:48 +03:00
Naglis Jonaitis
f1d15e6dbc [izlesene] Adapt to website changes and improve 2014-09-10 00:22:48 +03:00
Naglis Jonaitis
91ebb17ede [hostingbulk] Add new extractor 2014-09-09 22:17:33 +03:00
Sergey M․
c63b30901b [noco] Add authenticate support (Closes #3706) 2014-09-10 01:24:05 +07:00
Sergey M․
f009f19ece [rutv] Fix rutv player regex 2014-09-10 01:09:04 +07:00
Sergey M․
68477e8839 [khanacademy] Improve _VALID_URL (Closes #3695) 2014-09-09 22:09:32 +07:00
Sergey M․
0dc5365564 [moevideo] Make filesize exact 2014-09-09 21:48:10 +07:00
Sergey M.
9face18d08 Merge pull request #3690 from naglis/sharesix
[sharesix] Add new extractor
2014-09-09 21:14:59 +07:00
Sergey M․
ff0ba8ce0f [moevideo] Add videochart.net to _VALID_URL 2014-09-09 21:04:57 +07:00
Sergey M.
759c6293bd Merge pull request #3691 from naglis/moevideo
[moevideo] Add new extractor
2014-09-09 20:56:35 +07:00
Sergey M․
3fbeb95e14 [vgtv] Add view count to test cases 2014-09-08 21:33:03 +07:00
Sergey M․
6e25f51cdf Merge branch 'mrkolby-vgtv' 2014-09-08 21:26:59 +07:00
Sergey M․
321c1e44f9 [vgtv] Extract all formats, add support for wasLive and live streamTypes 2014-09-08 21:26:42 +07:00
Sergey M․
cc7fec5818 Merge branch 'vgtv' of https://github.com/mrkolby/youtube-dl into mrkolby-vgtv 2014-09-08 19:49:23 +07:00
Naglis Jonaitis
5fb9077e8c [moevideo] Add new extractor 2014-09-07 01:21:58 +03:00
Naglis Jonaitis
8e20f81c5b [sharesix] Add new extractor 2014-09-06 18:59:15 +03:00
Philipp Hagemeister
e154762c74 release 2014.09.06 2014-09-06 15:26:38 +02:00
Sergey M․
ba92ab3d05 Merge branch 'Rudloff-unistra_hd' 2014-09-06 15:22:01 +07:00
Sergey M․
a2f0cdc074 [unistra] Modernize 2014-09-06 15:21:27 +07:00
Sergey M․
70a1ecd2c1 Merge branch 'unistra_hd' of https://github.com/Rudloff/youtube-dl into Rudloff-unistra_hd 2014-09-05 22:26:31 +07:00
Sergey M․
88a23aef5a [http] Avoid closing stdout (Fixes #3686) 2014-09-05 22:05:36 +07:00
Sergey M․
140d8d77b3 Credit @mrkolby for dbtv.no (#3685) 2014-09-05 20:00:12 +07:00
Sergey M․
665cd96929 Merge branch 'mrkolby-dbtv' 2014-09-05 19:54:10 +07:00
Sergey M․
4d067a58ca [dbtv] Simplify, modernize, extract all formats 2014-09-05 19:53:53 +07:00
Sergey M․
1c1cff6a52 Merge branch 'dbtv' of https://github.com/mrkolby/youtube-dl into mrkolby-dbtv
Conflicts:
	youtube_dl/extractor/__init__.py
2014-09-05 19:01:11 +07:00
Magnus Kolstad
f063a04f07 [dbtv] Add new extractor 2014-09-05 11:24:30 +02:00
Pierre Rudloff
af8812bb9b Add HD detection to Unistra 2014-09-04 22:22:19 +02:00
Magnus Kolstad
78149a962b [vgtv] Add new extractor
Because of the #! in the URL structure for VGTV we need to add ' before and after given URL. Or else it will cry:

-bash: !/video/100495/lars-og-lars-sesong-6-episode-6-lakselus: event not found
2014-09-04 21:02:47 +02:00
Sergey M․
f2d9e3a370 [arte.tv:+7] Allow single quotes for json vp url regexes (Closes #3676) 2014-09-05 01:17:16 +07:00
Philipp Hagemeister
16e6f396b4 release 2014.09.04.3 2014-09-04 16:20:17 +02:00
Sergey M․
c6ec6b2e8b [arte.tv:+7] Add one another one pattern for json vp url 2014-09-04 20:44:51 +07:00
Sergey M․
7bbc6428b6 [nba] Modernize 2014-09-04 20:06:14 +07:00
Sergey M․
c1a3c9ddb2 [techtalks] Modernize 2014-09-04 19:48:29 +07:00
Sergey M․
feec0f56f5 [toypics:user] Update test playlist count 2014-09-04 19:37:40 +07:00
Sergey M․
8029857d27 [washingtonpost] Add playlist title to test 2014-09-04 19:34:40 +07:00
Philipp Hagemeister
aa61802c1e release 2014.09.04.2 2014-09-04 06:40:44 +02:00
Philipp Hagemeister
f54aee0209 [cliphunter] Add support for more formats 2014-09-04 06:40:15 +02:00
Philipp Hagemeister
5df921b0e3 [test_cache] Add a dot in the file name 2014-09-04 04:51:52 +02:00
Philipp Hagemeister
35d5b67876 release 2014.09.04.1 2014-09-04 04:48:05 +02:00
Philipp Hagemeister
674c869af4 [cache] Allow dots in keys (Fixes #3674) 2014-09-04 04:47:56 +02:00
Philipp Hagemeister
10710ae386 release 2014.09.04 2014-09-04 01:30:43 +02:00
Philipp Hagemeister
a0e07d3161 [youtube] Move cache into its own module 2014-09-03 17:29:19 +02:00
Sergey M․
88fc294f7f Merge branch 'peugeot-tnaflix' 2014-09-03 21:08:50 +07:00
Sergey M․
a232bb9551 [empflix] Rewrite in terms of tnaflix 2014-09-03 21:08:36 +07:00
Sergey M․
eb833b7f5a [tnaflix] Improve and make generic 2014-09-03 21:07:18 +07:00
Sergey M․
f164038b79 [utils] Make parse_duration case insensitive 2014-09-03 21:03:36 +07:00
Sergey M․
f7a361c4f1 Merge branch 'tnaflix' of https://github.com/peugeot/youtube-dl into peugeot-tnaflix 2014-09-03 20:11:49 +07:00
Sergey M․
884ae74785 [tvigle] Adapt to the new API 2014-09-03 19:59:36 +07:00
peugeot
1dba4a2185 Add support for TNAFlix 2014-09-03 14:10:06 +02:00
Philipp Hagemeister
7d4d5f25ed [facebook] Fix login (Fixes #3667) 2014-09-03 09:50:10 +02:00
Sergey M․
33422c056d [drtuber] Add display_id to test 2014-09-02 21:40:03 +07:00
Sergey M․
a7862a1bc8 [eporner] Extract all formats 2014-09-02 21:39:22 +07:00
Sergey M․
3baa62e8d1 [beeg] Extract all formats 2014-09-02 20:54:00 +07:00
Sergey M․
1bf8cf5c2c [drtuber] Extract display_id 2014-09-02 20:39:16 +07:00
Sergey M․
eade1d7eab [drtuber] Extract counters 2014-09-02 20:36:26 +07:00
Sergey M․
1a94ff6865 [mlb] Add support for embedded videos (Closes #3653) 2014-09-02 20:19:28 +07:00
Philipp Hagemeister
b47ed50aaf [nosvideo] Remove determine_ext usage (#3655) 2014-09-02 00:17:04 +02:00
Philipp Hagemeister
1b8477729a Merge remote-tracking branch 'naglis/nosvideo' 2014-09-02 00:14:58 +02:00
Naglis Jonaitis
49fa38adf2 [nosvideo] Add new extractor 2014-09-01 23:47:14 +03:00
35 changed files with 1211 additions and 269 deletions

59
test/test_cache.py Normal file
View File

@@ -0,0 +1,59 @@
#!/usr/bin/env python
# coding: utf-8
from __future__ import unicode_literals
import shutil
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL
from youtube_dl.cache import Cache
def _is_empty(d):
return not bool(os.listdir(d))
def _mkdir(d):
if not os.path.exists(d):
os.mkdir(d)
class TestCache(unittest.TestCase):
def setUp(self):
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata')
_mkdir(TESTDATA_DIR)
self.test_dir = os.path.join(TESTDATA_DIR, 'cache_test')
self.tearDown()
def tearDown(self):
if os.path.exists(self.test_dir):
shutil.rmtree(self.test_dir)
def test_cache(self):
ydl = FakeYDL({
'cachedir': self.test_dir,
})
c = Cache(ydl)
obj = {'x': 1, 'y': ['ä', '\\a', True]}
self.assertEqual(c.load('test_cache', 'k.'), None)
c.store('test_cache', 'k.', obj)
self.assertEqual(c.load('test_cache', 'k2'), None)
self.assertFalse(_is_empty(self.test_dir))
self.assertEqual(c.load('test_cache', 'k.'), obj)
self.assertEqual(c.load('test_cache', 'y'), None)
self.assertEqual(c.load('test_cache2', 'k.'), None)
c.remove()
self.assertFalse(os.path.exists(self.test_dir))
self.assertEqual(c.load('test_cache', 'k.'), None)
if __name__ == '__main__':
unittest.main()

View File

@@ -57,6 +57,7 @@ from .utils import (
YoutubeDLHandler,
prepend_extension,
)
from .cache import Cache
from .extractor import get_info_extractor, gen_extractors
from .downloader import get_suitable_downloader
from .postprocessor import FFmpegMergerPP
@@ -133,7 +134,7 @@ class YoutubeDL(object):
daterange: A DateRange object, download only if the upload_date is in the range.
skip_download: Skip the actual download of the video file
cachedir: Location of the cache files in the filesystem.
None to disable filesystem cache.
False to disable filesystem cache.
noplaylist: Download single video instead of a playlist if in doubt.
age_limit: An integer representing the user's age in years.
Unsuitable videos for the given age are skipped.
@@ -195,6 +196,7 @@ class YoutubeDL(object):
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
self._err_file = sys.stderr
self.params = params
self.cache = Cache(self)
if params.get('bidi_workaround', False):
try:

View File

@@ -74,6 +74,7 @@ __authors__ = (
'Keith Beckman',
'Ole Ernst',
'Aaron McDaniel (mcd1992)',
'Magnus Kolstad',
)
__license__ = 'Public Domain'
@@ -84,7 +85,6 @@ import optparse
import os
import random
import shlex
import shutil
import sys
@@ -96,7 +96,6 @@ from .utils import (
decodeOption,
get_term_width,
DownloadError,
get_cachedir,
MaxDownloadsReached,
preferredencoding,
read_batch_urls,
@@ -518,10 +517,10 @@ def parseOpts(overrideArguments=None):
filesystem.add_option('--cookies',
dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
filesystem.add_option(
'--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
'--cache-dir', dest='cachedir', default=None, metavar='DIR',
help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
filesystem.add_option(
'--no-cache-dir', action='store_const', const=None, dest='cachedir',
'--no-cache-dir', action='store_const', const=False, dest='cachedir',
help='Disable filesystem caching')
filesystem.add_option(
'--rm-cache-dir', action='store_true', dest='rm_cachedir',
@@ -872,20 +871,7 @@ def _real_main(argv=None):
# Remove cache dir
if opts.rm_cachedir:
if opts.cachedir is None:
ydl.to_screen(u'No cache dir specified (Did you combine --no-cache-dir and --rm-cache-dir?)')
else:
if ('.cache' not in opts.cachedir) or ('youtube-dl' not in opts.cachedir):
ydl.to_screen(u'Not removing directory %s - this does not look like a cache dir' % opts.cachedir)
retcode = 141
else:
ydl.to_screen(
u'Removing cache dir %s .' % opts.cachedir,
skip_eol=True)
if os.path.exists(opts.cachedir):
ydl.to_screen(u'.', skip_eol=True)
shutil.rmtree(opts.cachedir)
ydl.to_screen(u'.')
ydl.cache.remove()
# Maybe do nothing
if (len(all_urls) < 1) and (opts.load_info_filename is None):

94
youtube_dl/cache.py Normal file
View File

@@ -0,0 +1,94 @@
from __future__ import unicode_literals
import errno
import io
import json
import os
import re
import shutil
import traceback
from .utils import (
write_json_file,
)
class Cache(object):
def __init__(self, ydl):
self._ydl = ydl
def _get_root_dir(self):
res = self._ydl.params.get('cachedir')
if res is None:
cache_root = os.environ.get('XDG_CACHE_HOME', '~/.cache')
res = os.path.join(cache_root, 'youtube-dl')
return os.path.expanduser(res)
def _get_cache_fn(self, section, key, dtype):
assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \
'invalid section %r' % section
assert re.match(r'^[a-zA-Z0-9_.-]+$', key), 'invalid key %r' % key
return os.path.join(
self._get_root_dir(), section, '%s.%s' % (key, dtype))
@property
def enabled(self):
return self._ydl.params.get('cachedir') is not False
def store(self, section, key, data, dtype='json'):
assert dtype in ('json',)
if not self.enabled:
return
fn = self._get_cache_fn(section, key, dtype)
try:
try:
os.makedirs(os.path.dirname(fn))
except OSError as ose:
if ose.errno != errno.EEXIST:
raise
write_json_file(data, fn)
except Exception:
tb = traceback.format_exc()
self._ydl.report_warning(
'Writing cache to %r failed: %s' % (fn, tb))
def load(self, section, key, dtype='json', default=None):
assert dtype in ('json',)
if not self.enabled:
return default
cache_fn = self._get_cache_fn(section, key, dtype)
try:
try:
with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
return json.load(cachef)
except ValueError:
try:
file_size = os.path.getsize(cache_fn)
except (OSError, IOError) as oe:
file_size = str(oe)
self._ydl.report_warning(
'Cache retrieval from %s failed (%s)' % (cache_fn, file_size))
except IOError:
pass # No cache available
return default
def remove(self):
if not self.enabled:
self._ydl.to_screen('Cache is disabled (Did you combine --no-cache-dir and --rm-cache-dir?)')
return
cachedir = self._get_root_dir()
if not any((term in cachedir) for term in ('cache', 'tmp')):
raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir)
self._ydl.to_screen(
'Removing cache dir %s .' % cachedir, skip_eol=True)
if os.path.exists(cachedir):
self._ydl.to_screen('.', skip_eol=True)
shutil.rmtree(cachedir)
self._ydl.to_screen('.')

View File

@@ -193,7 +193,8 @@ class HttpFD(FileDownloader):
self.to_stderr(u"\n")
self.report_error(u'Did not get any data blocks')
return False
stream.close()
if tmpfilename != u'-':
stream.close()
self.report_finish(data_len_str, (time.time() - start))
if data_len is not None and byte_counter != data_len:
raise ContentTooShortError(byte_counter, int(data_len))

View File

@@ -67,6 +67,7 @@ from .dailymotion import (
DailymotionUserIE,
)
from .daum import DaumIE
from .dbtv import DBTVIE
from .dfb import DFBIE
from .dotsub import DotsubIE
from .dreisat import DreiSatIE
@@ -86,7 +87,7 @@ from .ellentv import (
EllenTVClipsIE,
)
from .elpais import ElPaisIE
from .empflix import EmpflixIE
from .empflix import EMPFlixIE
from .engadget import EngadgetIE
from .eporner import EpornerIE
from .escapist import EscapistIE
@@ -139,6 +140,7 @@ from .hark import HarkIE
from .helsinki import HelsinkiIE
from .hentaistigma import HentaiStigmaIE
from .hornbunny import HornBunnyIE
from .hostingbulk import HostingBulkIE
from .hotnewhiphop import HotNewHipHopIE
from .howcast import HowcastIE
from .howstuffworks import HowStuffWorksIE
@@ -199,6 +201,7 @@ from .mitele import MiTeleIE
from .mixcloud import MixcloudIE
from .mlb import MLBIE
from .mpora import MporaIE
from .moevideo import MoeVideoIE
from .mofosex import MofosexIE
from .mojvideo import MojvideoIE
from .mooshare import MooshareIE
@@ -235,6 +238,7 @@ from .niconico import NiconicoIE
from .ninegag import NineGagIE
from .noco import NocoIE
from .normalboots import NormalbootsIE
from .nosvideo import NosVideoIE
from .novamov import NovaMovIE
from .nowness import NownessIE
from .nowvideo import NowVideoIE
@@ -294,6 +298,7 @@ from .scivee import SciVeeIE
from .screencast import ScreencastIE
from .servingsys import ServingSysIE
from .shared import SharedIE
from .sharesix import ShareSixIE
from .sina import SinaIE
from .slideshare import SlideshareIE
from .slutload import SlutloadIE
@@ -347,6 +352,7 @@ from .theplatform import ThePlatformIE
from .thisav import ThisAVIE
from .tinypic import TinyPicIE
from .tlc import TlcIE, TlcDeIE
from .tnaflix import TNAFlixIE
from .toutv import TouTvIE
from .toypics import ToypicsUserIE, ToypicsIE
from .traileraddict import TrailerAddictIE
@@ -372,6 +378,7 @@ from .veehd import VeeHDIE
from .veoh import VeohIE
from .vesti import VestiIE
from .vevo import VevoIE
from .vgtv import VGTVIE
from .vh1 import VH1IE
from .viddler import ViddlerIE
from .videobam import VideoBamIE

View File

@@ -78,7 +78,8 @@ class ArteTVPlus7IE(InfoExtractor):
def _extract_from_webpage(self, webpage, video_id, lang):
json_url = self._html_search_regex(
r'arte_vp_url="(.*?)"', webpage, 'json vp url')
[r'arte_vp_url=["\'](.*?)["\']', r'data-url=["\']([^"]+)["\']'],
webpage, 'json vp url')
return self._extract_from_json_url(json_url, video_id, lang)
def _extract_from_json_url(self, json_url, video_id, lang):

View File

@@ -27,8 +27,16 @@ class BeegIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
video_url = self._html_search_regex(
r"'480p'\s*:\s*'([^']+)'", webpage, 'video URL')
quality_arr = self._search_regex(
r'(?s)var\s+qualityArr\s*=\s*{\s*(.+?)\s*}', webpage, 'quality formats')
formats = [{
'url': fmt[1],
'format_id': fmt[0],
'height': int(fmt[0][:-1]),
} for fmt in re.findall(r"'([^']+)'\s*:\s*'([^']+)'", quality_arr)]
self._sort_formats(formats)
title = self._html_search_regex(
r'<title>([^<]+)\s*-\s*beeg\.?</title>', webpage, 'title')
@@ -48,10 +56,10 @@ class BeegIE(InfoExtractor):
return {
'id': video_id,
'url': video_url,
'title': title,
'description': description,
'thumbnail': thumbnail,
'categories': categories,
'formats': formats,
'age_limit': 18,
}

View File

@@ -1,11 +1,13 @@
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import int_or_none
translation_table = {
_translation_table = {
'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n',
'm': 'a', 'n': 'm', 'p': 'u', 'q': 't', 'r': 's', 'v': 'p', 'x': 'r',
'y': 'l', 'z': 'i',
@@ -13,6 +15,10 @@ translation_table = {
}
def _decode(s):
return ''.join(_translation_table.get(c, c) for c in s)
class CliphunterIE(InfoExtractor):
IE_NAME = 'cliphunter'
@@ -22,10 +28,14 @@ class CliphunterIE(InfoExtractor):
'''
_TEST = {
'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo',
'file': '1012420.flv',
'md5': '15e7740f30428abf70f4223478dc1225',
'md5': 'a2ba71eebf523859fe527a61018f723e',
'info_dict': {
'id': '1012420',
'ext': 'mp4',
'title': 'Fun Jynx Maze solo',
'thumbnail': 're:^https?://.*\.jpg$',
'age_limit': 18,
'duration': 1317,
}
}
@@ -35,22 +45,55 @@ class CliphunterIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
video_title = self._search_regex(
r'mediaTitle = "([^"]+)"', webpage, 'title')
pl_fiji = self._search_regex(
r'pl_fiji = \'([^\']+)\'', webpage, 'video data')
pl_c_qual = self._search_regex(
r'pl_c_qual = "(.)"', webpage, 'video quality')
video_title = self._search_regex(
r'mediaTitle = "([^"]+)"', webpage, 'title')
video_url = ''.join(translation_table.get(c, c) for c in pl_fiji)
video_url = _decode(pl_fiji)
formats = [{
'url': video_url,
'format_id': pl_c_qual,
'format_id': 'default-%s' % pl_c_qual,
}]
qualities_json = self._search_regex(
r'var pl_qualities\s*=\s*(.*?);\n', webpage, 'quality info')
qualities_data = json.loads(qualities_json)
for i, t in enumerate(
re.findall(r"pl_fiji_([a-z0-9]+)\s*=\s*'([^']+')", webpage)):
quality_id, crypted_url = t
video_url = _decode(crypted_url)
f = {
'format_id': quality_id,
'url': video_url,
'quality': i,
}
if quality_id in qualities_data:
qd = qualities_data[quality_id]
m = re.match(
r'''(?x)<b>(?P<width>[0-9]+)x(?P<height>[0-9]+)<\\/b>
\s*\(\s*(?P<tbr>[0-9]+)\s*kb\\/s''', qd)
if m:
f['width'] = int(m.group('width'))
f['height'] = int(m.group('height'))
f['tbr'] = int(m.group('tbr'))
formats.append(f)
self._sort_formats(formats)
thumbnail = self._search_regex(
r"var\s+mov_thumb\s*=\s*'([^']+)';",
webpage, 'thumbnail', fatal=False)
duration = int_or_none(self._search_regex(
r'pl_dur\s*=\s*([0-9]+)', webpage, 'duration', fatal=False))
return {
'id': video_id,
'title': video_title,
'formats': formats,
'duration': duration,
'age_limit': self._rta_search(webpage),
'thumbnail': thumbnail,
}

View File

@@ -0,0 +1,74 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
float_or_none,
int_or_none,
clean_html,
)
class DBTVIE(InfoExtractor):
_VALID_URL = r'http://dbtv\.no/(?P<id>[0-9]+)#(?P<display_id>.+)'
_TEST = {
'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
'md5': 'b89953ed25dacb6edb3ef6c6f430f8bc',
'info_dict': {
'id': '33100',
'display_id': 'Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
'ext': 'mp4',
'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen',
'description': 'md5:1504a54606c4dde3e4e61fc97aa857e0',
'thumbnail': 're:https?://.*\.jpg$',
'timestamp': 1404039863.438,
'upload_date': '20140629',
'duration': 69.544,
'view_count': int,
'categories': list,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
display_id = mobj.group('display_id')
data = self._download_json(
'http://api.dbtv.no/discovery/%s' % video_id, display_id)
video = data['playlist'][0]
formats = [{
'url': f['URL'],
'vcodec': f.get('container'),
'width': int_or_none(f.get('width')),
'height': int_or_none(f.get('height')),
'vbr': float_or_none(f.get('rate'), 1000),
'filesize': int_or_none(f.get('size')),
} for f in video['renditions'] if 'URL' in f]
if not formats:
for url_key, format_id in [('URL', 'mp4'), ('HLSURL', 'hls')]:
if url_key in video:
formats.append({
'url': video[url_key],
'format_id': format_id,
})
self._sort_formats(formats)
return {
'id': video['id'],
'display_id': display_id,
'title': video['title'],
'description': clean_html(video['desc']),
'thumbnail': video.get('splash') or video.get('thumb'),
'timestamp': float_or_none(video.get('publishedAt'), 1000),
'duration': float_or_none(video.get('length'), 1000),
'view_count': int_or_none(video.get('views')),
'categories': video.get('tags'),
'formats': formats,
}

View File

@@ -3,17 +3,22 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import str_to_int
class DrTuberIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?drtuber\.com/video/(?P<id>\d+)/(?P<title_dash>[\w-]+)'
_VALID_URL = r'https?://(?:www\.)?drtuber\.com/video/(?P<id>\d+)/(?P<display_id>[\w-]+)'
_TEST = {
'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf',
'md5': '93e680cf2536ad0dfb7e74d94a89facd',
'info_dict': {
'id': '1740434',
'display_id': 'hot-perky-blonde-naked-golf',
'ext': 'mp4',
'title': 'Hot Perky Blonde Naked Golf',
'like_count': int,
'dislike_count': int,
'comment_count': int,
'categories': list, # NSFW
'thumbnail': 're:https?://.*\.jpg$',
'age_limit': 18,
@@ -23,8 +28,9 @@ class DrTuberIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
display_id = mobj.group('display_id')
webpage = self._download_webpage(url, video_id)
webpage = self._download_webpage(url, display_id)
video_url = self._html_search_regex(
r'<source src="([^"]+)"', webpage, 'video URL')
@@ -36,15 +42,29 @@ class DrTuberIE(InfoExtractor):
r'poster="([^"]+)"',
webpage, 'thumbnail', fatal=False)
like_count = str_to_int(self._html_search_regex(
r'<span id="rate_likes">\s*<img[^>]+>\s*<span>([\d,\.]+)</span>',
webpage, 'like count', fatal=False))
dislike_count = str_to_int(self._html_search_regex(
r'<span id="rate_dislikes">\s*<img[^>]+>\s*<span>([\d,\.]+)</span>',
webpage, 'like count', fatal=False))
comment_count = str_to_int(self._html_search_regex(
r'<span class="comments_count">([\d,\.]+)</span>',
webpage, 'comment count', fatal=False))
cats_str = self._html_search_regex(
r'<meta name="keywords" content="([^"]+)"', webpage, 'categories', fatal=False)
categories = None if cats_str is None else cats_str.split(' ')
return {
'id': video_id,
'display_id': display_id,
'url': video_url,
'title': title,
'thumbnail': thumbnail,
'like_count': like_count,
'dislike_count': dislike_count,
'comment_count': comment_count,
'categories': categories,
'age_limit': self._rta_search(webpage),
}

View File

@@ -1,58 +1,25 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import fix_xml_ampersands
from .tnaflix import TNAFlixIE
class EmpflixIE(InfoExtractor):
_VALID_URL = r'^https?://www\.empflix\.com/videos/.*?-(?P<id>[0-9]+)\.html'
class EMPFlixIE(TNAFlixIE):
_VALID_URL = r'^https?://www\.empflix\.com/videos/(?P<display_id>[0-9a-zA-Z-]+)-(?P<id>[0-9]+)\.html'
_TITLE_REGEX = r'name="title" value="(?P<title>[^"]*)"'
_DESCRIPTION_REGEX = r'name="description" value="([^"]*)"'
_CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'
_TEST = {
'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
'md5': 'b1bc15b6412d33902d6e5952035fcabc',
'info_dict': {
'id': '33051',
'display_id': 'Amateur-Finger-Fuck',
'ext': 'mp4',
'title': 'Amateur Finger Fuck',
'description': 'Amateur solo finger fucking.',
'thumbnail': 're:https?://.*\.jpg$',
'age_limit': 18,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
age_limit = self._rta_search(webpage)
video_title = self._html_search_regex(
r'name="title" value="(?P<title>[^"]*)"', webpage, 'title')
video_description = self._html_search_regex(
r'name="description" value="([^"]*)"', webpage, 'description', fatal=False)
cfg_url = self._html_search_regex(
r'flashvars\.config = escape\("([^"]+)"',
webpage, 'flashvars.config')
cfg_xml = self._download_xml(
cfg_url, video_id, note='Downloading metadata',
transform_source=fix_xml_ampersands)
formats = [
{
'url': item.find('videoLink').text,
'format_id': item.find('res').text,
} for item in cfg_xml.findall('./quality/item')
]
thumbnail = cfg_xml.find('./startThumb').text
return {
'id': video_id,
'title': video_title,
'description': video_description,
'thumbnail': thumbnail,
'formats': formats,
'age_limit': age_limit,
}

View File

@@ -11,12 +11,13 @@ from ..utils import (
class EpornerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\d+)/(?P<title_dash>[\w-]+)/?'
_VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\d+)/(?P<display_id>[\w-]+)'
_TEST = {
'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
'md5': '3b427ae4b9d60619106de3185c2987cd',
'info_dict': {
'id': '95008',
'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video',
'ext': 'flv',
'title': 'Infamous Tiffany Teen Strip Tease Video',
'duration': 194,
@@ -28,7 +29,9 @@ class EpornerIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id)
title = self._html_search_regex(
r'<title>(.*?) - EPORNER', webpage, 'title')
@@ -37,9 +40,22 @@ class EpornerIE(InfoExtractor):
webpage, 'redirect_code')
redirect_url = 'http://www.eporner.com/config5/%s/%s' % (video_id, redirect_code)
player_code = self._download_webpage(
redirect_url, video_id, note='Downloading player config')
video_url = self._html_search_regex(
r'file: "(.*?)",', player_code, 'video_url')
redirect_url, display_id, note='Downloading player config')
sources = self._search_regex(
r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', player_code, 'sources')
formats = []
for video_url, format_id in re.findall(r'file\s*:\s*"([^"]+)",\s*label\s*:\s*"([^"]+)"', sources):
fmt = {
'url': video_url,
'format_id': format_id,
}
m = re.search(r'^(\d+)', format_id)
if m:
fmt['height'] = int(m.group(1))
formats.append(fmt)
self._sort_formats(formats)
duration = parse_duration(self._search_regex(
r'class="mbtim">([0-9:]+)</div>', webpage, 'duration',
@@ -50,9 +66,10 @@ class EpornerIE(InfoExtractor):
return {
'id': video_id,
'url': video_url,
'display_id': display_id,
'title': title,
'duration': duration,
'view_count': view_count,
'formats': formats,
'age_limit': self._rta_search(webpage),
}

View File

@@ -79,7 +79,8 @@ class FacebookIE(InfoExtractor):
check_form = {
'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'),
'h': self._search_regex(r'name="h" value="(\w*?)"', login_results, 'h'),
'h': self._search_regex(
r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h'),
'name_action_selected': 'dont_save',
}
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))

View File

@@ -366,7 +366,22 @@ class GenericIE(InfoExtractor):
'extract_flat': False,
'skip_download': True,
}
}
},
# MLB embed
{
'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
'md5': '96f09a37e44da40dd083e12d9a683327',
'info_dict': {
'id': '33322633',
'ext': 'mp4',
'title': 'Ump changes call to ball',
'description': 'md5:71c11215384298a172a6dcb4c2e20685',
'duration': 48,
'timestamp': 1401537900,
'upload_date': '20140531',
'thumbnail': 're:^https?://.*\.jpg$',
},
},
]
def report_download_webpage(self, video_id):
@@ -613,7 +628,7 @@ class GenericIE(InfoExtractor):
embedSWF\(?:\s*
)
(["\'])
(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/
(?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
(?:embed|v)/.+?)
\1''', webpage)
if matches:
@@ -809,6 +824,12 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result(mobj.group('url'), 'SBS')
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>https?://m\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'MLB')
# Start with something easy: JW Player in SWFObject
found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
if not found:

View File

@@ -0,0 +1,84 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
compat_urllib_request,
int_or_none,
urlencode_postdata,
)
class HostingBulkIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://(?:www\.)?hostingbulk\.com/
(?:embed-)?(?P<id>[A-Za-z0-9]{12})(?:-\d+x\d+)?\.html'''
_FILE_DELETED_REGEX = r'<b>File Not Found</b>'
_TEST = {
'url': 'http://hostingbulk.com/n0ulw1hv20fm.html',
'md5': '6c8653c8ecf7ebfa83b76e24b7b2fe3f',
'info_dict': {
'id': 'n0ulw1hv20fm',
'ext': 'mp4',
'title': 'md5:5afeba33f48ec87219c269e054afd622',
'filesize': 6816081,
'thumbnail': 're:^http://.*\.jpg$',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
url = 'http://hostingbulk.com/{0:}.html'.format(video_id)
# Custom request with cookie to set language to English, so our file
# deleted regex would work.
request = compat_urllib_request.Request(
url, headers={'Cookie': 'lang=english'})
webpage = self._download_webpage(request, video_id)
if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
raise ExtractorError('Video %s does not exist' % video_id,
expected=True)
title = self._html_search_regex(r'<h3>(.*?)</h3>', webpage, 'title')
filesize = int_or_none(
self._search_regex(
r'<small>\((\d+)\sbytes?\)</small>',
webpage,
'filesize',
fatal=False
)
)
thumbnail = self._search_regex(
r'<img src="([^"]+)".+?class="pic"',
webpage, 'thumbnail', fatal=False)
fields = dict(re.findall(r'''(?x)<input\s+
type="hidden"\s+
name="([^"]+)"\s+
value="([^"]*)"
''', webpage))
request = compat_urllib_request.Request(url, urlencode_postdata(fields))
request.add_header('Content-type', 'application/x-www-form-urlencoded')
response = self._request_webpage(request, video_id,
'Submiting download request')
video_url = response.geturl()
formats = [{
'format_id': 'sd',
'filesize': filesize,
'url': video_url,
}]
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'formats': formats,
}

View File

@@ -9,29 +9,50 @@ from ..utils import (
parse_iso8601,
determine_ext,
int_or_none,
float_or_none,
str_to_int,
)
class IzleseneIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|m)\.)?izlesene\.com/(?:video|embedplayer)/(?:[^/]+/)?(?P<id>[0-9]+)'
_STREAM_URL = 'http://panel.izlesene.com/api/streamurl/{id:}/{format:}'
_TEST = {
'url': 'http://www.izlesene.com/video/sevincten-cildirtan-dogum-gunu-hediyesi/7599694',
'md5': '4384f9f0ea65086734b881085ee05ac2',
'info_dict': {
'id': '7599694',
'ext': 'mp4',
'title': 'Sevinçten Çıldırtan Doğum Günü Hediyesi',
'description': 'Annesi oğluna doğum günü hediyesi olarak minecraft cd si alıyor, ve çocuk hunharca seviniyor',
'thumbnail': 're:^http://.*\.jpg',
'uploader_id': 'pelikzzle',
'timestamp': 1404298698,
'upload_date': '20140702',
'duration': 95.395,
'age_limit': 0,
}
}
_VALID_URL = r'''(?x)
https?://(?:(?:www|m)\.)?izlesene\.com/
(?:video|embedplayer)/(?:[^/]+/)?(?P<id>[0-9]+)
'''
_TESTS = [
{
'url': 'http://www.izlesene.com/video/sevincten-cildirtan-dogum-gunu-hediyesi/7599694',
'md5': '4384f9f0ea65086734b881085ee05ac2',
'info_dict': {
'id': '7599694',
'ext': 'mp4',
'title': 'Sevinçten Çıldırtan Doğum Günü Hediyesi',
'description': 'md5:253753e2655dde93f59f74b572454f6d',
'thumbnail': 're:^http://.*\.jpg',
'uploader_id': 'pelikzzle',
'timestamp': 1404298698,
'upload_date': '20140702',
'duration': 95.395,
'age_limit': 0,
}
},
{
'url': 'http://www.izlesene.com/video/tarkan-dortmund-2006-konseri/17997',
'md5': '97f09b6872bffa284cb7fa4f6910cb72',
'info_dict': {
'id': '17997',
'ext': 'mp4',
'title': 'Tarkan Dortmund 2006 Konseri',
'description': 'Tarkan Dortmund 2006 Konseri',
'thumbnail': 're:^http://.*\.jpg',
'uploader_id': 'parlayankiz',
'timestamp': 1163318593,
'upload_date': '20061112',
'duration': 253.666,
'age_limit': 0,
}
},
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -45,18 +66,19 @@ class IzleseneIE(InfoExtractor):
thumbnail = self._og_search_thumbnail(webpage)
uploader = self._html_search_regex(
r"adduserUsername\s*=\s*'([^']+)';", webpage, 'uploader', fatal=False, default='')
r"adduserUsername\s*=\s*'([^']+)';",
webpage, 'uploader', fatal=False, default='')
timestamp = parse_iso8601(self._html_search_meta(
'uploadDate', webpage, 'upload date', fatal=False))
duration = int_or_none(self._html_search_regex(
r'"videoduration"\s*:\s*"([^"]+)"', webpage, 'duration', fatal=False))
if duration:
duration /= 1000.0
duration = float_or_none(self._html_search_regex(
r'"videoduration"\s*:\s*"([^"]+)"',
webpage, 'duration', fatal=False), scale=1000)
view_count = str_to_int(get_element_by_id('videoViewCount', webpage))
comment_count = self._html_search_regex(
r'comment_count\s*=\s*\'([^\']+)\';', webpage, 'uploader', fatal=False)
r'comment_count\s*=\s*\'([^\']+)\';',
webpage, 'comment_count', fatal=False)
family_friendly = self._html_search_meta(
'isFamilyFriendly', webpage, 'age limit', fatal=False)
@@ -66,20 +88,26 @@ class IzleseneIE(InfoExtractor):
ext = determine_ext(content_url, 'mp4')
# Might be empty for some videos.
qualities = self._html_search_regex(
r'"quality"\s*:\s*"([^"]+)"', webpage, 'qualities', fatal=False, default='')
streams = self._html_search_regex(
r'"qualitylevel"\s*:\s*"([^"]+)"',
webpage, 'streams', fatal=False, default='')
formats = []
for quality in qualities.split('|'):
json = self._download_json(
self._STREAM_URL.format(id=video_id, format=quality), video_id,
note='Getting video URL for "%s" quality' % quality,
errnote='Failed to get video URL for "%s" quality' % quality
)
if streams:
for stream in streams.split('|'):
quality, url = re.search(r'\[(\w+)\](.+)', stream).groups()
formats.append({
'format_id': '%sp' % quality if quality else 'sd',
'url': url,
'ext': ext,
})
else:
stream_url = self._search_regex(
r'"streamurl"\s?:\s?"([^"]+)"', webpage, 'stream URL')
formats.append({
'url': json.get('streamurl'),
'format_id': 'sd',
'url': stream_url,
'ext': ext,
'format_id': '%sp' % quality if quality else 'sd',
})
return {

View File

@@ -9,7 +9,7 @@ from ..utils import (
class KhanAcademyIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
_VALID_URL = r'^https?://(?:(?:www|api)\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
IE_NAME = 'KhanAcademy'
_TESTS = [{

View File

@@ -11,7 +11,7 @@ from ..utils import (
class MLBIE(InfoExtractor):
_VALID_URL = r'https?://m\.mlb\.com/(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v(?P<id>n?\d+)'
_VALID_URL = r'https?://m\.mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|shared/video/embed/embed\.html\?.*?\bcontent_id=)(?P<id>n?\d+)'
_TESTS = [
{
'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
@@ -69,6 +69,10 @@ class MLBIE(InfoExtractor):
'thumbnail': 're:^https?://.*\.jpg$',
},
},
{
'url': 'http://m.mlb.com/shared/video/embed/embed.html?content_id=35692085&topic_id=6479266&width=400&height=224&property=mlb',
'only_matching': True,
},
]
def _real_extract(self, url):

View File

@@ -0,0 +1,112 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
compat_urllib_parse,
compat_urllib_request,
int_or_none,
)
class MoeVideoIE(InfoExtractor):
IE_DESC = 'LetitBit video services: moevideo.net, playreplay.net and videochart.net'
_VALID_URL = r'''(?x)
https?://(?P<host>(?:www\.)?
(?:(?:moevideo|playreplay|videochart)\.net))/
(?:video|framevideo)/(?P<id>[0-9]+\.[0-9A-Za-z]+)'''
_API_URL = 'http://api.letitbit.net/'
_API_KEY = 'tVL0gjqo5'
_TESTS = [
{
'url': 'http://moevideo.net/video/00297.0036103fe3d513ef27915216fd29',
'md5': '129f5ae1f6585d0e9bb4f38e774ffb3a',
'info_dict': {
'id': '00297.0036103fe3d513ef27915216fd29',
'ext': 'flv',
'title': 'Sink cut out machine',
'description': 'md5:f29ff97b663aefa760bf7ca63c8ca8a8',
'thumbnail': 're:^https?://.*\.jpg$',
'width': 540,
'height': 360,
'duration': 179,
'filesize': 17822500,
}
},
{
'url': 'http://playreplay.net/video/77107.7f325710a627383d40540d8e991a',
'md5': '74f0a014d5b661f0f0e2361300d1620e',
'info_dict': {
'id': '77107.7f325710a627383d40540d8e991a',
'ext': 'flv',
'title': 'Operacion Condor.',
'description': 'md5:7e68cb2fcda66833d5081c542491a9a3',
'thumbnail': 're:^https?://.*\.jpg$',
'width': 480,
'height': 296,
'duration': 6027,
'filesize': 588257923,
}
},
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(
'http://%s/video/%s' % (mobj.group('host'), video_id),
video_id, 'Downloading webpage')
title = self._og_search_title(webpage)
thumbnail = self._og_search_thumbnail(webpage)
description = self._og_search_description(webpage)
r = [
self._API_KEY,
[
'preview/flv_link',
{
'uid': video_id,
},
],
]
r_json = json.dumps(r)
post = compat_urllib_parse.urlencode({'r': r_json})
req = compat_urllib_request.Request(self._API_URL, post)
req.add_header('Content-type', 'application/x-www-form-urlencoded')
response = self._download_json(req, video_id)
if response['status'] != 'OK':
raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, response['data']),
expected=True
)
item = response['data'][0]
video_url = item['link']
duration = int_or_none(item['length'])
width = int_or_none(item['width'])
height = int_or_none(item['height'])
filesize = int_or_none(item['convert_size'])
formats = [{
'format_id': 'sd',
'http_headers': {'Range': 'bytes=0-'}, # Required to download
'url': video_url,
'width': width,
'height': height,
'filesize': filesize,
}]
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'description': description,
'duration': duration,
'formats': formats,
}

View File

@@ -3,18 +3,23 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
remove_end,
parse_duration,
)
class NBAIE(InfoExtractor):
_VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
_TEST = {
'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
'md5': u'c0edcfc37607344e2ff8f13c378c88a4',
'md5': 'c0edcfc37607344e2ff8f13c378c88a4',
'info_dict': {
'id': '0021200253-okc-bkn-recap.nba',
'ext': 'mp4',
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
'title': 'Thunder vs. Nets',
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
'duration': 181,
},
}
@@ -27,13 +32,18 @@ class NBAIE(InfoExtractor):
video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
shortened_video_id = video_id.rpartition('/')[2]
title = self._og_search_title(webpage, default=shortened_video_id).replace('NBA.com: ', '')
title = remove_end(
self._og_search_title(webpage, default=shortened_video_id), ' : NBA.com')
description = self._og_search_description(webpage)
duration = parse_duration(
self._html_search_meta('duration', webpage, 'duration', fatal=False))
description = self._html_search_regex(r'<meta name="description" (?:content|value)="(.*?)" />', webpage, 'description', fatal=False)
return {
'id': shortened_video_id,
'url': video_url,
'title': title,
'description': description,
'duration': duration,
}

View File

@@ -5,7 +5,10 @@ import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_request,
compat_urllib_parse,
ExtractorError,
clean_html,
unified_strdate,
compat_str,
)
@@ -13,6 +16,8 @@ from ..utils import (
class NocoIE(InfoExtractor):
_VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
_LOGIN_URL = 'http://noco.tv/do.php'
_NETRC_MACHINE = 'noco'
_TEST = {
'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/',
@@ -30,6 +35,28 @@ class NocoIE(InfoExtractor):
'skip': 'Requires noco account',
}
def _real_initialize(self):
self._login()
def _login(self):
(username, password) = self._get_login_info()
if username is None:
return
login_form = {
'a': 'login',
'cookie': '1',
'username': username,
'password': password,
}
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')
login = self._download_json(request, None, 'Logging in as %s' % username)
if 'erreur' in login:
raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')

View File

@@ -0,0 +1,65 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
compat_urllib_request,
xpath_with_ns,
)
_x = lambda p: xpath_with_ns(p, {'xspf': 'http://xspf.org/ns/0/'})
_find = lambda el, p: el.find(_x(p)).text.strip()
class NosVideoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?nosvideo\.com/' + \
'(?:embed/|\?v=)(?P<id>[A-Za-z0-9]{12})/?'
_PLAYLIST_URL = 'http://nosvideo.com/xml/{xml_id:s}.xml'
_TEST = {
'url': 'http://nosvideo.com/?v=drlp6s40kg54',
'md5': '4b4ac54c6ad5d70ab88f2c2c6ccec71c',
'info_dict': {
'id': 'drlp6s40kg54',
'ext': 'mp4',
'title': 'big_buck_bunny_480p_surround-fix.avi.mp4',
'thumbnail': 're:^https?://.*\.jpg$',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
fields = {
'id': video_id,
'op': 'download1',
'method_free': 'Continue to Video',
}
post = compat_urllib_parse.urlencode(fields)
req = compat_urllib_request.Request(url, post)
req.add_header('Content-type', 'application/x-www-form-urlencoded')
webpage = self._download_webpage(req, video_id,
'Downloading download page')
xml_id = self._search_regex(r'php\|([^\|]+)\|', webpage, 'XML ID')
playlist_url = self._PLAYLIST_URL.format(xml_id=xml_id)
playlist = self._download_xml(playlist_url, video_id)
track = playlist.find(_x('.//xspf:track'))
title = _find(track, './xspf:title')
url = _find(track, './xspf:file')
thumbnail = _find(track, './xspf:image')
formats = [{
'format_id': 'sd',
'url': url,
}]
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'formats': formats,
}

View File

@@ -100,7 +100,7 @@ class RUTVIE(InfoExtractor):
return mobj.group('url')
mobj = re.search(
r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>http://player\.(?:rutv\.ru|vgtrk\.com)/flash2v/container\.swf\?id=.+?\2)',
r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://player\.(?:rutv\.ru|vgtrk\.com)/flash2v/container\.swf\?id=.+?\2)',
webpage)
if mobj:
return mobj.group('url')

View File

@@ -0,0 +1,91 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
compat_urllib_request,
parse_duration,
)
class ShareSixIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?sharesix\.com/(?:f/)?(?P<id>[0-9a-zA-Z]+)'
_TESTS = [
{
'url': 'http://sharesix.com/f/OXjQ7Y6',
'md5': '9e8e95d8823942815a7d7c773110cc93',
'info_dict': {
'id': 'OXjQ7Y6',
'ext': 'mp4',
'title': 'big_buck_bunny_480p_surround-fix.avi',
'duration': 596,
'width': 854,
'height': 480,
},
},
{
'url': 'http://sharesix.com/lfrwoxp35zdd',
'md5': 'dd19f1435b7cec2d7912c64beeee8185',
'info_dict': {
'id': 'lfrwoxp35zdd',
'ext': 'flv',
'title': 'WhiteBoard___a_Mac_vs_PC_Parody_Cartoon.mp4.flv',
'duration': 65,
'width': 1280,
'height': 720,
},
}
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
fields = {
'method_free': 'Free'
}
post = compat_urllib_parse.urlencode(fields)
req = compat_urllib_request.Request(url, post)
req.add_header('Content-type', 'application/x-www-form-urlencoded')
webpage = self._download_webpage(req, video_id,
'Downloading video page')
video_url = self._search_regex(
r"var\slnk1\s=\s'([^']+)'", webpage, 'video URL')
title = self._html_search_regex(
r'(?s)<dt>Filename:</dt>.+?<dd>(.+?)</dd>', webpage, 'title')
duration = parse_duration(
self._search_regex(
r'(?s)<dt>Length:</dt>.+?<dd>(.+?)</dd>',
webpage,
'duration',
fatal=False
)
)
m = re.search(
r'''(?xs)<dt>Width\sx\sHeight</dt>.+?
<dd>(?P<width>\d+)\sx\s(?P<height>\d+)</dd>''',
webpage
)
width = height = None
if m:
width, height = int(m.group('width')), int(m.group('height'))
formats = [{
'format_id': 'sd',
'url': video_url,
'width': width,
'height': height,
}]
return {
'id': video_id,
'title': title,
'duration': duration,
'formats': formats,
}

View File

@@ -1,3 +1,5 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
@@ -11,24 +13,30 @@ class TechTalksIE(InfoExtractor):
_VALID_URL = r'https?://techtalks\.tv/talks/[^/]*/(?P<id>\d+)/'
_TEST = {
u'url': u'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/',
u'playlist': [
'url': 'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/',
'info_dict': {
'id': '57758',
'title': 'Learning Topic Models --- Going beyond SVD',
},
'playlist': [
{
u'file': u'57758.flv',
u'info_dict': {
u'title': u'Learning Topic Models --- Going beyond SVD',
'info_dict': {
'id': '57758',
'ext': 'flv',
'title': 'Learning Topic Models --- Going beyond SVD',
},
},
{
u'file': u'57758-slides.flv',
u'info_dict': {
u'title': u'Learning Topic Models --- Going beyond SVD',
'info_dict': {
'id': '57758-slides',
'ext': 'flv',
'title': 'Learning Topic Models --- Going beyond SVD',
},
},
],
u'params': {
'params': {
# rtmp download
u'skip_download': True,
'skip_download': True,
},
}
@@ -36,30 +44,36 @@ class TechTalksIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
talk_id = mobj.group('id')
webpage = self._download_webpage(url, talk_id)
rtmp_url = self._search_regex(r'netConnectionUrl: \'(.*?)\'', webpage,
u'rtmp url')
play_path = self._search_regex(r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
webpage, u'presenter play path')
rtmp_url = self._search_regex(
r'netConnectionUrl: \'(.*?)\'', webpage, 'rtmp url')
play_path = self._search_regex(
r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
webpage, 'presenter play path')
title = clean_html(get_element_by_attribute('class', 'title', webpage))
video_info = {
'id': talk_id,
'title': title,
'url': rtmp_url,
'play_path': play_path,
'ext': 'flv',
}
'id': talk_id,
'title': title,
'url': rtmp_url,
'play_path': play_path,
'ext': 'flv',
}
m_slides = re.search(r'<a class="slides" href=\'(.*?)\'', webpage)
if m_slides is None:
return video_info
else:
return [
video_info,
# The slides video
{
'id': talk_id + '-slides',
'title': title,
'url': rtmp_url,
'play_path': m_slides.group(1),
'ext': 'flv',
},
]
return {
'_type': 'playlist',
'id': talk_id,
'title': title,
'entries': [
video_info,
# The slides video
{
'id': talk_id + '-slides',
'title': title,
'url': rtmp_url,
'play_path': m_slides.group(1),
'ext': 'flv',
},
],
}

View File

@@ -0,0 +1,84 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
parse_duration,
fix_xml_ampersands,
)
class TNAFlixIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tnaflix\.com/(?P<cat_id>[\w-]+)/(?P<display_id>[\w-]+)/video(?P<id>\d+)'
_TITLE_REGEX = None
_DESCRIPTION_REGEX = r'<h3 itemprop="description">([^<]+)</h3>'
_CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'
_TEST = {
'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878',
'md5': 'ecf3498417d09216374fc5907f9c6ec0',
'info_dict': {
'id': '553878',
'display_id': 'Carmella-Decesare-striptease',
'ext': 'mp4',
'title': 'Carmella Decesare - striptease',
'description': '',
'thumbnail': 're:https?://.*\.jpg$',
'duration': 91,
'age_limit': 18,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id)
title = self._html_search_regex(
self._TITLE_REGEX, webpage, 'title') if self._TITLE_REGEX else self._og_search_title(webpage)
description = self._html_search_regex(
self._DESCRIPTION_REGEX, webpage, 'description', fatal=False, default='')
age_limit = self._rta_search(webpage)
duration = self._html_search_meta('duration', webpage, 'duration', default=None)
if duration:
duration = parse_duration(duration[1:])
cfg_url = self._html_search_regex(
self._CONFIG_REGEX, webpage, 'flashvars.config')
cfg_xml = self._download_xml(
cfg_url, display_id, note='Downloading metadata',
transform_source=fix_xml_ampersands)
thumbnail = cfg_xml.find('./startThumb').text
formats = []
for item in cfg_xml.findall('./quality/item'):
video_url = re.sub('speed=\d+', 'speed=', item.find('videoLink').text)
format_id = item.find('res').text
fmt = {
'url': video_url,
'format_id': format_id,
}
m = re.search(r'^(\d+)', format_id)
if m:
fmt['height'] = int(m.group(1))
formats.append(fmt)
self._sort_formats(formats)
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'age_limit': age_limit,
'formats': formats,
}

View File

@@ -47,7 +47,7 @@ class ToypicsUserIE(InfoExtractor):
'info_dict': {
'id': 'Mikey',
},
'playlist_mincount': 9917,
'playlist_mincount': 19,
}
def _real_extract(self, url):

View File

@@ -5,80 +5,82 @@ import re
from .common import InfoExtractor
from ..utils import (
unified_strdate,
clean_html,
int_or_none,
float_or_none,
str_to_int,
)
class TvigleIE(InfoExtractor):
IE_NAME = 'tvigle'
IE_DESC = 'Интернет-телевидение Tvigle.ru'
_VALID_URL = r'http://(?:www\.)?tvigle\.ru/category/.+?[\?&]v(?:ideo)?=(?P<id>\d+)'
_VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<display_id>[^/]+)/$'
_TESTS = [
{
'url': 'http://www.tvigle.ru/category/cinema/1608/?video=503081',
'md5': '09afba4616666249f087efc6dcf83cb3',
'url': 'http://www.tvigle.ru/video/brat-2/',
'md5': '72cb7eab33e54314e1790da402d3c9c3',
'info_dict': {
'id': '503081',
'ext': 'flv',
'id': '5119390',
'display_id': 'brat-2',
'ext': 'mp4',
'title': 'Брат 2 ',
'description': 'md5:f5a42970f50648cee3d7ad740f3ae769',
'upload_date': '20110919',
'description': 'md5:5751f4fe345a58e1692585c361294bd8',
'duration': 7356.369,
'age_limit': 0,
},
},
{
'url': 'http://www.tvigle.ru/category/men/vysotskiy_vospominaniya02/?flt=196&v=676433',
'md5': 'e7efe5350dd5011d0de6550b53c3ba7b',
'url': 'http://www.tvigle.ru/video/vladimir-vysotskii/vedushchii-teleprogrammy-60-minut-ssha-o-vladimire-vysotskom/',
'md5': 'd9012d7c7c598fe7a11d7fb46dc1f574',
'info_dict': {
'id': '676433',
'ext': 'flv',
'id': '5142516',
'ext': 'mp4',
'title': 'Ведущий телепрограммы «60 минут» (США) о Владимире Высоцком',
'description': 'md5:027f7dc872948f14c96d19b4178428a4',
'upload_date': '20121218',
'duration': 186.080,
'age_limit': 0,
},
},
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
display_id = mobj.group('display_id')
video_data = self._download_xml(
'http://www.tvigle.ru/xml/single.php?obj=%s' % video_id, video_id, 'Downloading video XML')
webpage = self._download_webpage(url, display_id)
video = video_data.find('./video')
video_id = self._html_search_regex(
r'<li class="video-preview current_playing" id="(\d+)">', webpage, 'video id')
title = video.get('name')
description = video.get('anons')
if description:
description = clean_html(description)
thumbnail = video_data.get('img')
upload_date = unified_strdate(video.get('date'))
like_count = int_or_none(video.get('vtp'))
video_data = self._download_json(
'http://cloud.tvigle.ru/api/play/video/%s/' % video_id, display_id)
item = video_data['playlist']['items'][0]
title = item['title']
description = item['description']
thumbnail = item['thumbnail']
duration = float_or_none(item['durationMilliseconds'], 1000)
age_limit = str_to_int(item['ageRestrictions'])
formats = []
for num, (format_id, format_note) in enumerate([['low_file', 'SQ'], ['file', 'HQ'], ['hd', 'HD 720']]):
video_url = video.get(format_id)
if not video_url:
continue
formats.append({
'url': video_url,
'format_id': format_id,
'format_note': format_note,
'quality': num,
})
for vcodec, fmts in item['videos'].items():
for quality, video_url in fmts.items():
formats.append({
'url': video_url,
'format_id': '%s-%s' % (vcodec, quality),
'vcodec': vcodec,
'height': int(quality[:-1]),
})
self._sort_formats(formats)
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'upload_date': upload_date,
'like_count': like_count,
'age_limit': 18,
'duration': duration,
'age_limit': age_limit,
'formats': formats,
}

View File

@@ -1,32 +1,66 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import qualities
class UnistraIE(InfoExtractor):
_VALID_URL = r'http://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(\d+)'
_VALID_URL = r'http://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(?P<id>\d+)'
_TEST = {
u'url': u'http://utv.unistra.fr/video.php?id_video=154',
u'file': u'154.mp4',
u'md5': u'736f605cfdc96724d55bb543ab3ced24',
u'info_dict': {
u'title': u'M!ss Yella',
u'description': u'md5:104892c71bd48e55d70b902736b81bbf',
_TESTS = [
{
'url': 'http://utv.unistra.fr/video.php?id_video=154',
'md5': '736f605cfdc96724d55bb543ab3ced24',
'info_dict': {
'id': '154',
'ext': 'mp4',
'title': 'M!ss Yella',
'description': 'md5:104892c71bd48e55d70b902736b81bbf',
},
},
}
{
'url': 'http://utv.unistra.fr/index.php?id_video=437',
'md5': '1ddddd6cccaae76f622ce29b8779636d',
'info_dict': {
'id': '437',
'ext': 'mp4',
'title': 'Prix Louise Weiss 2014',
'description': 'md5:cc3a8735f079f4fb6b0b570fc10c135a',
},
}
]
def _real_extract(self, url):
id = re.match(self._VALID_URL, url).group(1)
webpage = self._download_webpage(url, id)
file = re.search(r'file: "(.*?)",', webpage).group(1)
title = self._html_search_regex(r'<title>UTV - (.*?)</', webpage, u'title')
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file
webpage = self._download_webpage(url, video_id)
return {'id': id,
'title': title,
'ext': 'mp4',
'url': video_url,
'description': self._html_search_regex(r'<meta name="Description" content="(.*?)"', webpage, u'description', flags=re.DOTALL),
'thumbnail': self._search_regex(r'image: "(.*?)"', webpage, u'thumbnail'),
}
files = set(re.findall(r'file\s*:\s*"([^"]+)"', webpage))
quality = qualities(['SD', 'HD'])
formats = []
for file_path in files:
format_id = 'HD' if file_path.endswith('-HD.mp4') else 'SD'
formats.append({
'url': 'http://vod-flash.u-strasbg.fr:8080%s' % file_path,
'format_id': format_id,
'quality': quality(format_id)
})
title = self._html_search_regex(
r'<title>UTV - (.*?)</', webpage, 'title')
description = self._html_search_regex(
r'<meta name="Description" content="(.*?)"', webpage, 'description', flags=re.DOTALL)
thumbnail = self._search_regex(
r'image: "(.*?)"', webpage, 'thumbnail')
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'formats': formats
}

View File

@@ -0,0 +1,119 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import float_or_none
class VGTVIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?vgtv\.no/#!/(?:.*)/(?P<id>[0-9]+)'
_TESTS = [
{
# streamType: vod
'url': 'http://www.vgtv.no/#!/video/84196/hevnen-er-soet-episode-10-abu',
'md5': 'b8be7a234cebb840c0d512c78013e02f',
'info_dict': {
'id': '84196',
'ext': 'mp4',
'title': 'Hevnen er søt episode 10: Abu',
'description': 'md5:e25e4badb5f544b04341e14abdc72234',
'thumbnail': 're:^https?://.*\.jpg',
'duration': 648.000,
'timestamp': 1404626400,
'upload_date': '20140706',
'view_count': int,
},
},
{
# streamType: wasLive
'url': 'http://www.vgtv.no/#!/live/100764/opptak-vgtv-foelger-em-kvalifiseringen',
'info_dict': {
'id': '100764',
'ext': 'mp4',
'title': 'OPPTAK: VGTV følger EM-kvalifiseringen',
'description': 'md5:3772d9c0dc2dff92a886b60039a7d4d3',
'thumbnail': 're:^https?://.*\.jpg',
'duration': 9056.000,
'timestamp': 1410113864,
'upload_date': '20140907',
'view_count': int,
},
'params': {
# m3u8 download
'skip_download': True,
},
},
{
# streamType: live
'url': 'http://www.vgtv.no/#!/live/100015/direkte-her-kan-du-se-laksen-live-fra-suldalslaagen',
'info_dict': {
'id': '100015',
'ext': 'mp4',
'title': 'DIREKTE: Her kan du se laksen live fra Suldalslågen!',
'description': 'md5:9a60cc23fa349f761628924e56eeec2d',
'thumbnail': 're:^https?://.*\.jpg',
'duration': 0,
'timestamp': 1407423348,
'upload_date': '20140807',
'view_count': int,
},
'params': {
# m3u8 download
'skip_download': True,
},
},
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
data = self._download_json(
'http://svp.vg.no/svp/api/v1/vgtv/assets/%s?appName=vgtv-website' % video_id,
video_id, 'Downloading media JSON')
streams = data['streamUrls']
formats = []
hls_url = streams.get('hls')
if hls_url:
formats.extend(self._extract_m3u8_formats(hls_url, video_id, 'mp4'))
hds_url = streams.get('hds')
if hds_url:
formats.extend(self._extract_f4m_formats(hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', video_id))
mp4_url = streams.get('mp4')
if mp4_url:
_url = hls_url or hds_url
MP4_URL_TEMPLATE = '%s/%%s.%s' % (mp4_url.rpartition('/')[0], mp4_url.rpartition('.')[-1])
for mp4_format in _url.split(','):
m = re.search('(?P<width>\d+)_(?P<height>\d+)_(?P<vbr>\d+)', mp4_format)
if not m:
continue
width = int(m.group('width'))
height = int(m.group('height'))
vbr = int(m.group('vbr'))
formats.append({
'url': MP4_URL_TEMPLATE % mp4_format,
'format_id': 'mp4-%s' % vbr,
'width': width,
'height': height,
'vbr': vbr,
'preference': 1,
})
self._sort_formats(formats)
return {
'id': video_id,
'title': data['title'],
'description': data['description'],
'thumbnail': data['images']['main'] + '?t[]=900x506q80',
'timestamp': data['published'],
'duration': float_or_none(data['duration'], 1000),
'view_count': data['displays'],
'formats': formats,
}

View File

@@ -13,6 +13,9 @@ class WashingtonPostIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
_TEST = {
'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
'info_dict': {
'title': 'Sinkhole of bureaucracy',
},
'playlist': [{
'md5': 'c3f4b4922ffa259243f68e928db2db8c',
'info_dict': {

View File

@@ -1,7 +1,5 @@
# coding: utf-8
import errno
import io
import itertools
import json
import os.path
@@ -21,7 +19,6 @@ from ..utils import (
compat_str,
clean_html,
get_cachedir,
get_element_by_id,
get_element_by_attribute,
ExtractorError,
@@ -30,7 +27,6 @@ from ..utils import (
unescapeHTML,
unified_strdate,
orderedSet,
write_json_file,
uppercase_escape,
)
@@ -435,26 +431,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
func_id = '%s_%s_%s' % (
player_type, player_id, self._signature_cache_id(example_sig))
assert os.path.basename(func_id) == func_id
cache_dir = get_cachedir(self._downloader.params)
cache_enabled = cache_dir is not None
if cache_enabled:
cache_fn = os.path.join(os.path.expanduser(cache_dir),
u'youtube-sigfuncs',
func_id + '.json')
try:
with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
cache_spec = json.load(cachef)
return lambda s: u''.join(s[i] for i in cache_spec)
except IOError:
pass # No cache available
except ValueError:
try:
file_size = os.path.getsize(cache_fn)
except (OSError, IOError) as oe:
file_size = str(oe)
self._downloader.report_warning(
u'Cache %s failed (%s)' % (cache_fn, file_size))
cache_spec = self._downloader.cache.load(u'youtube-sigfuncs', func_id)
if cache_spec is not None:
return lambda s: u''.join(s[i] for i in cache_spec)
if player_type == 'js':
code = self._download_webpage(
@@ -472,22 +452,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
else:
assert False, 'Invalid player type %r' % player_type
if cache_enabled:
try:
test_string = u''.join(map(compat_chr, range(len(example_sig))))
cache_res = res(test_string)
cache_spec = [ord(c) for c in cache_res]
try:
os.makedirs(os.path.dirname(cache_fn))
except OSError as ose:
if ose.errno != errno.EEXIST:
raise
write_json_file(cache_spec, cache_fn)
except Exception:
tb = traceback.format_exc()
self._downloader.report_warning(
u'Writing cache to %r failed: %s' % (cache_fn, tb))
if cache_spec is None:
test_string = u''.join(map(compat_chr, range(len(example_sig))))
cache_res = res(test_string)
cache_spec = [ord(c) for c in cache_res]
self._downloader.cache.store(u'youtube-sigfuncs', func_id, cache_spec)
return res
def _print_sig_code(self, func, example_sig):

View File

@@ -1076,12 +1076,6 @@ def intlist_to_bytes(xs):
return bytes(xs)
def get_cachedir(params={}):
cache_root = os.environ.get('XDG_CACHE_HOME',
os.path.expanduser('~/.cache'))
return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
# Cross-platform file locking
if sys.platform == 'win32':
import ctypes.wintypes
@@ -1321,7 +1315,7 @@ def str_to_int(int_str):
""" A more relaxed version of int_or_none """
if int_str is None:
return None
int_str = re.sub(r'[,\.]', u'', int_str)
int_str = re.sub(r'[,\.\+]', u'', int_str)
return int(int_str)
@@ -1336,7 +1330,7 @@ def parse_duration(s):
s = s.strip()
m = re.match(
r'(?:(?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?(?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$', s)
r'(?i)(?:(?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?(?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$', s)
if not m:
return None
res = int(m.group('secs'))

View File

@@ -1,2 +1,2 @@
__version__ = '2014.09.01.2'
__version__ = '2014.09.10.1'