Compare commits
	
		
			96 Commits
		
	
	
		
			2014.08.28
			...
			2014.09.04
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | 16e6f396b4 | ||
|   | c6ec6b2e8b | ||
|   | 7bbc6428b6 | ||
|   | c1a3c9ddb2 | ||
|   | feec0f56f5 | ||
|   | 8029857d27 | ||
|   | aa61802c1e | ||
|   | f54aee0209 | ||
|   | 5df921b0e3 | ||
|   | 35d5b67876 | ||
|   | 674c869af4 | ||
|   | 10710ae386 | ||
|   | a0e07d3161 | ||
|   | 88fc294f7f | ||
|   | a232bb9551 | ||
|   | eb833b7f5a | ||
|   | f164038b79 | ||
|   | f7a361c4f1 | ||
|   | 884ae74785 | ||
|   | 1dba4a2185 | ||
|   | 7d4d5f25ed | ||
|   | 33422c056d | ||
|   | a7862a1bc8 | ||
|   | 3baa62e8d1 | ||
|   | 1bf8cf5c2c | ||
|   | eade1d7eab | ||
|   | 1a94ff6865 | ||
|   | b47ed50aaf | ||
|   | 1b8477729a | ||
|   | ff6ade294c | ||
|   | 11fc065c57 | ||
|   | 94388f50b3 | ||
|   | a444648202 | ||
|   | 7ca2e11f24 | ||
|   | 563f6dea59 | ||
|   | e4039057be | ||
|   | 11342b54d6 | ||
|   | 49fa38adf2 | ||
|   | 9b330db7f0 | ||
|   | d740f7e16f | ||
|   | 07e7dc4bdc | ||
|   | 4c59dc4c34 | ||
|   | 7260ea0705 | ||
|   | 35b1e44567 | ||
|   | c9ea760e31 | ||
|   | 9ebf22b7d9 | ||
|   | 2582bebe06 | ||
|   | c9cc0bf57b | ||
|   | 61edcfb0a2 | ||
|   | a8be56ce3d | ||
|   | 329818484c | ||
|   | 8bdfddf641 | ||
|   | 36d65b61d4 | ||
|   | 7d48c06f27 | ||
|   | d169e36f5c | ||
|   | 2d7af09487 | ||
|   | 48d4681efc | ||
|   | 9ea9b61448 | ||
|   | 04b4aa4a7b | ||
|   | 5a3f0d9aee | ||
|   | 1ed5b5c9c8 | ||
|   | d10548b691 | ||
|   | e990510e6b | ||
|   | 55f7bd2dcc | ||
|   | f931e25959 | ||
|   | ca9cd290c7 | ||
|   | 49e23e8b6a | ||
|   | ae7246e7d5 | ||
|   | 43fd392413 | ||
|   | 3e7c12240c | ||
|   | 7eb21356f9 | ||
|   | f30a38be8b | ||
|   | 2aebbccefc | ||
|   | b170935a8f | ||
|   | 35241d05d1 | ||
|   | be2dd0651e | ||
|   | 6a400a6339 | ||
|   | 7b53af7f70 | ||
|   | ca7b3246b6 | ||
|   | 9c4c233b84 | ||
|   | 8a6c59865d | ||
|   | 1d57b2520c | ||
|   | 17b0b8a166 | ||
|   | 12c82cf9cb | ||
|   | 0bafcf6f46 | ||
|   | bbc9dc56f6 | ||
|   | 72c65d39ff | ||
|   | 676e3ecf24 | ||
|   | 78272a076e | ||
|   | 723e04d0be | ||
|   | 08a36c3569 | ||
|   | 37709fae89 | ||
|   | a81e4eb69d | ||
|   | 8e72edfb19 | ||
|   | 863f08a92e | ||
|   | de2d9f5f1b | 
| @@ -143,32 +143,6 @@ class TestAllURLsMatching(unittest.TestCase): | ||||
|         self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS']) | ||||
|         self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS']) | ||||
|  | ||||
|     def test_ComedyCentralShows(self): | ||||
|         self.assertMatch( | ||||
|             'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview', | ||||
|             ['ComedyCentralShows']) | ||||
|         self.assertMatch( | ||||
|             'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news', | ||||
|             ['ComedyCentralShows']) | ||||
|         self.assertMatch( | ||||
|             'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114', | ||||
|             ['ComedyCentralShows']) | ||||
|         self.assertMatch( | ||||
|             'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3', | ||||
|             ['ComedyCentralShows']) | ||||
|         self.assertMatch( | ||||
|             'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary', | ||||
|             ['ComedyCentralShows']) | ||||
|         self.assertMatch( | ||||
|             'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall', | ||||
|             ['ComedyCentralShows']) | ||||
|         self.assertMatch( | ||||
|             'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights', | ||||
|             ['ComedyCentralShows']) | ||||
|         self.assertMatch( | ||||
|             'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food', | ||||
|             ['ComedyCentralShows']) | ||||
|  | ||||
|     def test_yahoo_https(self): | ||||
|         # https://github.com/rg3/youtube-dl/issues/2701 | ||||
|         self.assertMatch( | ||||
|   | ||||
							
								
								
									
										59
									
								
								test/test_cache.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										59
									
								
								test/test_cache.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,59 @@ | ||||
| #!/usr/bin/env python | ||||
| # coding: utf-8 | ||||
|  | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import shutil | ||||
|  | ||||
| # Allow direct execution | ||||
| import os | ||||
| import sys | ||||
| import unittest | ||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
|  | ||||
| from test.helper import FakeYDL | ||||
| from youtube_dl.cache import Cache | ||||
|  | ||||
|  | ||||
| def _is_empty(d): | ||||
|     return not bool(os.listdir(d)) | ||||
|  | ||||
|  | ||||
| def _mkdir(d): | ||||
|     if not os.path.exists(d): | ||||
|         os.mkdir(d) | ||||
|  | ||||
|  | ||||
| class TestCache(unittest.TestCase): | ||||
|     def setUp(self): | ||||
|         TEST_DIR = os.path.dirname(os.path.abspath(__file__)) | ||||
|         TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata') | ||||
|         _mkdir(TESTDATA_DIR) | ||||
|         self.test_dir = os.path.join(TESTDATA_DIR, 'cache_test') | ||||
|         self.tearDown() | ||||
|  | ||||
|     def tearDown(self): | ||||
|         if os.path.exists(self.test_dir): | ||||
|             shutil.rmtree(self.test_dir) | ||||
|  | ||||
|     def test_cache(self): | ||||
|         ydl = FakeYDL({ | ||||
|             'cachedir': self.test_dir, | ||||
|         }) | ||||
|         c = Cache(ydl) | ||||
|         obj = {'x': 1, 'y': ['ä', '\\a', True]} | ||||
|         self.assertEqual(c.load('test_cache', 'k.'), None) | ||||
|         c.store('test_cache', 'k.', obj) | ||||
|         self.assertEqual(c.load('test_cache', 'k2'), None) | ||||
|         self.assertFalse(_is_empty(self.test_dir)) | ||||
|         self.assertEqual(c.load('test_cache', 'k.'), obj) | ||||
|         self.assertEqual(c.load('test_cache', 'y'), None) | ||||
|         self.assertEqual(c.load('test_cache2', 'k.'), None) | ||||
|         c.remove() | ||||
|         self.assertFalse(os.path.exists(self.test_dir)) | ||||
|         self.assertEqual(c.load('test_cache', 'k.'), None) | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
| @@ -167,21 +167,21 @@ def generator(test_case): | ||||
|                 if not test_case.get('params', {}).get('skip_download', False): | ||||
|                     self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename) | ||||
|                     self.assertTrue(tc_filename in finished_hook_called) | ||||
|                     expected_minsize = tc.get('file_minsize', 10000) | ||||
|                     if expected_minsize is not None: | ||||
|                         if params.get('test'): | ||||
|                             expected_minsize = max(expected_minsize, 10000) | ||||
|                         got_fsize = os.path.getsize(tc_filename) | ||||
|                         assertGreaterEqual( | ||||
|                             self, got_fsize, expected_minsize, | ||||
|                             'Expected %s to be at least %s, but it\'s only %s ' % | ||||
|                             (tc_filename, format_bytes(expected_minsize), | ||||
|                                 format_bytes(got_fsize))) | ||||
|                     if 'md5' in tc: | ||||
|                         md5_for_file = _file_md5(tc_filename) | ||||
|                         self.assertEqual(md5_for_file, tc['md5']) | ||||
|                 info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json' | ||||
|                 self.assertTrue(os.path.exists(info_json_fn)) | ||||
|                 if 'md5' in tc: | ||||
|                     md5_for_file = _file_md5(tc_filename) | ||||
|                     self.assertEqual(md5_for_file, tc['md5']) | ||||
|                 expected_minsize = tc.get('file_minsize', 10000) | ||||
|                 if expected_minsize is not None: | ||||
|                     if params.get('test'): | ||||
|                         expected_minsize = max(expected_minsize, 10000) | ||||
|                     got_fsize = os.path.getsize(tc_filename) | ||||
|                     assertGreaterEqual( | ||||
|                         self, got_fsize, expected_minsize, | ||||
|                         'Expected %s to be at least %s, but it\'s only %s ' % | ||||
|                         (tc_filename, format_bytes(expected_minsize), | ||||
|                             format_bytes(got_fsize))) | ||||
|                 with io.open(info_json_fn, encoding='utf-8') as infof: | ||||
|                     info_dict = json.load(infof) | ||||
|  | ||||
|   | ||||
| @@ -211,6 +211,9 @@ class TestUtil(unittest.TestCase): | ||||
|         self.assertEqual(parse_duration('00:01:01'), 61) | ||||
|         self.assertEqual(parse_duration('x:y'), None) | ||||
|         self.assertEqual(parse_duration('3h11m53s'), 11513) | ||||
|         self.assertEqual(parse_duration('3h 11m 53s'), 11513) | ||||
|         self.assertEqual(parse_duration('3 hours 11 minutes 53 seconds'), 11513) | ||||
|         self.assertEqual(parse_duration('3 hours 11 mins 53 secs'), 11513) | ||||
|         self.assertEqual(parse_duration('62m45s'), 3765) | ||||
|         self.assertEqual(parse_duration('6m59s'), 419) | ||||
|         self.assertEqual(parse_duration('49s'), 49) | ||||
|   | ||||
| @@ -57,6 +57,7 @@ from .utils import ( | ||||
|     YoutubeDLHandler, | ||||
|     prepend_extension, | ||||
| ) | ||||
| from .cache import Cache | ||||
| from .extractor import get_info_extractor, gen_extractors | ||||
| from .downloader import get_suitable_downloader | ||||
| from .postprocessor import FFmpegMergerPP | ||||
| @@ -133,7 +134,7 @@ class YoutubeDL(object): | ||||
|     daterange:         A DateRange object, download only if the upload_date is in the range. | ||||
|     skip_download:     Skip the actual download of the video file | ||||
|     cachedir:          Location of the cache files in the filesystem. | ||||
|                        None to disable filesystem cache. | ||||
|                        False to disable filesystem cache. | ||||
|     noplaylist:        Download single video instead of a playlist if in doubt. | ||||
|     age_limit:         An integer representing the user's age in years. | ||||
|                        Unsuitable videos for the given age are skipped. | ||||
| @@ -195,6 +196,7 @@ class YoutubeDL(object): | ||||
|         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] | ||||
|         self._err_file = sys.stderr | ||||
|         self.params = params | ||||
|         self.cache = Cache(self) | ||||
|  | ||||
|         if params.get('bidi_workaround', False): | ||||
|             try: | ||||
|   | ||||
| @@ -84,7 +84,6 @@ import optparse | ||||
| import os | ||||
| import random | ||||
| import shlex | ||||
| import shutil | ||||
| import sys | ||||
|  | ||||
|  | ||||
| @@ -96,7 +95,6 @@ from .utils import ( | ||||
|     decodeOption, | ||||
|     get_term_width, | ||||
|     DownloadError, | ||||
|     get_cachedir, | ||||
|     MaxDownloadsReached, | ||||
|     preferredencoding, | ||||
|     read_batch_urls, | ||||
| @@ -518,10 +516,10 @@ def parseOpts(overrideArguments=None): | ||||
|     filesystem.add_option('--cookies', | ||||
|             dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in') | ||||
|     filesystem.add_option( | ||||
|         '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR', | ||||
|         '--cache-dir', dest='cachedir', default=None, metavar='DIR', | ||||
|         help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.') | ||||
|     filesystem.add_option( | ||||
|         '--no-cache-dir', action='store_const', const=None, dest='cachedir', | ||||
|         '--no-cache-dir', action='store_const', const=False, dest='cachedir', | ||||
|         help='Disable filesystem caching') | ||||
|     filesystem.add_option( | ||||
|         '--rm-cache-dir', action='store_true', dest='rm_cachedir', | ||||
| @@ -872,20 +870,7 @@ def _real_main(argv=None): | ||||
|  | ||||
|         # Remove cache dir | ||||
|         if opts.rm_cachedir: | ||||
|             if opts.cachedir is None: | ||||
|                 ydl.to_screen(u'No cache dir specified (Did you combine --no-cache-dir and --rm-cache-dir?)') | ||||
|             else: | ||||
|                 if ('.cache' not in opts.cachedir) or ('youtube-dl' not in opts.cachedir): | ||||
|                     ydl.to_screen(u'Not removing directory %s - this does not look like a cache dir') | ||||
|                     retcode = 141 | ||||
|                 else: | ||||
|                     ydl.to_screen( | ||||
|                         u'Removing cache dir %s .' % opts.cachedir, | ||||
|                         skip_eol=True) | ||||
|                     if os.path.exists(opts.cachedir): | ||||
|                         ydl.to_screen(u'.', skip_eol=True) | ||||
|                         shutil.rmtree(opts.cachedir) | ||||
|                     ydl.to_screen(u'.') | ||||
|             ydl.cache.remove() | ||||
|  | ||||
|         # Maybe do nothing | ||||
|         if (len(all_urls) < 1) and (opts.load_info_filename is None): | ||||
|   | ||||
							
								
								
									
										94
									
								
								youtube_dl/cache.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										94
									
								
								youtube_dl/cache.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,94 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import errno | ||||
| import io | ||||
| import json | ||||
| import os | ||||
| import re | ||||
| import shutil | ||||
| import traceback | ||||
|  | ||||
| from .utils import ( | ||||
|     write_json_file, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class Cache(object): | ||||
|     def __init__(self, ydl): | ||||
|         self._ydl = ydl | ||||
|  | ||||
|     def _get_root_dir(self): | ||||
|         res = self._ydl.params.get('cachedir') | ||||
|         if res is None: | ||||
|             cache_root = os.environ.get('XDG_CACHE_HOME', '~/.cache') | ||||
|             res = os.path.join(cache_root, 'youtube-dl') | ||||
|         return os.path.expanduser(res) | ||||
|  | ||||
|     def _get_cache_fn(self, section, key, dtype): | ||||
|         assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \ | ||||
|             'invalid section %r' % section | ||||
|         assert re.match(r'^[a-zA-Z0-9_.-]+$', key), 'invalid key %r' % key | ||||
|         return os.path.join( | ||||
|             self._get_root_dir(), section, '%s.%s' % (key, dtype)) | ||||
|  | ||||
|     @property | ||||
|     def enabled(self): | ||||
|         return self._ydl.params.get('cachedir') is not False | ||||
|  | ||||
|     def store(self, section, key, data, dtype='json'): | ||||
|         assert dtype in ('json',) | ||||
|  | ||||
|         if not self.enabled: | ||||
|             return | ||||
|  | ||||
|         fn = self._get_cache_fn(section, key, dtype) | ||||
|         try: | ||||
|             try: | ||||
|                 os.makedirs(os.path.dirname(fn)) | ||||
|             except OSError as ose: | ||||
|                 if ose.errno != errno.EEXIST: | ||||
|                     raise | ||||
|             write_json_file(data, fn) | ||||
|         except Exception: | ||||
|             tb = traceback.format_exc() | ||||
|             self._ydl.report_warning( | ||||
|                 'Writing cache to %r failed: %s' % (fn, tb)) | ||||
|  | ||||
|     def load(self, section, key, dtype='json', default=None): | ||||
|         assert dtype in ('json',) | ||||
|  | ||||
|         if not self.enabled: | ||||
|             return default | ||||
|  | ||||
|         cache_fn = self._get_cache_fn(section, key, dtype) | ||||
|         try: | ||||
|             try: | ||||
|                 with io.open(cache_fn, 'r', encoding='utf-8') as cachef: | ||||
|                     return json.load(cachef) | ||||
|             except ValueError: | ||||
|                 try: | ||||
|                     file_size = os.path.getsize(cache_fn) | ||||
|                 except (OSError, IOError) as oe: | ||||
|                     file_size = str(oe) | ||||
|                 self._ydl.report_warning( | ||||
|                     'Cache retrieval from %s failed (%s)' % (cache_fn, file_size)) | ||||
|         except IOError: | ||||
|             pass  # No cache available | ||||
|  | ||||
|         return default | ||||
|  | ||||
|     def remove(self): | ||||
|         if not self.enabled: | ||||
|             self._ydl.to_screen('Cache is disabled (Did you combine --no-cache-dir and --rm-cache-dir?)') | ||||
|             return | ||||
|  | ||||
|         cachedir = self._get_root_dir() | ||||
|         if not any((term in cachedir) for term in ('cache', 'tmp')): | ||||
|             raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir) | ||||
|  | ||||
|         self._ydl.to_screen( | ||||
|             'Removing cache dir %s .' % cachedir, skip_eol=True) | ||||
|         if os.path.exists(cachedir): | ||||
|             self._ydl.to_screen('.', skip_eol=True) | ||||
|             shutil.rmtree(cachedir) | ||||
|         self._ydl.to_screen('.') | ||||
| @@ -4,6 +4,7 @@ from .addanime import AddAnimeIE | ||||
| from .adultswim import AdultSwimIE | ||||
| from .aftonbladet import AftonbladetIE | ||||
| from .anitube import AnitubeIE | ||||
| from .anysex import AnySexIE | ||||
| from .aol import AolIE | ||||
| from .allocine import AllocineIE | ||||
| from .aparat import AparatIE | ||||
| @@ -23,6 +24,7 @@ from .auengine import AUEngineIE | ||||
| from .bambuser import BambuserIE, BambuserChannelIE | ||||
| from .bandcamp import BandcampIE, BandcampAlbumIE | ||||
| from .bbccouk import BBCCoUkIE | ||||
| from .beeg import BeegIE | ||||
| from .bilibili import BiliBiliIE | ||||
| from .blinkx import BlinkxIE | ||||
| from .bliptv import BlipTVIE, BlipTVUserIE | ||||
| @@ -68,6 +70,7 @@ from .daum import DaumIE | ||||
| from .dfb import DFBIE | ||||
| from .dotsub import DotsubIE | ||||
| from .dreisat import DreiSatIE | ||||
| from .drtuber import DrTuberIE | ||||
| from .drtv import DRTVIE | ||||
| from .dump import DumpIE | ||||
| from .defense import DefenseGouvFrIE | ||||
| @@ -83,8 +86,9 @@ from .ellentv import ( | ||||
|     EllenTVClipsIE, | ||||
| ) | ||||
| from .elpais import ElPaisIE | ||||
| from .empflix import EmpflixIE | ||||
| from .empflix import EMPFlixIE | ||||
| from .engadget import EngadgetIE | ||||
| from .eporner import EpornerIE | ||||
| from .escapist import EscapistIE | ||||
| from .everyonesmixtape import EveryonesMixtapeIE | ||||
| from .exfm import ExfmIE | ||||
| @@ -134,6 +138,7 @@ from .grooveshark import GroovesharkIE | ||||
| from .hark import HarkIE | ||||
| from .helsinki import HelsinkiIE | ||||
| from .hentaistigma import HentaiStigmaIE | ||||
| from .hornbunny import HornBunnyIE | ||||
| from .hotnewhiphop import HotNewHipHopIE | ||||
| from .howcast import HowcastIE | ||||
| from .howstuffworks import HowStuffWorksIE | ||||
| @@ -230,6 +235,7 @@ from .niconico import NiconicoIE | ||||
| from .ninegag import NineGagIE | ||||
| from .noco import NocoIE | ||||
| from .normalboots import NormalbootsIE | ||||
| from .nosvideo import NosVideoIE | ||||
| from .novamov import NovaMovIE | ||||
| from .nowness import NownessIE | ||||
| from .nowvideo import NowVideoIE | ||||
| @@ -257,6 +263,7 @@ from .podomatic import PodomaticIE | ||||
| from .pornhd import PornHdIE | ||||
| from .pornhub import PornHubIE | ||||
| from .pornotube import PornotubeIE | ||||
| from .promptfile import PromptFileIE | ||||
| from .prosiebensat1 import ProSiebenSat1IE | ||||
| from .pyvideo import PyvideoIE | ||||
| from .radiofrance import RadioFranceIE | ||||
| @@ -321,6 +328,7 @@ from .stanfordoc import StanfordOpenClassroomIE | ||||
| from .steam import SteamIE | ||||
| from .streamcloud import StreamcloudIE | ||||
| from .streamcz import StreamCZIE | ||||
| from .sunporno import SunPornoIE | ||||
| from .swrmediathek import SWRMediathekIE | ||||
| from .syfy import SyfyIE | ||||
| from .sztvhu import SztvHuIE | ||||
| @@ -340,6 +348,7 @@ from .theplatform import ThePlatformIE | ||||
| from .thisav import ThisAVIE | ||||
| from .tinypic import TinyPicIE | ||||
| from .tlc import TlcIE, TlcDeIE | ||||
| from .tnaflix import TNAFlixIE | ||||
| from .toutv import TouTvIE | ||||
| from .toypics import ToypicsUserIE, ToypicsIE | ||||
| from .traileraddict import TrailerAddictIE | ||||
| @@ -392,6 +401,7 @@ from .vine import ( | ||||
| from .viki import VikiIE | ||||
| from .vk import VKIE | ||||
| from .vodlocker import VodlockerIE | ||||
| from .vporn import VpornIE | ||||
| from .vube import VubeIE | ||||
| from .vuclip import VuClipIE | ||||
| from .vulture import VultureIE | ||||
|   | ||||
							
								
								
									
										61
									
								
								youtube_dl/extractor/anysex.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										61
									
								
								youtube_dl/extractor/anysex.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,61 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     parse_duration, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class AnySexIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?anysex\.com/(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://anysex.com/156592/', | ||||
|         'md5': '023e9fbb7f7987f5529a394c34ad3d3d', | ||||
|         'info_dict': { | ||||
|             'id': '156592', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Busty and sexy blondie in her bikini strips for you', | ||||
|             'description': 'md5:de9e418178e2931c10b62966474e1383', | ||||
|             'categories': ['Erotic'], | ||||
|             'duration': 270, | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_url = self._html_search_regex(r"video_url\s*:\s*'([^']+)'", webpage, 'video URL') | ||||
|  | ||||
|         title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title') | ||||
|         description = self._html_search_regex( | ||||
|             r'<div class="description">([^<]+)</div>', webpage, 'description', fatal=False) | ||||
|         thumbnail = self._html_search_regex( | ||||
|             r'preview_url\s*:\s*\'(.*?)\'', webpage, 'thumbnail', fatal=False) | ||||
|  | ||||
|         categories = re.findall( | ||||
|             r'<a href="http://anysex\.com/categories/[^"]+" title="[^"]*">([^<]+)</a>', webpage) | ||||
|  | ||||
|         duration = parse_duration(self._search_regex( | ||||
|             r'<b>Duration:</b> (\d+:\d+)', webpage, 'duration', fatal=False)) | ||||
|         view_count = int_or_none(self._html_search_regex( | ||||
|             r'<b>Views:</b> (\d+)', webpage, 'view count', fatal=False)) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'ext': 'mp4', | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'categories': categories, | ||||
|             'duration': duration, | ||||
|             'view_count': view_count, | ||||
|             'age_limit': 18, | ||||
|         } | ||||
| @@ -78,7 +78,8 @@ class ArteTVPlus7IE(InfoExtractor): | ||||
|  | ||||
|     def _extract_from_webpage(self, webpage, video_id, lang): | ||||
|         json_url = self._html_search_regex( | ||||
|             r'arte_vp_url="(.*?)"', webpage, 'json vp url') | ||||
|             [r'arte_vp_url="(.*?)"', r'data-url="([^"]+)"'], | ||||
|             webpage, 'json vp url') | ||||
|         return self._extract_from_json_url(json_url, video_id, lang) | ||||
|  | ||||
|     def _extract_from_json_url(self, json_url, video_id, lang): | ||||
|   | ||||
							
								
								
									
										65
									
								
								youtube_dl/extractor/beeg.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										65
									
								
								youtube_dl/extractor/beeg.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,65 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class BeegIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://beeg.com/5416503', | ||||
|         'md5': '634526ae978711f6b748fe0dd6c11f57', | ||||
|         'info_dict': { | ||||
|             'id': '5416503', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Sultry Striptease', | ||||
|             'description': 'md5:6db3c6177972822aaba18652ff59c773', | ||||
|             'categories': list,  # NSFW | ||||
|             'thumbnail': 're:https?://.*\.jpg$', | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         quality_arr = self._search_regex( | ||||
|             r'(?s)var\s+qualityArr\s*=\s*{\s*(.+?)\s*}', webpage, 'quality formats') | ||||
|  | ||||
|         formats = [{ | ||||
|             'url': fmt[1], | ||||
|             'format_id': fmt[0], | ||||
|             'height': int(fmt[0][:-1]), | ||||
|         } for fmt in re.findall(r"'([^']+)'\s*:\s*'([^']+)'", quality_arr)] | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'<title>([^<]+)\s*-\s*beeg\.?</title>', webpage, 'title') | ||||
|          | ||||
|         description = self._html_search_regex( | ||||
|             r'<meta name="description" content="([^"]*)"', | ||||
|             webpage, 'description', fatal=False) | ||||
|         thumbnail = self._html_search_regex( | ||||
|             r'\'previewer.url\'\s*:\s*"([^"]*)"', | ||||
|             webpage, 'thumbnail', fatal=False) | ||||
|  | ||||
|         categories_str = self._html_search_regex( | ||||
|             r'<meta name="keywords" content="([^"]+)"', webpage, 'categories', fatal=False) | ||||
|         categories = ( | ||||
|             None if categories_str is None | ||||
|             else categories_str.split(',')) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'categories': categories, | ||||
|             'formats': formats, | ||||
|             'age_limit': 18, | ||||
|         } | ||||
| @@ -1,11 +1,13 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import int_or_none | ||||
|  | ||||
|  | ||||
| translation_table = { | ||||
| _translation_table = { | ||||
|     'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n', | ||||
|     'm': 'a', 'n': 'm', 'p': 'u', 'q': 't', 'r': 's', 'v': 'p', 'x': 'r', | ||||
|     'y': 'l', 'z': 'i', | ||||
| @@ -13,6 +15,10 @@ translation_table = { | ||||
| } | ||||
|  | ||||
|  | ||||
| def _decode(s): | ||||
|     return ''.join(_translation_table.get(c, c) for c in s) | ||||
|  | ||||
|  | ||||
| class CliphunterIE(InfoExtractor): | ||||
|     IE_NAME = 'cliphunter' | ||||
|  | ||||
| @@ -22,10 +28,14 @@ class CliphunterIE(InfoExtractor): | ||||
|     ''' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo', | ||||
|         'file': '1012420.flv', | ||||
|         'md5': '15e7740f30428abf70f4223478dc1225', | ||||
|         'md5': 'a2ba71eebf523859fe527a61018f723e', | ||||
|         'info_dict': { | ||||
|             'id': '1012420', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Fun Jynx Maze solo', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'age_limit': 18, | ||||
|             'duration': 1317, | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -35,22 +45,55 @@ class CliphunterIE(InfoExtractor): | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_title = self._search_regex( | ||||
|             r'mediaTitle = "([^"]+)"', webpage, 'title') | ||||
|  | ||||
|         pl_fiji = self._search_regex( | ||||
|             r'pl_fiji = \'([^\']+)\'', webpage, 'video data') | ||||
|         pl_c_qual = self._search_regex( | ||||
|             r'pl_c_qual = "(.)"', webpage, 'video quality') | ||||
|         video_title = self._search_regex( | ||||
|             r'mediaTitle = "([^"]+)"', webpage, 'title') | ||||
|  | ||||
|         video_url = ''.join(translation_table.get(c, c) for c in pl_fiji) | ||||
|  | ||||
|         video_url = _decode(pl_fiji) | ||||
|         formats = [{ | ||||
|             'url': video_url, | ||||
|             'format_id': pl_c_qual, | ||||
|             'format_id': 'default-%s' % pl_c_qual, | ||||
|         }] | ||||
|  | ||||
|         qualities_json = self._search_regex( | ||||
|             r'var pl_qualities\s*=\s*(.*?);\n', webpage, 'quality info') | ||||
|         qualities_data = json.loads(qualities_json) | ||||
|  | ||||
|         for i, t in enumerate( | ||||
|                 re.findall(r"pl_fiji_([a-z0-9]+)\s*=\s*'([^']+')", webpage)): | ||||
|             quality_id, crypted_url = t | ||||
|             video_url = _decode(crypted_url) | ||||
|             f = { | ||||
|                 'format_id': quality_id, | ||||
|                 'url': video_url, | ||||
|                 'quality': i, | ||||
|             } | ||||
|             if quality_id in qualities_data: | ||||
|                 qd = qualities_data[quality_id] | ||||
|                 m = re.match( | ||||
|                     r'''(?x)<b>(?P<width>[0-9]+)x(?P<height>[0-9]+)<\\/b> | ||||
|                         \s*\(\s*(?P<tbr>[0-9]+)\s*kb\\/s''', qd) | ||||
|                 if m: | ||||
|                     f['width'] = int(m.group('width')) | ||||
|                     f['height'] = int(m.group('height')) | ||||
|                     f['tbr'] = int(m.group('tbr')) | ||||
|             formats.append(f) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         thumbnail = self._search_regex( | ||||
|             r"var\s+mov_thumb\s*=\s*'([^']+)';", | ||||
|             webpage, 'thumbnail', fatal=False) | ||||
|         duration = int_or_none(self._search_regex( | ||||
|             r'pl_dur\s*=\s*([0-9]+)', webpage, 'duration', fatal=False)) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': video_title, | ||||
|             'formats': formats, | ||||
|             'duration': duration, | ||||
|             'age_limit': self._rta_search(webpage), | ||||
|             'thumbnail': thumbnail, | ||||
|         } | ||||
|   | ||||
| @@ -43,14 +43,14 @@ class ComedyCentralShowsIE(InfoExtractor): | ||||
|                           (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/ | ||||
|                          ((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)| | ||||
|                           (?P<clip> | ||||
|                               (?:(?:guests/[^/]+|videos|video-playlists|special-editions)/[^/]+/(?P<videotitle>[^/?#]+)) | ||||
|                               (?:(?:guests/[^/]+|videos|video-playlists|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+)) | ||||
|                               |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?)) | ||||
|                               |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)) | ||||
|                           )| | ||||
|                           (?P<interview> | ||||
|                               extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?))) | ||||
|                      (?:[?#].*|$)''' | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart', | ||||
|         'md5': '4e2f5cb088a83cd8cdb7756132f9739d', | ||||
|         'info_dict': { | ||||
| @@ -61,7 +61,34 @@ class ComedyCentralShowsIE(InfoExtractor): | ||||
|             'uploader': 'thedailyshow', | ||||
|             'title': 'thedailyshow kristen-stewart part 1', | ||||
|         } | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://thedailyshow.cc.com/news-team/michael-che/7wnfel/we-need-to-talk-about-israel', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     _available_formats = ['3500', '2200', '1700', '1200', '750', '400'] | ||||
|  | ||||
| @@ -185,6 +212,9 @@ class ComedyCentralShowsIE(InfoExtractor): | ||||
|                     'ext': self._video_extensions.get(format, 'mp4'), | ||||
|                     'height': h, | ||||
|                     'width': w, | ||||
|  | ||||
|                     'format_note': 'HTTP 400 at the moment (patches welcome!)', | ||||
|                     'preference': -100, | ||||
|                 }) | ||||
|                 formats.append({ | ||||
|                     'format_id': 'rtmp-%s' % format, | ||||
|   | ||||
| @@ -677,9 +677,12 @@ class InfoExtractor(object): | ||||
|                 } | ||||
|                 codecs = last_info.get('CODECS') | ||||
|                 if codecs: | ||||
|                     video, audio = codecs.split(',') | ||||
|                     f['vcodec'] = video.partition('.')[0] | ||||
|                     f['acodec'] = audio.partition('.')[0] | ||||
|                     # TODO: looks like video codec is not always necessarily goes first | ||||
|                     va_codecs = codecs.split(',') | ||||
|                     if va_codecs[0]: | ||||
|                         f['vcodec'] = va_codecs[0].partition('.')[0] | ||||
|                     if len(va_codecs) > 1 and va_codecs[1]: | ||||
|                         f['acodec'] = va_codecs[1].partition('.')[0] | ||||
|                 resolution = last_info.get('RESOLUTION') | ||||
|                 if resolution: | ||||
|                     width_str, height_str = resolution.split('x') | ||||
|   | ||||
| @@ -5,6 +5,7 @@ import re | ||||
| import json | ||||
| import base64 | ||||
| import zlib | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from hashlib import sha1 | ||||
| from math import pow, sqrt, floor | ||||
| @@ -17,6 +18,7 @@ from ..utils import ( | ||||
|     intlist_to_bytes, | ||||
|     unified_strdate, | ||||
|     clean_html, | ||||
|     urlencode_postdata, | ||||
| ) | ||||
| from ..aes import ( | ||||
|     aes_cbc_decrypt, | ||||
| @@ -51,6 +53,26 @@ class CrunchyrollIE(InfoExtractor): | ||||
|         '1080': ('80', '108'), | ||||
|     } | ||||
|  | ||||
|     def _login(self): | ||||
|         (username, password) = self._get_login_info() | ||||
|         if username is None: | ||||
|             return | ||||
|         self.report_login() | ||||
|         login_url = 'https://www.crunchyroll.com/?a=formhandler' | ||||
|         data = urlencode_postdata({ | ||||
|             'formname': 'RpcApiUser_Login', | ||||
|             'name': username, | ||||
|             'password': password, | ||||
|         }) | ||||
|         login_request = compat_urllib_request.Request(login_url, data) | ||||
|         login_request.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|         self._download_webpage(login_request, None, False, 'Wrong login info') | ||||
|  | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         self._login() | ||||
|  | ||||
|  | ||||
|     def _decrypt_subtitles(self, data, iv, id): | ||||
|         data = bytes_to_intlist(data) | ||||
|         iv = bytes_to_intlist(iv) | ||||
| @@ -97,6 +119,75 @@ class CrunchyrollIE(InfoExtractor): | ||||
|             output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text) | ||||
|         return output | ||||
|  | ||||
|     def _convert_subtitles_to_ass(self, subtitles): | ||||
|         output = '' | ||||
|  | ||||
|         def ass_bool(strvalue): | ||||
|             assvalue = '0' | ||||
|             if strvalue == '1': | ||||
|                 assvalue = '-1' | ||||
|             return assvalue | ||||
|  | ||||
|         sub_root = xml.etree.ElementTree.fromstring(subtitles) | ||||
|         if not sub_root: | ||||
|             return output | ||||
|  | ||||
|         output = '[Script Info]\n' | ||||
|         output += 'Title: %s\n' % sub_root.attrib["title"] | ||||
|         output += 'ScriptType: v4.00+\n' | ||||
|         output += 'WrapStyle: %s\n' % sub_root.attrib["wrap_style"] | ||||
|         output += 'PlayResX: %s\n' % sub_root.attrib["play_res_x"] | ||||
|         output += 'PlayResY: %s\n' % sub_root.attrib["play_res_y"] | ||||
|         output += """ScaledBorderAndShadow: yes | ||||
|  | ||||
| [V4+ Styles] | ||||
| Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding | ||||
| """ | ||||
|         for style in sub_root.findall('./styles/style'): | ||||
|             output += 'Style: ' + style.attrib["name"] | ||||
|             output += ',' + style.attrib["font_name"] | ||||
|             output += ',' + style.attrib["font_size"] | ||||
|             output += ',' + style.attrib["primary_colour"] | ||||
|             output += ',' + style.attrib["secondary_colour"] | ||||
|             output += ',' + style.attrib["outline_colour"] | ||||
|             output += ',' + style.attrib["back_colour"] | ||||
|             output += ',' + ass_bool(style.attrib["bold"]) | ||||
|             output += ',' + ass_bool(style.attrib["italic"]) | ||||
|             output += ',' + ass_bool(style.attrib["underline"]) | ||||
|             output += ',' + ass_bool(style.attrib["strikeout"]) | ||||
|             output += ',' + style.attrib["scale_x"] | ||||
|             output += ',' + style.attrib["scale_y"] | ||||
|             output += ',' + style.attrib["spacing"] | ||||
|             output += ',' + style.attrib["angle"] | ||||
|             output += ',' + style.attrib["border_style"] | ||||
|             output += ',' + style.attrib["outline"] | ||||
|             output += ',' + style.attrib["shadow"] | ||||
|             output += ',' + style.attrib["alignment"] | ||||
|             output += ',' + style.attrib["margin_l"] | ||||
|             output += ',' + style.attrib["margin_r"] | ||||
|             output += ',' + style.attrib["margin_v"] | ||||
|             output += ',' + style.attrib["encoding"] | ||||
|             output += '\n' | ||||
|  | ||||
|         output += """ | ||||
| [Events] | ||||
| Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text | ||||
| """ | ||||
|         for event in sub_root.findall('./events/event'): | ||||
|             output += 'Dialogue: 0' | ||||
|             output += ',' + event.attrib["start"] | ||||
|             output += ',' + event.attrib["end"] | ||||
|             output += ',' + event.attrib["style"] | ||||
|             output += ',' + event.attrib["name"] | ||||
|             output += ',' + event.attrib["margin_l"] | ||||
|             output += ',' + event.attrib["margin_r"] | ||||
|             output += ',' + event.attrib["margin_v"] | ||||
|             output += ',' + event.attrib["effect"] | ||||
|             output += ',' + event.attrib["text"] | ||||
|             output += '\n' | ||||
|  | ||||
|         return output | ||||
|  | ||||
|     def _real_extract(self,url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('video_id') | ||||
| @@ -158,6 +249,7 @@ class CrunchyrollIE(InfoExtractor): | ||||
|             }) | ||||
|  | ||||
|         subtitles = {} | ||||
|         sub_format = self._downloader.params.get('subtitlesformat', 'srt') | ||||
|         for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage): | ||||
|             sub_page = self._download_webpage('http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id='+sub_id,\ | ||||
|                                               video_id, note='Downloading subtitles for '+sub_name) | ||||
| @@ -174,7 +266,10 @@ class CrunchyrollIE(InfoExtractor): | ||||
|             lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False) | ||||
|             if not lang_code: | ||||
|                 continue | ||||
|             subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle) | ||||
|             if sub_format == 'ass': | ||||
|                 subtitles[lang_code] = self._convert_subtitles_to_ass(subtitle) | ||||
|             else: | ||||
|                 subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle) | ||||
|  | ||||
|         return { | ||||
|             'id':          video_id, | ||||
|   | ||||
							
								
								
									
										70
									
								
								youtube_dl/extractor/drtuber.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										70
									
								
								youtube_dl/extractor/drtuber.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,70 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import str_to_int | ||||
|  | ||||
|  | ||||
| class DrTuberIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?drtuber\.com/video/(?P<id>\d+)/(?P<display_id>[\w-]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf', | ||||
|         'md5': '93e680cf2536ad0dfb7e74d94a89facd', | ||||
|         'info_dict': { | ||||
|             'id': '1740434', | ||||
|             'display_id': 'hot-perky-blonde-naked-golf', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Hot Perky Blonde Naked Golf', | ||||
|             'like_count': int, | ||||
|             'dislike_count': int, | ||||
|             'comment_count': int, | ||||
|             'categories': list,  # NSFW | ||||
|             'thumbnail': 're:https?://.*\.jpg$', | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         display_id = mobj.group('display_id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         video_url = self._html_search_regex( | ||||
|             r'<source src="([^"]+)"', webpage, 'video URL') | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'<title>([^<]+)\s*-\s*Free', webpage, 'title') | ||||
|  | ||||
|         thumbnail = self._html_search_regex( | ||||
|             r'poster="([^"]+)"', | ||||
|             webpage, 'thumbnail', fatal=False) | ||||
|  | ||||
|         like_count = str_to_int(self._html_search_regex( | ||||
|             r'<span id="rate_likes">\s*<img[^>]+>\s*<span>([\d,\.]+)</span>', | ||||
|             webpage, 'like count', fatal=False)) | ||||
|         dislike_count = str_to_int(self._html_search_regex( | ||||
|             r'<span id="rate_dislikes">\s*<img[^>]+>\s*<span>([\d,\.]+)</span>', | ||||
|             webpage, 'like count', fatal=False)) | ||||
|         comment_count = str_to_int(self._html_search_regex( | ||||
|             r'<span class="comments_count">([\d,\.]+)</span>', | ||||
|             webpage, 'comment count', fatal=False)) | ||||
|  | ||||
|         cats_str = self._html_search_regex( | ||||
|             r'<meta name="keywords" content="([^"]+)"', webpage, 'categories', fatal=False) | ||||
|         categories = None if cats_str is None else cats_str.split(' ') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'url': video_url, | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'like_count': like_count, | ||||
|             'dislike_count': dislike_count, | ||||
|             'comment_count': comment_count, | ||||
|             'categories': categories, | ||||
|             'age_limit': self._rta_search(webpage), | ||||
|         } | ||||
| @@ -1,58 +1,25 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import fix_xml_ampersands | ||||
| from .tnaflix import TNAFlixIE | ||||
|  | ||||
|  | ||||
| class EmpflixIE(InfoExtractor): | ||||
|     _VALID_URL = r'^https?://www\.empflix\.com/videos/.*?-(?P<id>[0-9]+)\.html' | ||||
| class EMPFlixIE(TNAFlixIE): | ||||
|     _VALID_URL = r'^https?://www\.empflix\.com/videos/(?P<display_id>[0-9a-zA-Z-]+)-(?P<id>[0-9]+)\.html' | ||||
|  | ||||
|     _TITLE_REGEX = r'name="title" value="(?P<title>[^"]*)"' | ||||
|     _DESCRIPTION_REGEX = r'name="description" value="([^"]*)"' | ||||
|     _CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html', | ||||
|         'md5': 'b1bc15b6412d33902d6e5952035fcabc', | ||||
|         'info_dict': { | ||||
|             'id': '33051', | ||||
|             'display_id': 'Amateur-Finger-Fuck', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Amateur Finger Fuck', | ||||
|             'description': 'Amateur solo finger fucking.', | ||||
|             'thumbnail': 're:https?://.*\.jpg$', | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         age_limit = self._rta_search(webpage) | ||||
|  | ||||
|         video_title = self._html_search_regex( | ||||
|             r'name="title" value="(?P<title>[^"]*)"', webpage, 'title') | ||||
|         video_description = self._html_search_regex( | ||||
|             r'name="description" value="([^"]*)"', webpage, 'description', fatal=False) | ||||
|  | ||||
|         cfg_url = self._html_search_regex( | ||||
|             r'flashvars\.config = escape\("([^"]+)"', | ||||
|             webpage, 'flashvars.config') | ||||
|  | ||||
|         cfg_xml = self._download_xml( | ||||
|             cfg_url, video_id, note='Downloading metadata', | ||||
|             transform_source=fix_xml_ampersands) | ||||
|  | ||||
|         formats = [ | ||||
|             { | ||||
|                 'url': item.find('videoLink').text, | ||||
|                 'format_id': item.find('res').text, | ||||
|             } for item in cfg_xml.findall('./quality/item') | ||||
|         ] | ||||
|         thumbnail = cfg_xml.find('./startThumb').text | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': video_title, | ||||
|             'description': video_description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'formats': formats, | ||||
|             'age_limit': age_limit, | ||||
|         } | ||||
|   | ||||
							
								
								
									
										75
									
								
								youtube_dl/extractor/eporner.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										75
									
								
								youtube_dl/extractor/eporner.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,75 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     parse_duration, | ||||
|     str_to_int, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class EpornerIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\d+)/(?P<display_id>[\w-]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/', | ||||
|         'md5': '3b427ae4b9d60619106de3185c2987cd', | ||||
|         'info_dict': { | ||||
|             'id': '95008', | ||||
|             'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Infamous Tiffany Teen Strip Tease Video', | ||||
|             'duration': 194, | ||||
|             'view_count': int, | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         display_id = mobj.group('display_id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         title = self._html_search_regex( | ||||
|             r'<title>(.*?) - EPORNER', webpage, 'title') | ||||
|  | ||||
|         redirect_code = self._html_search_regex( | ||||
|             r'<script type="text/javascript" src="/config5/%s/([a-f\d]+)/">' % video_id, | ||||
|             webpage, 'redirect_code') | ||||
|         redirect_url = 'http://www.eporner.com/config5/%s/%s' % (video_id, redirect_code) | ||||
|         player_code = self._download_webpage( | ||||
|             redirect_url, display_id, note='Downloading player config') | ||||
|  | ||||
|         sources = self._search_regex( | ||||
|             r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', player_code, 'sources') | ||||
|  | ||||
|         formats = [] | ||||
|         for video_url, format_id in re.findall(r'file\s*:\s*"([^"]+)",\s*label\s*:\s*"([^"]+)"', sources): | ||||
|             fmt = { | ||||
|                 'url': video_url, | ||||
|                 'format_id': format_id, | ||||
|             } | ||||
|             m = re.search(r'^(\d+)', format_id) | ||||
|             if m: | ||||
|                 fmt['height'] = int(m.group(1)) | ||||
|             formats.append(fmt) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         duration = parse_duration(self._search_regex( | ||||
|             r'class="mbtim">([0-9:]+)</div>', webpage, 'duration', | ||||
|             fatal=False)) | ||||
|         view_count = str_to_int(self._search_regex( | ||||
|             r'id="cinemaviews">\s*([0-9,]+)\s*<small>views', | ||||
|             webpage, 'view count', fatal=False)) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'duration': duration, | ||||
|             'view_count': view_count, | ||||
|             'formats': formats, | ||||
|             'age_limit': self._rta_search(webpage), | ||||
|         } | ||||
| @@ -79,7 +79,8 @@ class FacebookIE(InfoExtractor): | ||||
|  | ||||
|             check_form = { | ||||
|                 'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'), | ||||
|                 'h': self._search_regex(r'name="h" value="(\w*?)"', login_results, 'h'), | ||||
|                 'h': self._search_regex( | ||||
|                     r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h'), | ||||
|                 'name_action_selected': 'dont_save', | ||||
|             } | ||||
|             check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form)) | ||||
|   | ||||
| @@ -366,7 +366,22 @@ class GenericIE(InfoExtractor): | ||||
|                 'extract_flat': False, | ||||
|                 'skip_download': True, | ||||
|             } | ||||
|         } | ||||
|         }, | ||||
|         # MLB embed | ||||
|         { | ||||
|             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/', | ||||
|             'md5': '96f09a37e44da40dd083e12d9a683327', | ||||
|             'info_dict': { | ||||
|                 'id': '33322633', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Ump changes call to ball', | ||||
|                 'description': 'md5:71c11215384298a172a6dcb4c2e20685', | ||||
|                 'duration': 48, | ||||
|                 'timestamp': 1401537900, | ||||
|                 'upload_date': '20140531', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def report_download_webpage(self, video_id): | ||||
| @@ -809,6 +824,12 @@ class GenericIE(InfoExtractor): | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'SBS') | ||||
|  | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1', | ||||
|             webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'MLB') | ||||
|  | ||||
|         # Start with something easy: JW Player in SWFObject | ||||
|         found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) | ||||
|         if not found: | ||||
|   | ||||
							
								
								
									
										56
									
								
								youtube_dl/extractor/hornbunny.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										56
									
								
								youtube_dl/extractor/hornbunny.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,56 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_duration, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class HornBunnyIE(InfoExtractor): | ||||
|     _VALID_URL = r'http?://(?:www\.)?hornbunny\.com/videos/(?P<title_dash>[a-z-]+)-(?P<id>\d+)\.html' | ||||
|     _TEST = { | ||||
|         'url': 'http://hornbunny.com/videos/panty-slut-jerk-off-instruction-5227.html', | ||||
|         'md5': '95e40865aedd08eff60272b704852ad7', | ||||
|         'info_dict': { | ||||
|             'id': '5227', | ||||
|             'ext': 'flv', | ||||
|             'title': 'panty slut jerk off instruction', | ||||
|             'duration': 550, | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage( | ||||
|             url, video_id, note='Downloading initial webpage') | ||||
|         title = self._html_search_regex( | ||||
|             r'class="title">(.*?)</h2>', webpage, 'title') | ||||
|         redirect_url = self._html_search_regex( | ||||
|             r'pg&settings=(.*?)\|0"\);', webpage, 'title') | ||||
|         webpage2 = self._download_webpage(redirect_url, video_id) | ||||
|         video_url = self._html_search_regex( | ||||
|             r'flvMask:(.*?);', webpage2, 'video_url') | ||||
|          | ||||
|         duration = parse_duration(self._search_regex( | ||||
|             r'<strong>Runtime:</strong>\s*([0-9:]+)</div>', | ||||
|             webpage, 'duration', fatal=False)) | ||||
|         view_count = int_or_none(self._search_regex( | ||||
|             r'<strong>Views:</strong>\s*(\d+)</div>', | ||||
|             webpage, 'view count', fatal=False)) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': title, | ||||
|             'ext': 'flv', | ||||
|             'duration': duration, | ||||
|             'view_count': view_count, | ||||
|             'age_limit': 18, | ||||
|         } | ||||
| @@ -11,7 +11,7 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class MLBIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://m\.mlb\.com/(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v(?P<id>n?\d+)' | ||||
|     _VALID_URL = r'https?://m\.mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|shared/video/embed/embed\.html\?.*?\bcontent_id=)(?P<id>n?\d+)' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea', | ||||
| @@ -69,6 +69,10 @@ class MLBIE(InfoExtractor): | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://m.mlb.com/shared/video/embed/embed.html?content_id=35692085&topic_id=6479266&width=400&height=224&property=mlb', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
| @@ -3,18 +3,23 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     remove_end, | ||||
|     parse_duration, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class NBAIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)(?:/index\.html)?(?:\?.*)?$' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html', | ||||
|         'md5': u'c0edcfc37607344e2ff8f13c378c88a4', | ||||
|         'md5': 'c0edcfc37607344e2ff8f13c378c88a4', | ||||
|         'info_dict': { | ||||
|             'id': '0021200253-okc-bkn-recap.nba', | ||||
|             'ext': 'mp4', | ||||
|             'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.', | ||||
|             'title': 'Thunder vs. Nets', | ||||
|             'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.', | ||||
|             'duration': 181, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
| @@ -27,13 +32,18 @@ class NBAIE(InfoExtractor): | ||||
|         video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4' | ||||
|  | ||||
|         shortened_video_id = video_id.rpartition('/')[2] | ||||
|         title = self._og_search_title(webpage, default=shortened_video_id).replace('NBA.com: ', '') | ||||
|         title = remove_end( | ||||
|             self._og_search_title(webpage, default=shortened_video_id), ' : NBA.com') | ||||
|  | ||||
|         description = self._og_search_description(webpage) | ||||
|         duration = parse_duration( | ||||
|             self._html_search_meta('duration', webpage, 'duration', fatal=False)) | ||||
|  | ||||
|         description = self._html_search_regex(r'<meta name="description" (?:content|value)="(.*?)" />', webpage, 'description', fatal=False) | ||||
|  | ||||
|         return { | ||||
|             'id': shortened_video_id, | ||||
|             'url': video_url, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'duration': duration, | ||||
|         } | ||||
|   | ||||
							
								
								
									
										65
									
								
								youtube_dl/extractor/nosvideo.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										65
									
								
								youtube_dl/extractor/nosvideo.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,65 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|     xpath_with_ns, | ||||
| ) | ||||
|  | ||||
| _x = lambda p: xpath_with_ns(p, {'xspf': 'http://xspf.org/ns/0/'}) | ||||
| _find = lambda el, p: el.find(_x(p)).text.strip() | ||||
|  | ||||
|  | ||||
class NosVideoIE(InfoExtractor):
    """Extractor for nosvideo.com video pages and embeds.

    The site hides the player behind a confirmation form; we replay that
    POST, then read the media metadata from the XSPF playlist it exposes.
    """
    _VALID_URL = r'https?://(?:www\.)?nosvideo\.com/' + \
                 r'(?:embed/|\?v=)(?P<id>[A-Za-z0-9]{12})/?'
    _PLAYLIST_URL = 'http://nosvideo.com/xml/{xml_id:s}.xml'
    _TEST = {
        'url': 'http://nosvideo.com/?v=drlp6s40kg54',
        'md5': '4b4ac54c6ad5d70ab88f2c2c6ccec71c',
        'info_dict': {
            'id': 'drlp6s40kg54',
            'ext': 'mp4',
            'title': 'big_buck_bunny_480p_surround-fix.avi.mp4',
            'thumbnail': 're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # Simulate pressing the "Continue to Video" button: the player page
        # is only served in response to this POST.
        fields = {
            'id': video_id,
            'op': 'download1',
            'method_free': 'Continue to Video',
        }
        post = compat_urllib_parse.urlencode(fields)
        req = compat_urllib_request.Request(url, post)
        req.add_header('Content-type', 'application/x-www-form-urlencoded')
        webpage = self._download_webpage(req, video_id,
                                         'Downloading download page')
        # The XML id is embedded in a packed-JS payload of |-separated tokens.
        xml_id = self._search_regex(r'php\|([^\|]+)\|', webpage, 'XML ID')
        playlist_url = self._PLAYLIST_URL.format(xml_id=xml_id)
        playlist = self._download_xml(playlist_url, video_id)

        track = playlist.find(_x('.//xspf:track'))
        title = _find(track, './xspf:title')
        # Keep the media URL in its own variable instead of shadowing the
        # `url` parameter of this method.
        video_url = _find(track, './xspf:file')
        thumbnail = _find(track, './xspf:image')

        formats = [{
            'format_id': 'sd',
            'url': video_url,
        }]

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }
| @@ -5,6 +5,7 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     unified_strdate, | ||||
|     qualities, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -17,7 +18,7 @@ class NPOIE(InfoExtractor): | ||||
|         'md5': '4b3f9c429157ec4775f2c9cb7b911016', | ||||
|         'info_dict': { | ||||
|             'id': 'VPWON_1220719', | ||||
|             'ext': 'mp4', | ||||
|             'ext': 'm4v', | ||||
|             'title': 'Nieuwsuur', | ||||
|             'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.', | ||||
|             'upload_date': '20140622', | ||||
| @@ -39,24 +40,32 @@ class NPOIE(InfoExtractor): | ||||
|             video_id, | ||||
|             note='Downloading token' | ||||
|         ) | ||||
|         token = self._search_regex(r'npoplayer.token = "(.+?)"', token_page, 'token') | ||||
|         streams_info = self._download_json( | ||||
|             'http://ida.omroep.nl/odi/?prid=%s&puboptions=h264_std&adaptive=yes&token=%s' % (video_id, token), | ||||
|             video_id | ||||
|         ) | ||||
|         token = self._search_regex(r'npoplayer\.token = "(.+?)"', token_page, 'token') | ||||
|  | ||||
|         stream_info = self._download_json( | ||||
|             streams_info['streams'][0] + '&type=json', | ||||
|             video_id, | ||||
|             'Downloading stream info' | ||||
|         ) | ||||
|         formats = [] | ||||
|         quality = qualities(['adaptive', 'h264_sb', 'h264_bb', 'h264_std']) | ||||
|         for format_id in metadata['pubopties']: | ||||
|             streams_info = self._download_json( | ||||
|                 'http://ida.omroep.nl/odi/?prid=%s&puboptions=%s&adaptive=yes&token=%s' % (video_id, format_id, token), | ||||
|                 video_id, 'Downloading %s streams info' % format_id) | ||||
|             stream_info = self._download_json( | ||||
|                 streams_info['streams'][0] + '&type=json', | ||||
|                 video_id, 'Downloading %s stream info' % format_id) | ||||
|             if format_id == 'adaptive': | ||||
|                 formats.extend(self._extract_m3u8_formats(stream_info['url'], video_id)) | ||||
|             else: | ||||
|                 formats.append({ | ||||
|                     'url': stream_info['url'], | ||||
|                     'format_id': format_id, | ||||
|                     'quality': quality(format_id), | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': metadata['titel'], | ||||
|             'ext': 'mp4', | ||||
|             'url': stream_info['url'], | ||||
|             'description': metadata['info'], | ||||
|             'thumbnail': metadata['images'][-1]['url'], | ||||
|             'upload_date': unified_strdate(metadata['gidsdatum']), | ||||
|             'formats': formats, | ||||
|         } | ||||
|   | ||||
							
								
								
									
										67
									
								
								youtube_dl/extractor/promptfile.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										67
									
								
								youtube_dl/extractor/promptfile.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,67 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     determine_ext, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
| ) | ||||
|  | ||||
|  | ||||
class PromptFileIE(InfoExtractor):
    """Extractor for promptfile.com file-hosting pages.

    The site gates the player behind a hidden-field form; we echo those
    fields back in a POST to reach the page that embeds the media URL.
    """
    _VALID_URL = r'https?://(?:www\.)?promptfile\.com/l/(?P<id>[0-9A-Z\-]+)'
    _FILE_NOT_FOUND_REGEX = r'<div.+id="not_found_msg".+>.+</div>[^-]'
    _TEST = {
        'url': 'http://www.promptfile.com/l/D21B4746E9-F01462F0FF',
        'md5': 'd1451b6302da7215485837aaea882c4c',
        'info_dict': {
            'id': 'D21B4746E9-F01462F0FF',
            'ext': 'mp4',
            'title': 'Birds.mp4',
            'thumbnail': 're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None:
            raise ExtractorError('Video %s does not exist' % video_id,
                                 expected=True)

        # Collect every hidden form field and post them back unchanged —
        # this is the "continue" form guarding the player page.
        fields = dict(re.findall(r'''(?x)type="hidden"\s+
            name="(.+?)"\s+
            value="(.*?)"
            ''', webpage))
        post = compat_urllib_parse.urlencode(fields)
        req = compat_urllib_request.Request(url, post)
        req.add_header('Content-type', 'application/x-www-form-urlencoded')
        webpage = self._download_webpage(
            req, video_id, 'Downloading video page')

        # Use a dedicated name rather than shadowing the `url` parameter.
        video_url = self._html_search_regex(
            r'url:\s*\'([^\']+)\'', webpage, 'URL')
        title = self._html_search_regex(
            r'<span.+title="([^"]+)">', webpage, 'title')
        thumbnail = self._html_search_regex(
            r'<div id="player_overlay">.*button>.*?<img src="([^"]+)"',
            webpage, 'thumbnail', fatal=False, flags=re.DOTALL)

        formats = [{
            'format_id': 'sd',
            'url': video_url,
            # The page title is the original file name, extension included.
            'ext': determine_ext(title),
        }]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }
| @@ -12,22 +12,16 @@ class RtlXlIE(InfoExtractor): | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677', | ||||
|         'md5': 'cc16baa36a6c169391f0764fa6b16654', | ||||
|         'info_dict': { | ||||
|             'id': '6e4203a6-0a5e-3596-8424-c599a59e0677', | ||||
|             'ext': 'flv', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'RTL Nieuws - Laat', | ||||
|             'description': 'Dagelijks het laatste nieuws uit binnen- en ' | ||||
|                 'buitenland. Voor nog meer nieuws kunt u ook gebruikmaken van ' | ||||
|                 'onze mobiele apps.', | ||||
|             'description': 'md5:6b61f66510c8889923b11f2778c72dc5', | ||||
|             'timestamp': 1408051800, | ||||
|             'upload_date': '20140814', | ||||
|             'duration': 576.880, | ||||
|         }, | ||||
|         'params': { | ||||
|             # We download the first bytes of the first fragment, it can't be | ||||
|             # processed by the f4m downloader beacuse it isn't complete | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -41,14 +35,32 @@ class RtlXlIE(InfoExtractor): | ||||
|         material = info['material'][0] | ||||
|         episode_info = info['episodes'][0] | ||||
|  | ||||
|         f4m_url = 'http://manifest.us.rtl.nl' + material['videopath'] | ||||
|         progname = info['abstracts'][0]['name'] | ||||
|         subtitle = material['title'] or info['episodes'][0]['name'] | ||||
|  | ||||
|         videopath = material['videopath'] | ||||
|         f4m_url = 'http://manifest.us.rtl.nl' + videopath | ||||
|  | ||||
|         formats = self._extract_f4m_formats(f4m_url, uuid) | ||||
|  | ||||
|         video_urlpart = videopath.split('/flash/')[1][:-4] | ||||
|         PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4' | ||||
|  | ||||
|         formats.extend([ | ||||
|             { | ||||
|                 'url': PG_URL_TEMPLATE % ('a2m', video_urlpart), | ||||
|                 'format_id': 'pg-sd', | ||||
|             }, | ||||
|             { | ||||
|                 'url': PG_URL_TEMPLATE % ('a3m', video_urlpart), | ||||
|                 'format_id': 'pg-hd', | ||||
|             } | ||||
|         ]) | ||||
|  | ||||
|         return { | ||||
|             'id': uuid, | ||||
|             'title': '%s - %s' % (progname, subtitle), | ||||
|             'formats': self._extract_f4m_formats(f4m_url, uuid), | ||||
|             'formats': formats, | ||||
|             'timestamp': material['original_date'], | ||||
|             'description': episode_info['synopsis'], | ||||
|             'duration': parse_duration(material.get('duration')), | ||||
|   | ||||
							
								
								
									
										70
									
								
								youtube_dl/extractor/sunporno.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										70
									
								
								youtube_dl/extractor/sunporno.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,70 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     parse_duration, | ||||
|     int_or_none, | ||||
|     qualities, | ||||
|     determine_ext, | ||||
| ) | ||||
|  | ||||
|  | ||||
class SunPornoIE(InfoExtractor):
    """Extractor for sunporno.com video pages."""
    _VALID_URL = r'https?://(?:www\.)?sunporno\.com/videos/(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.sunporno.com/videos/807778/',
        'md5': '6457d3c165fd6de062b99ef6c2ff4c86',
        'info_dict': {
            'id': '807778',
            'ext': 'flv',
            'title': 'md5:0a400058e8105d39e35c35e7c5184164',
            'description': 'md5:a31241990e1bd3a64e72ae99afb325fb',
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': 302,
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        video_id = re.match(self._VALID_URL, url).group('id')

        webpage = self._download_webpage(url, video_id)

        # Basic metadata is scraped straight from the page markup.
        title = self._html_search_regex(
            r'<title>([^<]+)</title>', webpage, 'title')
        description = self._html_search_meta(
            'description', webpage, 'description')
        thumbnail = self._html_search_regex(
            r'poster="([^"]+)"', webpage, 'thumbnail', fatal=False)

        duration = parse_duration(self._search_regex(
            r'<span>Duration: (\d+:\d+)</span>', webpage, 'duration',
            fatal=False))
        view_count = int_or_none(self._html_search_regex(
            r'<span class="views">(\d+)</span>', webpage, 'view count',
            fatal=False))
        comment_count = int_or_none(self._html_search_regex(
            r'(\d+)</b> Comments?', webpage, 'comment count', fatal=False))

        # One format per <source> tag; prefer mp4 over flv when both exist.
        quality = qualities(['mp4', 'flv'])
        formats = [{
            'url': source_url,
            'format_id': determine_ext(source_url),
            'quality': quality(determine_ext(source_url)),
        } for source_url in re.findall(r'<source src="([^"]+)"', webpage)]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'view_count': view_count,
            'comment_count': comment_count,
            'formats': formats,
            'age_limit': 18,
        }
| @@ -1,3 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| @@ -11,24 +13,30 @@ class TechTalksIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://techtalks\.tv/talks/[^/]*/(?P<id>\d+)/' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/', | ||||
|         u'playlist': [ | ||||
|         'url': 'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/', | ||||
|         'info_dict': { | ||||
|             'id': '57758', | ||||
|             'title': 'Learning Topic Models --- Going beyond SVD', | ||||
|         }, | ||||
|         'playlist': [ | ||||
|             { | ||||
|                 u'file': u'57758.flv', | ||||
|                 u'info_dict': { | ||||
|                     u'title': u'Learning Topic Models --- Going beyond SVD', | ||||
|                 'info_dict': { | ||||
|                     'id': '57758', | ||||
|                     'ext': 'flv', | ||||
|                     'title': 'Learning Topic Models --- Going beyond SVD', | ||||
|                 }, | ||||
|             }, | ||||
|             { | ||||
|                 u'file': u'57758-slides.flv', | ||||
|                 u'info_dict': { | ||||
|                     u'title': u'Learning Topic Models --- Going beyond SVD', | ||||
|                 'info_dict': { | ||||
|                     'id': '57758-slides', | ||||
|                     'ext': 'flv', | ||||
|                     'title': 'Learning Topic Models --- Going beyond SVD', | ||||
|                 }, | ||||
|             }, | ||||
|         ], | ||||
|         u'params': { | ||||
|         'params': { | ||||
|             # rtmp download | ||||
|             u'skip_download': True, | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
| @@ -36,30 +44,36 @@ class TechTalksIE(InfoExtractor): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         talk_id = mobj.group('id') | ||||
|         webpage = self._download_webpage(url, talk_id) | ||||
|         rtmp_url = self._search_regex(r'netConnectionUrl: \'(.*?)\'', webpage, | ||||
|             u'rtmp url') | ||||
|         play_path = self._search_regex(r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"', | ||||
|             webpage, u'presenter play path') | ||||
|         rtmp_url = self._search_regex( | ||||
|             r'netConnectionUrl: \'(.*?)\'', webpage, 'rtmp url') | ||||
|         play_path = self._search_regex( | ||||
|             r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"', | ||||
|             webpage, 'presenter play path') | ||||
|         title = clean_html(get_element_by_attribute('class', 'title', webpage)) | ||||
|         video_info = { | ||||
|                 'id': talk_id, | ||||
|                 'title': title, | ||||
|                 'url': rtmp_url, | ||||
|                 'play_path': play_path, | ||||
|                 'ext': 'flv', | ||||
|             } | ||||
|             'id': talk_id, | ||||
|             'title': title, | ||||
|             'url': rtmp_url, | ||||
|             'play_path': play_path, | ||||
|             'ext': 'flv', | ||||
|         } | ||||
|         m_slides = re.search(r'<a class="slides" href=\'(.*?)\'', webpage) | ||||
|         if m_slides is None: | ||||
|             return video_info | ||||
|         else: | ||||
|             return [ | ||||
|                 video_info, | ||||
|                 # The slides video | ||||
|                 { | ||||
|                     'id': talk_id + '-slides', | ||||
|                     'title': title, | ||||
|                     'url': rtmp_url, | ||||
|                     'play_path': m_slides.group(1), | ||||
|                     'ext': 'flv', | ||||
|                 }, | ||||
|             ] | ||||
|             return { | ||||
|                 '_type': 'playlist', | ||||
|                 'id': talk_id, | ||||
|                 'title': title, | ||||
|                 'entries': [ | ||||
|                     video_info, | ||||
|                     # The slides video | ||||
|                     { | ||||
|                         'id': talk_id + '-slides', | ||||
|                         'title': title, | ||||
|                         'url': rtmp_url, | ||||
|                         'play_path': m_slides.group(1), | ||||
|                         'ext': 'flv', | ||||
|                     }, | ||||
|                 ], | ||||
|             } | ||||
|   | ||||
							
								
								
									
										84
									
								
								youtube_dl/extractor/tnaflix.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										84
									
								
								youtube_dl/extractor/tnaflix.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,84 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     parse_duration, | ||||
|     fix_xml_ampersands, | ||||
| ) | ||||
|  | ||||
|  | ||||
class TNAFlixIE(InfoExtractor):
    """Extractor for tnaflix.com video pages.

    Page-level metadata (title, description, duration) comes from the HTML;
    the actual stream URLs come from the flashvars config XML. The class-level
    regexes are hooks so sibling sites can subclass and override them.
    """
    _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/(?P<cat_id>[\w-]+)/(?P<display_id>[\w-]+)/video(?P<id>\d+)'

    # Subclass hooks: None for _TITLE_REGEX means "fall back to og:title".
    _TITLE_REGEX = None
    _DESCRIPTION_REGEX = r'<h3 itemprop="description">([^<]+)</h3>'
    _CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'

    _TEST = {
        'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878',
        'md5': 'ecf3498417d09216374fc5907f9c6ec0',
        'info_dict': {
            'id': '553878',
            'display_id': 'Carmella-Decesare-striptease',
            'ext': 'mp4',
            'title': 'Carmella Decesare - striptease',
            'description': '',
            'thumbnail': 're:https?://.*\.jpg$',
            'duration': 91,
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        webpage = self._download_webpage(url, display_id)

        title = self._html_search_regex(
            self._TITLE_REGEX, webpage, 'title') if self._TITLE_REGEX else self._og_search_title(webpage)
        description = self._html_search_regex(
            self._DESCRIPTION_REGEX, webpage, 'description', fatal=False, default='')

        age_limit = self._rta_search(webpage)

        # The meta duration is ISO-8601-ish ("P00H01M31S"); drop the leading
        # 'P' so parse_duration can handle the remainder.
        duration = self._html_search_meta('duration', webpage, 'duration', default=None)
        if duration:
            duration = parse_duration(duration[1:])

        cfg_url = self._html_search_regex(
            self._CONFIG_REGEX, webpage, 'flashvars.config')

        cfg_xml = self._download_xml(
            cfg_url, display_id, note='Downloading metadata',
            transform_source=fix_xml_ampersands)

        # The thumbnail element is optional in the config XML; don't crash
        # when it is absent.
        thumb_el = cfg_xml.find('./startThumb')
        thumbnail = thumb_el.text if thumb_el is not None else None

        formats = []
        for item in cfg_xml.findall('./quality/item'):
            # Strip the speed throttle parameter so we get full-speed URLs.
            video_url = re.sub(r'speed=\d+', 'speed=', item.find('videoLink').text)
            format_id = item.find('res').text
            fmt = {
                'url': video_url,
                'format_id': format_id,
            }
            # Resolution ids look like "720p"; the numeric prefix is the height.
            m = re.search(r'^(\d+)', format_id)
            if m:
                fmt['height'] = int(m.group(1))
            formats.append(fmt)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'age_limit': age_limit,
            'formats': formats,
        }
| @@ -47,7 +47,7 @@ class ToypicsUserIE(InfoExtractor): | ||||
|         'info_dict': { | ||||
|             'id': 'Mikey', | ||||
|         }, | ||||
|         'playlist_mincount': 9917, | ||||
|         'playlist_mincount': 19, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
| @@ -1,5 +1,7 @@ | ||||
| # coding: utf-8 | ||||
|  | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
|  | ||||
| @@ -9,22 +11,29 @@ from .common import InfoExtractor | ||||
| class TudouIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs|albumplay)/(?:view|(.+?))/(?:([^/]+)|([^/]+))(?:\.html)?' | ||||
|     _TESTS = [{ | ||||
|         u'url': u'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html', | ||||
|         u'file': u'159448201.f4v', | ||||
|         u'md5': u'140a49ed444bd22f93330985d8475fcb', | ||||
|         u'info_dict': { | ||||
|             u"title": u"卡马乔国足开大脚长传冲吊集锦" | ||||
|         'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html', | ||||
|         'md5': '140a49ed444bd22f93330985d8475fcb', | ||||
|         'info_dict': { | ||||
|             'id': '159448201', | ||||
|             'ext': 'f4v', | ||||
|             'title': '卡马乔国足开大脚长传冲吊集锦', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|         } | ||||
|     }, | ||||
|     { | ||||
|         u'url': u'http://www.tudou.com/albumplay/TenTw_JgiPM/PzsAs5usU9A.html', | ||||
|         u'file': u'todo.mp4', | ||||
|         u'md5': u'todo.mp4', | ||||
|         u'info_dict': { | ||||
|             u'title': u'todo.mp4', | ||||
|     }, { | ||||
|         'url': 'http://www.tudou.com/programs/view/ajX3gyhL0pc/', | ||||
|         'info_dict': { | ||||
|             'id': '117049447', | ||||
|             'ext': 'f4v', | ||||
|             'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://www.tudou.com/albumplay/TenTw_JgiPM/PzsAs5usU9A.html', | ||||
|         'info_dict': { | ||||
|             'title': 'todo.mp4', | ||||
|         }, | ||||
|         u'add_ie': [u'Youku'], | ||||
|         u'skip': u'Only works from China' | ||||
|         'add_ie': ['Youku'], | ||||
|         'skip': 'Only works from China' | ||||
|     }] | ||||
|  | ||||
|     def _url_for_id(self, id, quality = None): | ||||
| @@ -44,20 +53,22 @@ class TudouIE(InfoExtractor): | ||||
|         if m and m.group(1): | ||||
|             return { | ||||
|                 '_type': 'url', | ||||
|                 'url': u'youku:' + m.group(1), | ||||
|                 'url': 'youku:' + m.group(1), | ||||
|                 'ie_key': 'Youku' | ||||
|             } | ||||
|  | ||||
|         title = self._search_regex( | ||||
|             r",kw:\s*['\"](.+?)[\"']", webpage, u'title') | ||||
|             r",kw:\s*['\"](.+?)[\"']", webpage, 'title') | ||||
|         thumbnail_url = self._search_regex( | ||||
|             r",pic:\s*[\"'](.+?)[\"']", webpage, u'thumbnail URL', fatal=False) | ||||
|             r",pic:\s*[\"'](.+?)[\"']", webpage, 'thumbnail URL', fatal=False) | ||||
|  | ||||
|         segs_json = self._search_regex(r'segs: \'(.*)\'', webpage, 'segments') | ||||
|         segments = json.loads(segs_json) | ||||
|         # It looks like the keys are the arguments that have to be passed as | ||||
|         # the hd field in the request url, we pick the higher | ||||
|         quality = sorted(segments.keys())[-1] | ||||
|         # Also, filter non-number qualities (see issue #3643). | ||||
|         quality = sorted(filter(lambda k: k.isdigit(), segments.keys()), | ||||
|                          key=lambda k: int(k))[-1] | ||||
|         parts = segments[quality] | ||||
|         result = [] | ||||
|         len_parts = len(parts) | ||||
| @@ -67,12 +78,13 @@ class TudouIE(InfoExtractor): | ||||
|             part_id = part['k'] | ||||
|             final_url = self._url_for_id(part_id, quality) | ||||
|             ext = (final_url.split('?')[0]).split('.')[-1] | ||||
|             part_info = {'id': part_id, | ||||
|                           'url': final_url, | ||||
|                           'ext': ext, | ||||
|                           'title': title, | ||||
|                           'thumbnail': thumbnail_url, | ||||
|                           } | ||||
|             part_info = { | ||||
|                 'id': '%s' % part_id, | ||||
|                 'url': final_url, | ||||
|                 'ext': ext, | ||||
|                 'title': title, | ||||
|                 'thumbnail': thumbnail_url, | ||||
|             } | ||||
|             result.append(part_info) | ||||
|  | ||||
|         return result | ||||
|   | ||||
| @@ -5,80 +5,82 @@ import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     unified_strdate, | ||||
|     clean_html, | ||||
|     int_or_none, | ||||
|     float_or_none, | ||||
|     str_to_int, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TvigleIE(InfoExtractor): | ||||
|     IE_NAME = 'tvigle' | ||||
|     IE_DESC = 'Интернет-телевидение Tvigle.ru' | ||||
|     _VALID_URL = r'http://(?:www\.)?tvigle\.ru/category/.+?[\?&]v(?:ideo)?=(?P<id>\d+)' | ||||
|     _VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<display_id>[^/]+)/$' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.tvigle.ru/category/cinema/1608/?video=503081', | ||||
|             'md5': '09afba4616666249f087efc6dcf83cb3', | ||||
|             'url': 'http://www.tvigle.ru/video/brat-2/', | ||||
|             'md5': '72cb7eab33e54314e1790da402d3c9c3', | ||||
|             'info_dict': { | ||||
|                 'id': '503081', | ||||
|                 'ext': 'flv', | ||||
|                 'id': '5119390', | ||||
|                 'display_id': 'brat-2', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Брат 2 ', | ||||
|                 'description': 'md5:f5a42970f50648cee3d7ad740f3ae769', | ||||
|                 'upload_date': '20110919', | ||||
|                 'description': 'md5:5751f4fe345a58e1692585c361294bd8', | ||||
|                 'duration': 7356.369, | ||||
|                 'age_limit': 0, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.tvigle.ru/category/men/vysotskiy_vospominaniya02/?flt=196&v=676433', | ||||
|             'md5': 'e7efe5350dd5011d0de6550b53c3ba7b', | ||||
|             'url': 'http://www.tvigle.ru/video/vladimir-vysotskii/vedushchii-teleprogrammy-60-minut-ssha-o-vladimire-vysotskom/', | ||||
|             'md5': 'd9012d7c7c598fe7a11d7fb46dc1f574', | ||||
|             'info_dict': { | ||||
|                 'id': '676433', | ||||
|                 'ext': 'flv', | ||||
|                 'id': '5142516', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Ведущий телепрограммы «60 минут» (США) о Владимире Высоцком', | ||||
|                 'description': 'md5:027f7dc872948f14c96d19b4178428a4', | ||||
|                 'upload_date': '20121218', | ||||
|                 'duration': 186.080, | ||||
|                 'age_limit': 0, | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         display_id = mobj.group('display_id') | ||||
|  | ||||
|         video_data = self._download_xml( | ||||
|             'http://www.tvigle.ru/xml/single.php?obj=%s' % video_id, video_id, 'Downloading video XML') | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         video = video_data.find('./video') | ||||
|         video_id = self._html_search_regex( | ||||
|             r'<li class="video-preview current_playing" id="(\d+)">', webpage, 'video id') | ||||
|  | ||||
|         title = video.get('name') | ||||
|         description = video.get('anons') | ||||
|         if description: | ||||
|             description = clean_html(description) | ||||
|         thumbnail = video_data.get('img') | ||||
|         upload_date = unified_strdate(video.get('date')) | ||||
|         like_count = int_or_none(video.get('vtp')) | ||||
|         video_data = self._download_json( | ||||
|             'http://cloud.tvigle.ru/api/play/video/%s/' % video_id, display_id) | ||||
|  | ||||
|         item = video_data['playlist']['items'][0] | ||||
|  | ||||
|         title = item['title'] | ||||
|         description = item['description'] | ||||
|         thumbnail = item['thumbnail'] | ||||
|         duration = float_or_none(item['durationMilliseconds'], 1000) | ||||
|         age_limit = str_to_int(item['ageRestrictions']) | ||||
|  | ||||
|         formats = [] | ||||
|         for num, (format_id, format_note) in enumerate([['low_file', 'SQ'], ['file', 'HQ'], ['hd', 'HD 720']]): | ||||
|             video_url = video.get(format_id) | ||||
|             if not video_url: | ||||
|                 continue | ||||
|             formats.append({ | ||||
|                 'url': video_url, | ||||
|                 'format_id': format_id, | ||||
|                 'format_note': format_note, | ||||
|                 'quality': num, | ||||
|             }) | ||||
|  | ||||
|         for vcodec, fmts in item['videos'].items(): | ||||
|             for quality, video_url in fmts.items(): | ||||
|                 formats.append({ | ||||
|                     'url': video_url, | ||||
|                     'format_id': '%s-%s' % (vcodec, quality), | ||||
|                     'vcodec': vcodec, | ||||
|                     'height': int(quality[:-1]), | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'upload_date': upload_date, | ||||
|             'like_count': like_count, | ||||
|             'age_limit': 18, | ||||
|             'duration': duration, | ||||
|             'age_limit': age_limit, | ||||
|             'formats': formats, | ||||
|         } | ||||
							
								
								
									
										99
									
								
								youtube_dl/extractor/vporn.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										99
									
								
								youtube_dl/extractor/vporn.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,99 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     parse_duration, | ||||
|     str_to_int, | ||||
| ) | ||||
|  | ||||
|  | ||||
class VpornIE(InfoExtractor):
    """Extractor for vporn.com video pages."""

    _VALID_URL = r'https?://(?:www\.)?vporn\.com/[^/]+/(?P<display_id>[^/]+)/(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.vporn.com/masturbation/violet-on-her-th-birthday/497944/',
        'md5': 'facf37c1b86546fa0208058546842c55',
        'info_dict': {
            'id': '497944',
            'display_id': 'violet-on-her-th-birthday',
            'ext': 'mp4',
            'title': 'Violet on her 19th birthday',
            'description': 'Violet dances in front of the camera which is sure to get you horny.',
            'thumbnail': 're:^https?://.*\.jpg$',
            'uploader': 'kileyGrope',
            'categories': ['Masturbation', 'Teen'],
            'duration': 393,
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        webpage = self._download_webpage(url, display_id)

        title = self._html_search_regex(
            r'videoname\s*=\s*\'([^\']+)\'', webpage, 'title').strip()
        description = self._html_search_regex(
            r'<div class="description_txt">(.*?)</div>', webpage, 'description', fatal=False)
        # Fix: this search was mislabeled 'description' in error messages;
        # it actually extracts the thumbnail URL.
        thumbnail = self._html_search_regex(
            r'flashvars\.imageUrl\s*=\s*"([^"]+)"', webpage, 'thumbnail', fatal=False, default=None)
        if thumbnail:
            # imageUrl is site-relative.
            thumbnail = 'http://www.vporn.com' + thumbnail

        uploader = self._html_search_regex(
            r'(?s)UPLOADED BY.*?<a href="/user/[^"]+">([^<]+)</a>',
            webpage, 'uploader', fatal=False)

        categories = re.findall(r'<a href="/cat/[^"]+">([^<]+)</a>', webpage)

        duration = parse_duration(self._search_regex(
            r'duration (\d+ min \d+ sec)', webpage, 'duration', fatal=False))

        view_count = str_to_int(self._html_search_regex(
            r'<span>([\d,\.]+) VIEWS</span>', webpage, 'view count', fatal=False))
        like_count = str_to_int(self._html_search_regex(
            r'<span id="like" class="n">([\d,\.]+)</span>', webpage, 'like count', fatal=False))
        dislike_count = str_to_int(self._html_search_regex(
            r'<span id="dislike" class="n">([\d,\.]+)</span>', webpage, 'dislike count', fatal=False))
        comment_count = str_to_int(self._html_search_regex(
            r'<h4>Comments \(<b>([\d,\.]+)</b>\)</h4>', webpage, 'comment count', fatal=False))

        formats = []
        # One flashvars.videoUrl<suffix> = "<url>" entry per quality variant.
        for format_id, video_url in re.findall(
                r'flashvars\.videoUrl([^=]+?)\s*=\s*"([^"]+)"', webpage):
            fmt = {
                'url': video_url,
                'format_id': format_id,
            }
            # File names encode resolution and bitrate: ..._WxH_VBRk.mp4
            m = re.search(r'_(?P<width>\d+)x(?P<height>\d+)_(?P<vbr>\d+)k\.mp4$', video_url)
            if m:
                fmt.update({
                    'width': int(m.group('width')),
                    'height': int(m.group('height')),
                    'vbr': int(m.group('vbr')),
                })
            formats.append(fmt)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'categories': categories,
            'duration': duration,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'comment_count': comment_count,
            'age_limit': 18,
            'formats': formats,
        }
| @@ -13,6 +13,9 @@ class WashingtonPostIE(InfoExtractor): | ||||
|     _VALID_URL = r'^https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/', | ||||
|         'info_dict': { | ||||
|             'title': 'Sinkhole of bureaucracy', | ||||
|         }, | ||||
|         'playlist': [{ | ||||
|             'md5': 'c3f4b4922ffa259243f68e928db2db8c', | ||||
|             'info_dict': { | ||||
|   | ||||
| @@ -1,7 +1,5 @@ | ||||
| # coding: utf-8 | ||||
|  | ||||
| import errno | ||||
| import io | ||||
| import itertools | ||||
| import json | ||||
| import os.path | ||||
| @@ -21,7 +19,6 @@ from ..utils import ( | ||||
|     compat_str, | ||||
|  | ||||
|     clean_html, | ||||
|     get_cachedir, | ||||
|     get_element_by_id, | ||||
|     get_element_by_attribute, | ||||
|     ExtractorError, | ||||
| @@ -30,7 +27,6 @@ from ..utils import ( | ||||
|     unescapeHTML, | ||||
|     unified_strdate, | ||||
|     orderedSet, | ||||
|     write_json_file, | ||||
|     uppercase_escape, | ||||
| ) | ||||
|  | ||||
| @@ -316,6 +312,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|                 u"upload_date": u"20121002", | ||||
|                 u"description": u"test chars:  \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .", | ||||
|                 u"categories": [u'Science & Technology'], | ||||
|                 'like_count': int, | ||||
|                 'dislike_count': int, | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
| @@ -433,26 +431,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         func_id = '%s_%s_%s' % ( | ||||
|             player_type, player_id, self._signature_cache_id(example_sig)) | ||||
|         assert os.path.basename(func_id) == func_id | ||||
|         cache_dir = get_cachedir(self._downloader.params) | ||||
|  | ||||
|         cache_enabled = cache_dir is not None | ||||
|         if cache_enabled: | ||||
|             cache_fn = os.path.join(os.path.expanduser(cache_dir), | ||||
|                                     u'youtube-sigfuncs', | ||||
|                                     func_id + '.json') | ||||
|             try: | ||||
|                 with io.open(cache_fn, 'r', encoding='utf-8') as cachef: | ||||
|                     cache_spec = json.load(cachef) | ||||
|                 return lambda s: u''.join(s[i] for i in cache_spec) | ||||
|             except IOError: | ||||
|                 pass  # No cache available | ||||
|             except ValueError: | ||||
|                 try: | ||||
|                     file_size = os.path.getsize(cache_fn) | ||||
|                 except (OSError, IOError) as oe: | ||||
|                     file_size = str(oe) | ||||
|                 self._downloader.report_warning( | ||||
|                     u'Cache %s failed (%s)' % (cache_fn, file_size)) | ||||
|         cache_spec = self._downloader.cache.load(u'youtube-sigfuncs', func_id) | ||||
|         if cache_spec is not None: | ||||
|             return lambda s: u''.join(s[i] for i in cache_spec) | ||||
|  | ||||
|         if player_type == 'js': | ||||
|             code = self._download_webpage( | ||||
| @@ -470,22 +452,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         else: | ||||
|             assert False, 'Invalid player type %r' % player_type | ||||
|  | ||||
|         if cache_enabled: | ||||
|             try: | ||||
|                 test_string = u''.join(map(compat_chr, range(len(example_sig)))) | ||||
|                 cache_res = res(test_string) | ||||
|                 cache_spec = [ord(c) for c in cache_res] | ||||
|                 try: | ||||
|                     os.makedirs(os.path.dirname(cache_fn)) | ||||
|                 except OSError as ose: | ||||
|                     if ose.errno != errno.EEXIST: | ||||
|                         raise | ||||
|                 write_json_file(cache_spec, cache_fn) | ||||
|             except Exception: | ||||
|                 tb = traceback.format_exc() | ||||
|                 self._downloader.report_warning( | ||||
|                     u'Writing cache to %r failed: %s' % (cache_fn, tb)) | ||||
|         if cache_spec is None: | ||||
|             test_string = u''.join(map(compat_chr, range(len(example_sig)))) | ||||
|             cache_res = res(test_string) | ||||
|             cache_spec = [ord(c) for c in cache_res] | ||||
|  | ||||
|         self._downloader.cache.store(u'youtube-sigfuncs', func_id, cache_spec) | ||||
|         return res | ||||
|  | ||||
|     def _print_sig_code(self, func, example_sig): | ||||
| @@ -784,7 +756,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split()) | ||||
|             upload_date = unified_strdate(upload_date) | ||||
|  | ||||
|         m_cat_container = get_element_by_id("eow-category", video_webpage) | ||||
|         m_cat_container = self._search_regex( | ||||
|             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>', | ||||
|             video_webpage, 'categories', fatal=False) | ||||
|         if m_cat_container: | ||||
|             category = self._html_search_regex( | ||||
|                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category', | ||||
| @@ -813,15 +787,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             else: | ||||
|                 video_description = u'' | ||||
|  | ||||
|         def _extract_count(klass): | ||||
|         def _extract_count(count_name): | ||||
|             count = self._search_regex( | ||||
|                 r'class="%s">([\d,]+)</span>' % re.escape(klass), | ||||
|                 video_webpage, klass, default=None) | ||||
|                 r'id="watch-%s"[^>]*>.*?([\d,]+)\s*</span>' % re.escape(count_name), | ||||
|                 video_webpage, count_name, default=None) | ||||
|             if count is not None: | ||||
|                 return int(count.replace(',', '')) | ||||
|             return None | ||||
|         like_count = _extract_count(u'likes-count') | ||||
|         dislike_count = _extract_count(u'dislikes-count') | ||||
|         like_count = _extract_count(u'like') | ||||
|         dislike_count = _extract_count(u'dislike') | ||||
|  | ||||
|         # subtitles | ||||
|         video_subtitles = self.extract_subtitles(video_id, video_webpage) | ||||
| @@ -1051,21 +1025,26 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): | ||||
|         self._login() | ||||
|  | ||||
|     def _ids_to_results(self, ids): | ||||
|         return [self.url_result(vid_id, 'Youtube', video_id=vid_id) | ||||
|                        for vid_id in ids] | ||||
|         return [ | ||||
|             self.url_result(vid_id, 'Youtube', video_id=vid_id) | ||||
|             for vid_id in ids] | ||||
|  | ||||
|     def _extract_mix(self, playlist_id): | ||||
|         # The mixes are generated from a a single video | ||||
|         # the id of the playlist is just 'RD' + video_id | ||||
|         url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id) | ||||
|         webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix') | ||||
|         webpage = self._download_webpage( | ||||
|             url, playlist_id, u'Downloading Youtube mix') | ||||
|         search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage) | ||||
|         title_span = (search_title('playlist-title') or | ||||
|             search_title('title long-title') or search_title('title')) | ||||
|         title_span = ( | ||||
|             search_title('playlist-title') or | ||||
|             search_title('title long-title') or | ||||
|             search_title('title')) | ||||
|         title = clean_html(title_span) | ||||
|         video_re = r'''(?x)data-video-username=".*?".*? | ||||
|                        href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id) | ||||
|         ids = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL)) | ||||
|         ids = orderedSet(re.findall( | ||||
|             r'''(?xs)data-video-username=".*?".*? | ||||
|                        href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id), | ||||
|             webpage)) | ||||
|         url_results = self._ids_to_results(ids) | ||||
|  | ||||
|         return self.playlist_result(url_results, playlist_id, title) | ||||
| @@ -1158,6 +1137,7 @@ class YoutubeTopListIE(YoutubePlaylistIE): | ||||
|             msg = u'Downloading Youtube mix' | ||||
|             if i > 0: | ||||
|                 msg += ', retry #%d' % i | ||||
|  | ||||
|             webpage = self._download_webpage(url, title, msg) | ||||
|             ids = orderedSet(re.findall(video_re, webpage)) | ||||
|             if ids: | ||||
| @@ -1430,12 +1410,6 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): | ||||
|             paging = mobj.group('paging') | ||||
|         return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE) | ||||
|  | ||||
| class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor): | ||||
|     IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)' | ||||
|     _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?' | ||||
|     _FEED_NAME = 'subscriptions' | ||||
|     _PLAYLIST_TITLE = u'Youtube Subscriptions' | ||||
|  | ||||
| class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor): | ||||
|     IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)' | ||||
|     _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?' | ||||
| @@ -1468,6 +1442,43 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): | ||||
|         return self.url_result(playlist_id, 'YoutubePlaylist') | ||||
|  | ||||
|  | ||||
class YoutubeSubscriptionsIE(YoutubePlaylistIE):
    """Extract the authenticated user's subscriptions feed as a playlist."""
    IE_NAME = u'youtube:subscriptions'
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'

    def _real_extract(self, url):
        title = u'Youtube Subscriptions'
        page = self._download_webpage('https://www.youtube.com/feed/subscriptions', title)

        # Same walk as for a playlist, except the video-id regex carries
        # no index component.
        video_ids = []
        content_html = page
        more_widget_html = page

        page_num = 0
        while True:
            page_num += 1
            page_matches = re.findall(
                r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
            video_ids.extend(orderedSet(page_matches))

            # Follow the "load more" continuation link until it disappears.
            load_more = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if load_more is None:
                break

            more = self._download_json(
                'https://youtube.com/%s' % load_more.group('more'), title,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        return {
            '_type': 'playlist',
            'title': title,
            'entries': self._ids_to_results(video_ids),
        }
|  | ||||
|  | ||||
| class YoutubeTruncatedURLIE(InfoExtractor): | ||||
|     IE_NAME = 'youtube:truncated_url' | ||||
|     IE_DESC = False  # Do not list | ||||
|   | ||||
| @@ -1076,12 +1076,6 @@ def intlist_to_bytes(xs): | ||||
|         return bytes(xs) | ||||
|  | ||||
|  | ||||
def get_cachedir(params=None):
    """Return the cache directory youtube-dl should use.

    A 'cachedir' entry in *params* wins; otherwise fall back to
    $XDG_CACHE_HOME/youtube-dl (or ~/.cache/youtube-dl when the
    environment variable is unset).

    The default was a mutable dict literal; replaced with the
    None-sentinel idiom (behavior is unchanged, params is only read).
    """
    if params is None:
        params = {}
    cache_root = os.environ.get('XDG_CACHE_HOME',
                                os.path.expanduser('~/.cache'))
    return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
|  | ||||
|  | ||||
| # Cross-platform file locking | ||||
| if sys.platform == 'win32': | ||||
|     import ctypes.wintypes | ||||
| @@ -1141,10 +1135,10 @@ else: | ||||
|     import fcntl | ||||
|  | ||||
|     def _lock_file(f, exclusive): | ||||
|         fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH) | ||||
|         fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH) | ||||
|  | ||||
|     def _unlock_file(f): | ||||
|         fcntl.lockf(f, fcntl.LOCK_UN) | ||||
|         fcntl.flock(f, fcntl.LOCK_UN) | ||||
|  | ||||
|  | ||||
| class locked_file(object): | ||||
| @@ -1318,9 +1312,10 @@ def str_or_none(v, default=None): | ||||
|  | ||||
|  | ||||
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    # Strip thousands separators (',' and '.') and any '+' sign before
    # converting, e.g. '1,000' -> 1000.  The merged diff left two
    # consecutive re.sub calls; the first (without '+') was redundant.
    if int_str is None:
        return None
    int_str = re.sub(r'[,\.\+]', u'', int_str)
    return int(int_str)
|  | ||||
|  | ||||
| @@ -1332,8 +1327,10 @@ def parse_duration(s): | ||||
|     if s is None: | ||||
|         return None | ||||
|  | ||||
|     s = s.strip() | ||||
|  | ||||
|     m = re.match( | ||||
|         r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?(?::[0-9]+)?(?P<ms>\.[0-9]+)?$', s) | ||||
|         r'(?i)(?:(?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?(?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$', s) | ||||
|     if not m: | ||||
|         return None | ||||
|     res = int(m.group('secs')) | ||||
|   | ||||
| @@ -1,2 +1,2 @@ | ||||
|  | ||||
| __version__ = '2014.08.28.1' | ||||
| __version__ = '2014.09.04.3' | ||||
|   | ||||
		Reference in New Issue
	
	Block a user