Compare commits
	
		
			133 Commits
		
	
	
		
			2016.06.19
			...
			2016.07.03
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | a0cfd82dda | ||
|   | 1b734adb2d | ||
|   | 9b724d7277 | ||
|   | c3a5dd3b5d | ||
|   | e3755a624b | ||
|   | 95cf60e826 | ||
|   | 6b03e1e25d | ||
|   | 712b0b5b70 | ||
|   | 6a424391d9 | ||
|   | dbf0157a26 | ||
|   | 7deef1ba67 | ||
|   | fd6ca38262 | ||
|   | bdafd88da0 | ||
|   | 7a1e71575e | ||
|   | ac2d8f54d1 | ||
|   | 14ff6baa0e | ||
|   | bb08101ec4 | ||
|   | bc4b2d75ba | ||
|   | 35fc3021ba | ||
|   | 347227237b | ||
|   | 564dc3c6e8 | ||
|   | 9f4576a7eb | ||
|   | f11315e8d4 | ||
|   | 0c2ac64bb8 | ||
|   | a9eede3913 | ||
|   | 9e29ef13a3 | ||
|   | eaaaaec042 | ||
|   | 3cb3b60064 | ||
|   | 044e3d91b5 | ||
|   | c9e538a3b1 | ||
|   | 76dad392f5 | ||
|   | 9617b557aa | ||
|   | bf4fa24414 | ||
|   | 20361b4f25 | ||
|   | 05a0068a76 | ||
|   | 66a42309fa | ||
|   | fd94e2671a | ||
|   | 8ff6697861 | ||
|   | eafa643715 | ||
|   | 049da7cb6c | ||
|   | 7dbeee7e22 | ||
|   | 93ad6c6bfa | ||
|   | 329179073b | ||
|   | 4d86d2008e | ||
|   | ab47b6e881 | ||
|   | df43389ade | ||
|   | 397b305cfe | ||
|   | e496fa50cd | ||
|   | 06a96da15b | ||
|   | 70157c2c43 | ||
|   | c58ed8563d | ||
|   | 4c7821227c | ||
|   | 42362fdb5e | ||
|   | 97124e572d | ||
|   | 32616c14cc | ||
|   | 8174d0fe95 | ||
|   | 8704778d95 | ||
|   | c287f2bc60 | ||
|   | 9ea5c04c0d | ||
|   | fd7a7498a4 | ||
|   | e3a6747d8f | ||
|   | f41ffc00d1 | ||
|   | 81fda15369 | ||
|   | 427cd050a3 | ||
|   | b0c200f1ec | ||
|   | 92747e664a | ||
|   | f1f336322d | ||
|   | bf8dd79045 | ||
|   | c6781156aa | ||
|   | f484c5fa25 | ||
|   | 88d9f6c0c4 | ||
|   | 3c9c088f9c | ||
|   | fc3996bfe1 | ||
|   | 5b6ad8630c | ||
|   | 30105f4ac0 | ||
|   | 1143535d76 | ||
|   | 7d52c052ef | ||
|   | a2406fce3c | ||
|   | 3b34ab538c | ||
|   | ac782306f1 | ||
|   | 0c00e889f3 | ||
|   | ce96ed05f4 | ||
|   | 0463b77a1f | ||
|   | 2d185706ea | ||
|   | b72b44318c | ||
|   | 46f59e89ea | ||
|   | b4241e308e | ||
|   | 3d4b08dfc7 | ||
|   | be49068d65 | ||
|   | 525cedb971 | ||
|   | de3c7fe0d4 | ||
|   | 896cc72750 | ||
|   | c1ff6e1ad0 | ||
|   | fee70322d7 | ||
|   | 8065d6c55f | ||
|   | 494172d2e5 | ||
|   | 6e3c2047f8 | ||
|   | 011bd3221b | ||
|   | b46eabecd3 | ||
|   | 0437307a41 | ||
|   | 22b7ac13ef | ||
|   | 96f88e91b7 | ||
|   | 3331a4644d | ||
|   | adf1921dc1 | ||
|   | 97674f0419 | ||
|   | 73843ae8ac | ||
|   | f2bb8c036a | ||
|   | 75ca6bcee2 | ||
|   | 089657ed1f | ||
|   | b5eab86c24 | ||
|   | c8e3e0974b | ||
|   | dfc8f46e1c | ||
|   | c143ddce5d | ||
|   | 169d836feb | ||
|   | 6ae938b295 | ||
|   | cf40fdf5c1 | ||
|   | 23bdae0955 | ||
|   | ca74c90bf5 | ||
|   | 7cfc1e2a10 | ||
|   | 1ac5705f62 | ||
|   | e4f90ea0a7 | ||
|   | cdfc187cd5 | ||
|   | feef925f49 | ||
|   | 19e2d1cdea | ||
|   | 8369a4fe76 | ||
|   | 1f749b6658 | ||
|   | 819707920a | ||
|   | 43518503a6 | ||
|   | 5839d556e4 | ||
|   | 6c83e583b3 | ||
|   | 6aeb64b673 | ||
|   | 6cd64b6806 | ||
|   | e154c65128 | 
							
								
								
									
										6
									
								
								.github/ISSUE_TEMPLATE.md
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								.github/ISSUE_TEMPLATE.md
									
									
									
									
										vendored
									
									
								
							| @@ -6,8 +6,8 @@ | ||||
|  | ||||
| --- | ||||
|  | ||||
| ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.19.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. | ||||
| - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.19.1** | ||||
| ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.03*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. | ||||
| - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.03** | ||||
|  | ||||
| ### Before submitting an *issue* make sure you have: | ||||
| - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections | ||||
| @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> | ||||
| [debug] User config: [] | ||||
| [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] | ||||
| [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 | ||||
| [debug] youtube-dl version 2016.06.19.1 | ||||
| [debug] youtube-dl version 2016.07.03 | ||||
| [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 | ||||
| [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 | ||||
| [debug] Proxy map: {} | ||||
|   | ||||
							
								
								
									
										2
									
								
								AUTHORS
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								AUTHORS
									
									
									
									
									
								
							| @@ -175,3 +175,5 @@ Tomáš Čech | ||||
| Déstin Reed | ||||
| Roman Tsiupa | ||||
| Artur Krysiak | ||||
| Jakub Adam Wieczorek | ||||
| Aleksandar Topuzović | ||||
|   | ||||
| @@ -14,15 +14,17 @@ if os.path.exists(lazy_extractors_filename): | ||||
|     os.remove(lazy_extractors_filename) | ||||
|  | ||||
| from youtube_dl.extractor import _ALL_CLASSES | ||||
| from youtube_dl.extractor.common import InfoExtractor | ||||
| from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor | ||||
|  | ||||
| with open('devscripts/lazy_load_template.py', 'rt') as f: | ||||
|     module_template = f.read() | ||||
|  | ||||
| module_contents = [module_template + '\n' + getsource(InfoExtractor.suitable)] | ||||
| module_contents = [ | ||||
|     module_template + '\n' + getsource(InfoExtractor.suitable) + '\n', | ||||
|     'class LazyLoadSearchExtractor(LazyLoadExtractor):\n    pass\n'] | ||||
|  | ||||
| ie_template = ''' | ||||
| class {name}(LazyLoadExtractor): | ||||
| class {name}({bases}): | ||||
|     _VALID_URL = {valid_url!r} | ||||
|     _module = '{module}' | ||||
| ''' | ||||
| @@ -34,10 +36,20 @@ make_valid_template = ''' | ||||
| ''' | ||||
|  | ||||
|  | ||||
| def get_base_name(base): | ||||
|     if base is InfoExtractor: | ||||
|         return 'LazyLoadExtractor' | ||||
|     elif base is SearchInfoExtractor: | ||||
|         return 'LazyLoadSearchExtractor' | ||||
|     else: | ||||
|         return base.__name__ | ||||
|  | ||||
|  | ||||
| def build_lazy_ie(ie, name): | ||||
|     valid_url = getattr(ie, '_VALID_URL', None) | ||||
|     s = ie_template.format( | ||||
|         name=name, | ||||
|         bases=', '.join(map(get_base_name, ie.__bases__)), | ||||
|         valid_url=valid_url, | ||||
|         module=ie.__module__) | ||||
|     if ie.suitable.__func__ is not InfoExtractor.suitable.__func__: | ||||
| @@ -47,12 +59,35 @@ def build_lazy_ie(ie, name): | ||||
|         s += make_valid_template.format(valid_url=ie._make_valid_url()) | ||||
|     return s | ||||
|  | ||||
| # find the correct sorting and add the required base classes so that subclasses | ||||
| # can be correctly created | ||||
| classes = _ALL_CLASSES[:-1] | ||||
| ordered_cls = [] | ||||
| while classes: | ||||
|     for c in classes[:]: | ||||
|         bases = set(c.__bases__) - set((object, InfoExtractor, SearchInfoExtractor)) | ||||
|         stop = False | ||||
|         for b in bases: | ||||
|             if b not in classes and b not in ordered_cls: | ||||
|                 if b.__name__ == 'GenericIE': | ||||
|                     exit() | ||||
|                 classes.insert(0, b) | ||||
|                 stop = True | ||||
|         if stop: | ||||
|             break | ||||
|         if all(b in ordered_cls for b in bases): | ||||
|             ordered_cls.append(c) | ||||
|             classes.remove(c) | ||||
|             break | ||||
| ordered_cls.append(_ALL_CLASSES[-1]) | ||||
|  | ||||
| names = [] | ||||
| for ie in list(sorted(_ALL_CLASSES[:-1], key=lambda cls: cls.ie_key())) + _ALL_CLASSES[-1:]: | ||||
|     name = ie.ie_key() + 'IE' | ||||
| for ie in ordered_cls: | ||||
|     name = ie.__name__ | ||||
|     src = build_lazy_ie(ie, name) | ||||
|     module_contents.append(src) | ||||
|     names.append(name) | ||||
|     if ie in _ALL_CLASSES: | ||||
|         names.append(name) | ||||
|  | ||||
| module_contents.append( | ||||
|     '_ALL_CLASSES = [{0}]'.format(', '.join(names))) | ||||
|   | ||||
| @@ -45,7 +45,6 @@ | ||||
|  - **archive.org**: archive.org videos | ||||
|  - **ARD** | ||||
|  - **ARD:mediathek** | ||||
|  - **ARD:mediathek**: Saarländischer Rundfunk | ||||
|  - **arte.tv** | ||||
|  - **arte.tv:+7** | ||||
|  - **arte.tv:cinema** | ||||
| @@ -153,6 +152,8 @@ | ||||
|  - **CSNNE** | ||||
|  - **CSpan**: C-SPAN | ||||
|  - **CtsNews**: 華視新聞 | ||||
|  - **CTV** | ||||
|  - **CTVNews** | ||||
|  - **culturebox.francetvinfo.fr** | ||||
|  - **CultureUnplugged** | ||||
|  - **CWTV** | ||||
| @@ -241,6 +242,7 @@ | ||||
|  - **FreeVideo** | ||||
|  - **Funimation** | ||||
|  - **FunnyOrDie** | ||||
|  - **Fusion** | ||||
|  - **GameInformer** | ||||
|  - **Gamekings** | ||||
|  - **GameOne** | ||||
| @@ -248,7 +250,6 @@ | ||||
|  - **Gamersyde** | ||||
|  - **GameSpot** | ||||
|  - **GameStar** | ||||
|  - **Gametrailers** | ||||
|  - **Gazeta** | ||||
|  - **GDCVault** | ||||
|  - **generic**: Generic downloader that works on some sites | ||||
| @@ -274,6 +275,7 @@ | ||||
|  - **Helsinki**: helsinki.fi | ||||
|  - **HentaiStigma** | ||||
|  - **HistoricFilms** | ||||
|  - **history:topic**: History.com Topic | ||||
|  - **hitbox** | ||||
|  - **hitbox:live** | ||||
|  - **HornBunny** | ||||
| @@ -281,6 +283,8 @@ | ||||
|  - **HotStar** | ||||
|  - **Howcast** | ||||
|  - **HowStuffWorks** | ||||
|  - **HRTi** | ||||
|  - **HRTiPlaylist** | ||||
|  - **HuffPost**: Huffington Post | ||||
|  - **Hypem** | ||||
|  - **Iconosquare** | ||||
| @@ -327,7 +331,7 @@ | ||||
|  - **kuwo:mv**: 酷我音乐 - MV | ||||
|  - **kuwo:singer**: 酷我音乐 - 歌手 | ||||
|  - **kuwo:song**: 酷我音乐 | ||||
|  - **la7.tv** | ||||
|  - **la7.it** | ||||
|  - **Laola1Tv** | ||||
|  - **Le**: 乐视网 | ||||
|  - **Learnr** | ||||
| @@ -360,6 +364,7 @@ | ||||
|  - **MatchTV** | ||||
|  - **MDR**: MDR.DE and KiKA | ||||
|  - **media.ccc.de** | ||||
|  - **META** | ||||
|  - **metacafe** | ||||
|  - **Metacritic** | ||||
|  - **Mgoon** | ||||
| @@ -386,7 +391,7 @@ | ||||
|  - **MovieFap** | ||||
|  - **Moviezine** | ||||
|  - **MPORA** | ||||
|  - **MSNBC** | ||||
|  - **MSN** | ||||
|  - **MTV** | ||||
|  - **mtv.de** | ||||
|  - **mtviggy.com** | ||||
| @@ -440,6 +445,7 @@ | ||||
|  - **nick.de** | ||||
|  - **niconico**: ニコニコ動画 | ||||
|  - **NiconicoPlaylist** | ||||
|  - **NineCNineMedia** | ||||
|  - **njoy**: N-JOY | ||||
|  - **njoy:embed** | ||||
|  - **Noco** | ||||
| @@ -503,8 +509,9 @@ | ||||
|  - **plus.google**: Google Plus | ||||
|  - **pluzz.francetv.fr** | ||||
|  - **podomatic** | ||||
|  - **PolskieRadio** | ||||
|  - **PornHd** | ||||
|  - **PornHub** | ||||
|  - **PornHub**: PornHub and Thumbzilla | ||||
|  - **PornHubPlaylist** | ||||
|  - **PornHubUserVideos** | ||||
|  - **Pornotube** | ||||
| @@ -588,8 +595,10 @@ | ||||
|  - **Shared**: shared.sx and vivo.sx | ||||
|  - **ShareSix** | ||||
|  - **Sina** | ||||
|  - **SixPlay** | ||||
|  - **skynewsarabia:article** | ||||
|  - **skynewsarabia:video** | ||||
|  - **skynewsarabia:video** | ||||
|  - **SkySports** | ||||
|  - **Slideshare** | ||||
|  - **Slutload** | ||||
|  - **smotri**: Smotri.com | ||||
| @@ -621,6 +630,7 @@ | ||||
|  - **SportBoxEmbed** | ||||
|  - **SportDeutschland** | ||||
|  - **Sportschau** | ||||
|  - **sr:mediathek**: Saarländischer Rundfunk | ||||
|  - **SRGSSR** | ||||
|  - **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites | ||||
|  - **SSA** | ||||
| @@ -721,6 +731,7 @@ | ||||
|  - **UDNEmbed**: 聯合影音 | ||||
|  - **Unistra** | ||||
|  - **Urort**: NRK P3 Urørt | ||||
|  - **URPlay** | ||||
|  - **USAToday** | ||||
|  - **ustream** | ||||
|  - **ustream:channel** | ||||
| @@ -738,6 +749,7 @@ | ||||
|  - **vh1.com** | ||||
|  - **Vice** | ||||
|  - **ViceShow** | ||||
|  - **Vidbit** | ||||
|  - **Viddler** | ||||
|  - **video.google:search**: Google Video search | ||||
|  - **video.mit.edu** | ||||
|   | ||||
							
								
								
									
										63
									
								
								setup.py
									
									
									
									
									
								
							
							
						
						
									
										63
									
								
								setup.py
									
									
									
									
									
								
							| @@ -21,25 +21,37 @@ try: | ||||
|     import py2exe | ||||
| except ImportError: | ||||
|     if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe': | ||||
|         print("Cannot import py2exe", file=sys.stderr) | ||||
|         print('Cannot import py2exe', file=sys.stderr) | ||||
|         exit(1) | ||||
|  | ||||
| py2exe_options = { | ||||
|     "bundle_files": 1, | ||||
|     "compressed": 1, | ||||
|     "optimize": 2, | ||||
|     "dist_dir": '.', | ||||
|     "dll_excludes": ['w9xpopen.exe', 'crypt32.dll'], | ||||
|     'bundle_files': 1, | ||||
|     'compressed': 1, | ||||
|     'optimize': 2, | ||||
|     'dist_dir': '.', | ||||
|     'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'], | ||||
| } | ||||
|  | ||||
| # Get the version from youtube_dl/version.py without importing the package | ||||
| exec(compile(open('youtube_dl/version.py').read(), | ||||
|              'youtube_dl/version.py', 'exec')) | ||||
|  | ||||
| DESCRIPTION = 'YouTube video downloader' | ||||
| LONG_DESCRIPTION = 'Command-line program to download videos from YouTube.com and other video sites' | ||||
|  | ||||
| py2exe_console = [{ | ||||
|     "script": "./youtube_dl/__main__.py", | ||||
|     "dest_base": "youtube-dl", | ||||
|     'script': './youtube_dl/__main__.py', | ||||
|     'dest_base': 'youtube-dl', | ||||
|     'version': __version__, | ||||
|     'description': DESCRIPTION, | ||||
|     'comments': LONG_DESCRIPTION, | ||||
|     'product_name': 'youtube-dl', | ||||
|     'product_version': __version__, | ||||
| }] | ||||
|  | ||||
| py2exe_params = { | ||||
|     'console': py2exe_console, | ||||
|     'options': {"py2exe": py2exe_options}, | ||||
|     'options': {'py2exe': py2exe_options}, | ||||
|     'zipfile': None | ||||
| } | ||||
|  | ||||
| @@ -72,7 +84,7 @@ else: | ||||
|         params['scripts'] = ['bin/youtube-dl'] | ||||
|  | ||||
| class build_lazy_extractors(Command): | ||||
|     description = "Build the extractor lazy loading module" | ||||
|     description = 'Build the extractor lazy loading module' | ||||
|     user_options = [] | ||||
|  | ||||
|     def initialize_options(self): | ||||
| @@ -87,16 +99,11 @@ class build_lazy_extractors(Command): | ||||
|             dry_run=self.dry_run, | ||||
|         ) | ||||
|  | ||||
| # Get the version from youtube_dl/version.py without importing the package | ||||
| exec(compile(open('youtube_dl/version.py').read(), | ||||
|              'youtube_dl/version.py', 'exec')) | ||||
|  | ||||
| setup( | ||||
|     name='youtube_dl', | ||||
|     version=__version__, | ||||
|     description='YouTube video downloader', | ||||
|     long_description='Small command-line program to download videos from' | ||||
|     ' YouTube.com and other video sites.', | ||||
|     description=DESCRIPTION, | ||||
|     long_description=LONG_DESCRIPTION, | ||||
|     url='https://github.com/rg3/youtube-dl', | ||||
|     author='Ricardo Garcia', | ||||
|     author_email='ytdl@yt-dl.org', | ||||
| @@ -112,17 +119,17 @@ setup( | ||||
|     # test_requires = ['nosetest'], | ||||
|  | ||||
|     classifiers=[ | ||||
|         "Topic :: Multimedia :: Video", | ||||
|         "Development Status :: 5 - Production/Stable", | ||||
|         "Environment :: Console", | ||||
|         "License :: Public Domain", | ||||
|         "Programming Language :: Python :: 2.6", | ||||
|         "Programming Language :: Python :: 2.7", | ||||
|         "Programming Language :: Python :: 3", | ||||
|         "Programming Language :: Python :: 3.2", | ||||
|         "Programming Language :: Python :: 3.3", | ||||
|         "Programming Language :: Python :: 3.4", | ||||
|         "Programming Language :: Python :: 3.5", | ||||
|         'Topic :: Multimedia :: Video', | ||||
|         'Development Status :: 5 - Production/Stable', | ||||
|         'Environment :: Console', | ||||
|         'License :: Public Domain', | ||||
|         'Programming Language :: Python :: 2.6', | ||||
|         'Programming Language :: Python :: 2.7', | ||||
|         'Programming Language :: Python :: 3', | ||||
|         'Programming Language :: Python :: 3.2', | ||||
|         'Programming Language :: Python :: 3.3', | ||||
|         'Programming Language :: Python :: 3.4', | ||||
|         'Programming Language :: Python :: 3.5', | ||||
|     ], | ||||
|  | ||||
|     cmdclass={'build_lazy_extractors': build_lazy_extractors}, | ||||
|   | ||||
| @@ -11,7 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
| from test.helper import FakeYDL | ||||
| from youtube_dl.extractor.common import InfoExtractor | ||||
| from youtube_dl.extractor import YoutubeIE, get_info_extractor | ||||
| from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError | ||||
| from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError | ||||
|  | ||||
|  | ||||
| class TestIE(InfoExtractor): | ||||
| @@ -66,6 +66,11 @@ class TestInfoExtractor(unittest.TestCase): | ||||
|         self.assertEqual(ie._html_search_meta('d', html), '4') | ||||
|         self.assertEqual(ie._html_search_meta('e', html), '5') | ||||
|         self.assertEqual(ie._html_search_meta('f', html), '6') | ||||
|         self.assertEqual(ie._html_search_meta(('a', 'b', 'c'), html), '1') | ||||
|         self.assertEqual(ie._html_search_meta(('c', 'b', 'a'), html), '3') | ||||
|         self.assertEqual(ie._html_search_meta(('z', 'x', 'c'), html), '3') | ||||
|         self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True) | ||||
|         self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True) | ||||
|  | ||||
|     def test_download_json(self): | ||||
|         uri = encode_data_uri(b'{"foo": "blah"}', 'application/json') | ||||
|   | ||||
| @@ -6,6 +6,7 @@ from __future__ import unicode_literals | ||||
| import os | ||||
| import sys | ||||
| import unittest | ||||
| import collections | ||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
|  | ||||
| @@ -130,6 +131,15 @@ class TestAllURLsMatching(unittest.TestCase): | ||||
|             'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html', | ||||
|             ['Yahoo']) | ||||
|  | ||||
|     def test_no_duplicated_ie_names(self): | ||||
|         name_accu = collections.defaultdict(list) | ||||
|         for ie in self.ies: | ||||
|             name_accu[ie.IE_NAME.lower()].append(type(ie).__name__) | ||||
|         for (ie_name, ie_list) in name_accu.items(): | ||||
|             self.assertEqual( | ||||
|                 len(ie_list), 1, | ||||
|                 'Multiple extractors with the same IE_NAME "%s" (%s)' % (ie_name, ', '.join(ie_list))) | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -87,6 +87,7 @@ class TestCompat(unittest.TestCase): | ||||
|  | ||||
|     def test_compat_shlex_split(self): | ||||
|         self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two']) | ||||
|         self.assertEqual(compat_shlex_split('-option "one\ntwo" \n -flag'), ['-option', 'one\ntwo', '-flag']) | ||||
|  | ||||
|     def test_compat_etree_fromstring(self): | ||||
|         xml = ''' | ||||
|   | ||||
| @@ -60,11 +60,13 @@ from youtube_dl.utils import ( | ||||
|     timeconvert, | ||||
|     unescapeHTML, | ||||
|     unified_strdate, | ||||
|     unified_timestamp, | ||||
|     unsmuggle_url, | ||||
|     uppercase_escape, | ||||
|     lowercase_escape, | ||||
|     url_basename, | ||||
|     urlencode_postdata, | ||||
|     urshift, | ||||
|     update_url_query, | ||||
|     version_tuple, | ||||
|     xpath_with_ns, | ||||
| @@ -283,8 +285,28 @@ class TestUtil(unittest.TestCase): | ||||
|             '20150202') | ||||
|         self.assertEqual(unified_strdate('Feb 14th 2016 5:45PM'), '20160214') | ||||
|         self.assertEqual(unified_strdate('25-09-2014'), '20140925') | ||||
|         self.assertEqual(unified_strdate('27.02.2016 17:30'), '20160227') | ||||
|         self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None) | ||||
|  | ||||
|     def test_unified_timestamps(self): | ||||
|         self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600) | ||||
|         self.assertEqual(unified_timestamp('8/7/2009'), 1247011200) | ||||
|         self.assertEqual(unified_timestamp('Dec 14, 2012'), 1355443200) | ||||
|         self.assertEqual(unified_timestamp('2012/10/11 01:56:38 +0000'), 1349920598) | ||||
|         self.assertEqual(unified_timestamp('1968 12 10'), -33436800) | ||||
|         self.assertEqual(unified_timestamp('1968-12-10'), -33436800) | ||||
|         self.assertEqual(unified_timestamp('28/01/2014 21:00:00 +0100'), 1390939200) | ||||
|         self.assertEqual( | ||||
|             unified_timestamp('11/26/2014 11:30:00 AM PST', day_first=False), | ||||
|             1417001400) | ||||
|         self.assertEqual( | ||||
|             unified_timestamp('2/2/2015 6:47:40 PM', day_first=False), | ||||
|             1422902860) | ||||
|         self.assertEqual(unified_timestamp('Feb 14th 2016 5:45PM'), 1455471900) | ||||
|         self.assertEqual(unified_timestamp('25-09-2014'), 1411603200) | ||||
|         self.assertEqual(unified_timestamp('27.02.2016 17:30'), 1456594200) | ||||
|         self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None) | ||||
|  | ||||
|     def test_determine_ext(self): | ||||
|         self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4') | ||||
|         self.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None) | ||||
| @@ -959,5 +981,9 @@ The first line | ||||
|         self.assertRaises(ValueError, encode_base_n, 0, 70) | ||||
|         self.assertRaises(ValueError, encode_base_n, 0, 60, custom_table) | ||||
|  | ||||
|     def test_urshift(self): | ||||
|         self.assertEqual(urshift(3, 1), 1) | ||||
|         self.assertEqual(urshift(-3, 1), 2147483646) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -2,14 +2,24 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import os.path | ||||
| import re | ||||
| import binascii | ||||
| try: | ||||
|     from Crypto.Cipher import AES | ||||
|     can_decrypt_frag = True | ||||
| except ImportError: | ||||
|     can_decrypt_frag = False | ||||
|  | ||||
| from .fragment import FragmentFD | ||||
| from .external import FFmpegFD | ||||
|  | ||||
| from ..compat import compat_urlparse | ||||
| from ..compat import ( | ||||
|     compat_urlparse, | ||||
|     compat_struct_pack, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     encodeFilename, | ||||
|     sanitize_open, | ||||
|     parse_m3u8_attributes, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -21,7 +31,7 @@ class HlsFD(FragmentFD): | ||||
|     @staticmethod | ||||
|     def can_download(manifest): | ||||
|         UNSUPPORTED_FEATURES = ( | ||||
|             r'#EXT-X-KEY:METHOD=(?!NONE)',  # encrypted streams [1] | ||||
|             r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)',  # encrypted streams [1] | ||||
|             r'#EXT-X-BYTERANGE',  # playlists composed of byte ranges of media files [2] | ||||
|  | ||||
|             # Live streams heuristic does not always work (e.g. geo restricted to Germany | ||||
| @@ -39,7 +49,9 @@ class HlsFD(FragmentFD): | ||||
|             # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2 | ||||
|             # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5 | ||||
|         ) | ||||
|         return all(not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES) | ||||
|         check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES] | ||||
|         check_results.append(can_decrypt_frag or '#EXT-X-KEY:METHOD=AES-128' not in manifest) | ||||
|         return all(check_results) | ||||
|  | ||||
|     def real_download(self, filename, info_dict): | ||||
|         man_url = info_dict['url'] | ||||
| @@ -57,36 +69,60 @@ class HlsFD(FragmentFD): | ||||
|                 fd.add_progress_hook(ph) | ||||
|             return fd.real_download(filename, info_dict) | ||||
|  | ||||
|         fragment_urls = [] | ||||
|         total_frags = 0 | ||||
|         for line in s.splitlines(): | ||||
|             line = line.strip() | ||||
|             if line and not line.startswith('#'): | ||||
|                 segment_url = ( | ||||
|                     line | ||||
|                     if re.match(r'^https?://', line) | ||||
|                     else compat_urlparse.urljoin(man_url, line)) | ||||
|                 fragment_urls.append(segment_url) | ||||
|                 # We only download the first fragment during the test | ||||
|                 if self.params.get('test', False): | ||||
|                     break | ||||
|                 total_frags += 1 | ||||
|  | ||||
|         ctx = { | ||||
|             'filename': filename, | ||||
|             'total_frags': len(fragment_urls), | ||||
|             'total_frags': total_frags, | ||||
|         } | ||||
|  | ||||
|         self._prepare_and_start_frag_download(ctx) | ||||
|  | ||||
|         i = 0 | ||||
|         media_sequence = 0 | ||||
|         decrypt_info = {'METHOD': 'NONE'} | ||||
|         frags_filenames = [] | ||||
|         for i, frag_url in enumerate(fragment_urls): | ||||
|             frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i) | ||||
|             success = ctx['dl'].download(frag_filename, {'url': frag_url}) | ||||
|             if not success: | ||||
|                 return False | ||||
|             down, frag_sanitized = sanitize_open(frag_filename, 'rb') | ||||
|             ctx['dest_stream'].write(down.read()) | ||||
|             down.close() | ||||
|             frags_filenames.append(frag_sanitized) | ||||
|         for line in s.splitlines(): | ||||
|             line = line.strip() | ||||
|             if line: | ||||
|                 if not line.startswith('#'): | ||||
|                     frag_url = ( | ||||
|                         line | ||||
|                         if re.match(r'^https?://', line) | ||||
|                         else compat_urlparse.urljoin(man_url, line)) | ||||
|                     frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i) | ||||
|                     success = ctx['dl'].download(frag_filename, {'url': frag_url}) | ||||
|                     if not success: | ||||
|                         return False | ||||
|                     down, frag_sanitized = sanitize_open(frag_filename, 'rb') | ||||
|                     frag_content = down.read() | ||||
|                     down.close() | ||||
|                     if decrypt_info['METHOD'] == 'AES-128': | ||||
|                         iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence) | ||||
|                         frag_content = AES.new( | ||||
|                             decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content) | ||||
|                     ctx['dest_stream'].write(frag_content) | ||||
|                     frags_filenames.append(frag_sanitized) | ||||
|                     # We only download the first fragment during the test | ||||
|                     if self.params.get('test', False): | ||||
|                         break | ||||
|                     i += 1 | ||||
|                     media_sequence += 1 | ||||
|                 elif line.startswith('#EXT-X-KEY'): | ||||
|                     decrypt_info = parse_m3u8_attributes(line[11:]) | ||||
|                     if decrypt_info['METHOD'] == 'AES-128': | ||||
|                         if 'IV' in decrypt_info: | ||||
|                             decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:]) | ||||
|                         if not re.match(r'^https?://', decrypt_info['URI']): | ||||
|                             decrypt_info['URI'] = compat_urlparse.urljoin( | ||||
|                                 man_url, decrypt_info['URI']) | ||||
|                         decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read() | ||||
|                 elif line.startswith('#EXT-X-MEDIA-SEQUENCE'): | ||||
|                     media_sequence = int(line[22:]) | ||||
|  | ||||
|         self._finish_frag_download(ctx) | ||||
|  | ||||
|   | ||||
| @@ -7,18 +7,123 @@ from ..utils import ( | ||||
|     smuggle_url, | ||||
|     update_url_query, | ||||
|     unescapeHTML, | ||||
|     extract_attributes, | ||||
|     get_element_by_attribute, | ||||
| ) | ||||
| from ..compat import ( | ||||
|     compat_urlparse, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class AENetworksIE(InfoExtractor): | ||||
| class AENetworksBaseIE(InfoExtractor): | ||||
|     def theplatform_url_result(self, theplatform_url, video_id, query): | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'id': video_id, | ||||
|             'url': smuggle_url( | ||||
|                 update_url_query(theplatform_url, query), | ||||
|                 { | ||||
|                     'sig': { | ||||
|                         'key': 'crazyjava', | ||||
|                         'secret': 's3cr3t' | ||||
|                     }, | ||||
|                     'force_smil_url': True | ||||
|                 }), | ||||
|             'ie_key': 'ThePlatform', | ||||
|         } | ||||
|  | ||||
|  | ||||
| class AENetworksIE(AENetworksBaseIE): | ||||
|     IE_NAME = 'aenetworks' | ||||
|     IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?P<type>[^/]+)/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|movies/(?P<movie_display_id>[^/]+)/full-movie)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1', | ||||
|         'md5': '8ff93eb073449f151d6b90c0ae1ef0c7', | ||||
|         'info_dict': { | ||||
|             'id': '22253814', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Winter Is Coming', | ||||
|             'description': 'md5:641f424b7a19d8e24f26dea22cf59d74', | ||||
|             'timestamp': 1338306241, | ||||
|             'upload_date': '20120529', | ||||
|             'uploader': 'AENE-NEW', | ||||
|         }, | ||||
|         'add_ie': ['ThePlatform'], | ||||
|     }, { | ||||
|         'url': 'http://www.history.com/shows/ancient-aliens/season-1', | ||||
|         'info_dict': { | ||||
|             'id': '71889446852', | ||||
|         }, | ||||
|         'playlist_mincount': 5, | ||||
|     }, { | ||||
|         'url': 'http://www.mylifetime.com/shows/atlanta-plastic', | ||||
|         'info_dict': { | ||||
|             'id': 'SERIES4317', | ||||
|             'title': 'Atlanta Plastic', | ||||
|         }, | ||||
|         'playlist_mincount': 2, | ||||
|     }, { | ||||
|         'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1', | ||||
|         'only_matching': True | ||||
|     }, { | ||||
|         'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8', | ||||
|         'only_matching': True | ||||
|     }, { | ||||
|         'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6', | ||||
|         'only_matching': True | ||||
|     }, { | ||||
|         'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie', | ||||
|         'only_matching': True | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         show_path, movie_display_id = re.match(self._VALID_URL, url).groups() | ||||
|         display_id = show_path or movie_display_id | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         if show_path: | ||||
|             url_parts = show_path.split('/') | ||||
|             url_parts_len = len(url_parts) | ||||
|             if url_parts_len == 1: | ||||
|                 entries = [] | ||||
|                 for season_url_path in re.findall(r'(?s)<li[^>]+data-href="(/shows/%s/season-\d+)"' % url_parts[0], webpage): | ||||
|                     entries.append(self.url_result( | ||||
|                         compat_urlparse.urljoin(url, season_url_path), 'AENetworks')) | ||||
|                 return self.playlist_result( | ||||
|                     entries, self._html_search_meta('aetn:SeriesId', webpage), | ||||
|                     self._html_search_meta('aetn:SeriesTitle', webpage)) | ||||
|             elif url_parts_len == 2: | ||||
|                 entries = [] | ||||
|                 for episode_item in re.findall(r'(?s)<div[^>]+class="[^"]*episode-item[^"]*"[^>]*>', webpage): | ||||
|                     episode_attributes = extract_attributes(episode_item) | ||||
|                     episode_url = compat_urlparse.urljoin( | ||||
|                         url, episode_attributes['data-canonical']) | ||||
|                     entries.append(self.url_result( | ||||
|                         episode_url, 'AENetworks', | ||||
|                         episode_attributes['data-videoid'])) | ||||
|                 return self.playlist_result( | ||||
|                     entries, self._html_search_meta('aetn:SeasonId', webpage)) | ||||
|         video_id = self._html_search_meta('aetn:VideoID', webpage) | ||||
|         media_url = self._search_regex( | ||||
|             r"media_url\s*=\s*'([^']+)'", webpage, 'video url') | ||||
|  | ||||
|         info = self._search_json_ld(webpage, video_id, fatal=False) | ||||
|         info.update(self.theplatform_url_result( | ||||
|             media_url, video_id, { | ||||
|                 'mbr': 'true', | ||||
|                 'assetTypes': 'medium_video_s3' | ||||
|             })) | ||||
|         return info | ||||
|  | ||||
|  | ||||
| class HistoryTopicIE(AENetworksBaseIE): | ||||
|     IE_NAME = 'history:topic' | ||||
|     IE_DESC = 'History.com Topic' | ||||
|     _VALID_URL = r'https?://(?:www\.)?history\.com/topics/(?:[^/]+/)?(?P<topic_id>[^/]+)/videos(?:/(?P<video_display_id>[^/?#]+))?' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false', | ||||
|         'info_dict': { | ||||
|             'id': 'g12m5Gyt3fdR', | ||||
|             'id': '40700995724', | ||||
|             'ext': 'mp4', | ||||
|             'title': "Bet You Didn't Know: Valentine's Day", | ||||
|             'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7', | ||||
| @@ -31,57 +136,39 @@ class AENetworksIE(InfoExtractor): | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'add_ie': ['ThePlatform'], | ||||
|         'expected_warnings': ['JSON-LD'], | ||||
|     }, { | ||||
|         'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1', | ||||
|         'md5': '8ff93eb073449f151d6b90c0ae1ef0c7', | ||||
|         'info_dict': { | ||||
|             'id': 'eg47EERs_JsZ', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Winter Is Coming', | ||||
|             'description': 'md5:641f424b7a19d8e24f26dea22cf59d74', | ||||
|             'timestamp': 1338306241, | ||||
|             'upload_date': '20120529', | ||||
|             'uploader': 'AENE-NEW', | ||||
|         'url': 'http://www.history.com/topics/world-war-i/world-war-i-history/videos', | ||||
|         'info_dict': | ||||
|         { | ||||
|             'id': 'world-war-i-history', | ||||
|             'title': 'World War I History', | ||||
|         }, | ||||
|         'add_ie': ['ThePlatform'], | ||||
|         'playlist_mincount': 24, | ||||
|     }, { | ||||
|         'url': 'http://www.aetv.com/shows/duck-dynasty/video/inlawful-entry', | ||||
|         'only_matching': True | ||||
|     }, { | ||||
|         'url': 'http://www.fyi.tv/shows/tiny-house-nation/videos/207-sq-ft-minnesota-prairie-cottage', | ||||
|         'only_matching': True | ||||
|     }, { | ||||
|         'url': 'http://www.mylifetime.com/shows/project-runway-junior/video/season-1/episode-6/superstar-clients', | ||||
|         'only_matching': True | ||||
|         'url': 'http://www.history.com/topics/world-war-i-history/videos', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         page_type, video_id = re.match(self._VALID_URL, url).groups() | ||||
|         topic_id, video_display_id = re.match(self._VALID_URL, url).groups() | ||||
|         if video_display_id: | ||||
|             webpage = self._download_webpage(url, video_display_id) | ||||
|             release_url, video_id = re.search(r"_videoPlayer.play\('([^']+)'\s*,\s*'[^']+'\s*,\s*'(\d+)'\)", webpage).groups() | ||||
|             release_url = unescapeHTML(release_url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_url_re = [ | ||||
|             r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id, | ||||
|             r"media_url\s*=\s*'([^']+)'" | ||||
|         ] | ||||
|         video_url = unescapeHTML(self._search_regex(video_url_re, webpage, 'video url')) | ||||
|         query = {'mbr': 'true'} | ||||
|         if page_type == 'shows': | ||||
|             query['assetTypes'] = 'medium_video_s3' | ||||
|         if 'switch=hds' in video_url: | ||||
|             query['switch'] = 'hls' | ||||
|  | ||||
|         info = self._search_json_ld(webpage, video_id, fatal=False) | ||||
|         info.update({ | ||||
|             '_type': 'url_transparent', | ||||
|             'url': smuggle_url( | ||||
|                 update_url_query(video_url, query), | ||||
|                 { | ||||
|                     'sig': { | ||||
|                         'key': 'crazyjava', | ||||
|                         'secret': 's3cr3t'}, | ||||
|                     'force_smil_url': True | ||||
|                 }), | ||||
|         }) | ||||
|         return info | ||||
|             return self.theplatform_url_result( | ||||
|                 release_url, video_id, { | ||||
|                     'mbr': 'true', | ||||
|                     'switch': 'hls' | ||||
|                 }) | ||||
|         else: | ||||
|             webpage = self._download_webpage(url, topic_id) | ||||
|             entries = [] | ||||
|             for episode_item in re.findall(r'<a.+?data-release-url="[^"]+"[^>]*>', webpage): | ||||
|                 video_attributes = extract_attributes(episode_item) | ||||
|                 entries.append(self.theplatform_url_result( | ||||
|                     video_attributes['data-release-url'], video_attributes['data-id'], { | ||||
|                         'mbr': 'true', | ||||
|                         'switch': 'hls' | ||||
|                     })) | ||||
|             return self.playlist_result(entries, topic_id, get_element_by_attribute('class', 'show-title', webpage)) | ||||
|   | ||||
| @@ -7,6 +7,8 @@ from .common import InfoExtractor | ||||
| from ..compat import compat_urlparse | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_duration, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -16,7 +18,8 @@ class AppleTrailersIE(InfoExtractor): | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://trailers.apple.com/trailers/wb/manofsteel/', | ||||
|         'info_dict': { | ||||
|             'id': 'manofsteel', | ||||
|             'id': '5111', | ||||
|             'title': 'Man of Steel', | ||||
|         }, | ||||
|         'playlist': [ | ||||
|             { | ||||
| @@ -70,6 +73,15 @@ class AppleTrailersIE(InfoExtractor): | ||||
|             'id': 'blackthorn', | ||||
|         }, | ||||
|         'playlist_mincount': 2, | ||||
|         'expected_warnings': ['Unable to download JSON metadata'], | ||||
|     }, { | ||||
|         # json data only available from http://trailers.apple.com/trailers/feeds/data/15881.json | ||||
|         'url': 'http://trailers.apple.com/trailers/fox/kungfupanda3/', | ||||
|         'info_dict': { | ||||
|             'id': '15881', | ||||
|             'title': 'Kung Fu Panda 3', | ||||
|         }, | ||||
|         'playlist_mincount': 4, | ||||
|     }, { | ||||
|         'url': 'http://trailers.apple.com/ca/metropole/autrui/', | ||||
|         'only_matching': True, | ||||
| @@ -85,6 +97,45 @@ class AppleTrailersIE(InfoExtractor): | ||||
|         movie = mobj.group('movie') | ||||
|         uploader_id = mobj.group('company') | ||||
|  | ||||
|         webpage = self._download_webpage(url, movie) | ||||
|         film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id') | ||||
|         film_data = self._download_json( | ||||
|             'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id, | ||||
|             film_id, fatal=False) | ||||
|  | ||||
|         if film_data: | ||||
|             entries = [] | ||||
|             for clip in film_data.get('clips', []): | ||||
|                 clip_title = clip['title'] | ||||
|  | ||||
|                 formats = [] | ||||
|                 for version, version_data in clip.get('versions', {}).items(): | ||||
|                     for size, size_data in version_data.get('sizes', {}).items(): | ||||
|                         src = size_data.get('src') | ||||
|                         if not src: | ||||
|                             continue | ||||
|                         formats.append({ | ||||
|                             'format_id': '%s-%s' % (version, size), | ||||
|                             'url': re.sub(r'_(\d+p.mov)', r'_h\1', src), | ||||
|                             'width': int_or_none(size_data.get('width')), | ||||
|                             'height': int_or_none(size_data.get('height')), | ||||
|                             'language': version[:2], | ||||
|                         }) | ||||
|                 self._sort_formats(formats) | ||||
|  | ||||
|                 entries.append({ | ||||
|                     'id': movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', clip_title).lower(), | ||||
|                     'formats': formats, | ||||
|                     'title': clip_title, | ||||
|                     'thumbnail': clip.get('screen') or clip.get('thumb'), | ||||
|                     'duration': parse_duration(clip.get('runtime') or clip.get('faded')), | ||||
|                     'upload_date': unified_strdate(clip.get('posted')), | ||||
|                     'uploader_id': uploader_id, | ||||
|                 }) | ||||
|  | ||||
|             page_data = film_data.get('page', {}) | ||||
|             return self.playlist_result(entries, film_id, page_data.get('movie_title')) | ||||
|  | ||||
|         playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc') | ||||
|  | ||||
|         def fix_html(s): | ||||
|   | ||||
| @@ -419,6 +419,7 @@ class ArteTVPlaylistIE(ArteTVBaseIE): | ||||
|         'info_dict': { | ||||
|             'id': 'PL-013263', | ||||
|             'title': 'Areva & Uramin', | ||||
|             'description': 'md5:a1dc0312ce357c262259139cfd48c9bf', | ||||
|         }, | ||||
|         'playlist_mincount': 6, | ||||
|     }, { | ||||
|   | ||||
| @@ -1,17 +1,13 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .theplatform import ThePlatformIE | ||||
| from .theplatform import ThePlatformFeedIE | ||||
| from ..utils import ( | ||||
|     xpath_text, | ||||
|     xpath_element, | ||||
|     int_or_none, | ||||
|     find_xpath_attr, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class CBSBaseIE(ThePlatformIE): | ||||
| class CBSBaseIE(ThePlatformFeedIE): | ||||
|     def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'): | ||||
|         closed_caption_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', 'ClosedCaptionURL') | ||||
|         return { | ||||
| @@ -21,9 +17,22 @@ class CBSBaseIE(ThePlatformIE): | ||||
|             }] | ||||
|         } if closed_caption_e is not None and closed_caption_e.attrib.get('value') else [] | ||||
|  | ||||
|     def _extract_video_info(self, filter_query, video_id): | ||||
|         return self._extract_feed_info( | ||||
|             'dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id, lambda entry: { | ||||
|                 'series': entry.get('cbs$SeriesTitle'), | ||||
|                 'season_number': int_or_none(entry.get('cbs$SeasonNumber')), | ||||
|                 'episode': entry.get('cbs$EpisodeTitle'), | ||||
|                 'episode_number': int_or_none(entry.get('cbs$EpisodeNumber')), | ||||
|             }, { | ||||
|                 'StreamPack': { | ||||
|                     'manifest': 'm3u', | ||||
|                 } | ||||
|             }) | ||||
|  | ||||
|  | ||||
| class CBSIE(CBSBaseIE): | ||||
|     _VALID_URL = r'(?:cbs:(?P<content_id>\w+)|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<display_id>[^/]+))' | ||||
|     _VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/', | ||||
| @@ -38,25 +47,7 @@ class CBSIE(CBSBaseIE): | ||||
|             'upload_date': '20131127', | ||||
|             'uploader': 'CBSI-NEW', | ||||
|         }, | ||||
|         'params': { | ||||
|             # rtmp download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         '_skip': 'Blocked outside the US', | ||||
|     }, { | ||||
|         'url': 'http://www.cbs.com/shows/liveonletterman/artist/221752/st-vincent/', | ||||
|         'info_dict': { | ||||
|             'id': 'WWF_5KqY3PK1', | ||||
|             'display_id': 'st-vincent', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Live on Letterman - St. Vincent', | ||||
|             'description': 'Live On Letterman: St. Vincent in concert from New York\'s Ed Sullivan Theater on Tuesday, July 16, 2014.', | ||||
|             'duration': 3221, | ||||
|         }, | ||||
|         'params': { | ||||
|             # rtmp download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'expected_warnings': ['Failed to download m3u8 information'], | ||||
|         '_skip': 'Blocked outside the US', | ||||
|     }, { | ||||
|         'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/', | ||||
| @@ -68,44 +59,5 @@ class CBSIE(CBSBaseIE): | ||||
|     TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         content_id, display_id = re.match(self._VALID_URL, url).groups() | ||||
|         if not content_id: | ||||
|             webpage = self._download_webpage(url, display_id) | ||||
|             content_id = self._search_regex( | ||||
|                 [r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"], | ||||
|                 webpage, 'content id') | ||||
|         items_data = self._download_xml( | ||||
|             'http://can.cbs.com/thunder/player/videoPlayerService.php', | ||||
|             content_id, query={'partner': 'cbs', 'contentId': content_id}) | ||||
|         video_data = xpath_element(items_data, './/item') | ||||
|         title = xpath_text(video_data, 'videoTitle', 'title', True) | ||||
|  | ||||
|         subtitles = {} | ||||
|         formats = [] | ||||
|         for item in items_data.findall('.//item'): | ||||
|             pid = xpath_text(item, 'pid') | ||||
|             if not pid: | ||||
|                 continue | ||||
|             tp_release_url = self.TP_RELEASE_URL_TEMPLATE % pid | ||||
|             if '.m3u8' in xpath_text(item, 'contentUrl', default=''): | ||||
|                 tp_release_url += '&manifest=m3u' | ||||
|             tp_formats, tp_subtitles = self._extract_theplatform_smil( | ||||
|                 tp_release_url, content_id, 'Downloading %s SMIL data' % pid) | ||||
|             formats.extend(tp_formats) | ||||
|             subtitles = self._merge_subtitles(subtitles, tp_subtitles) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         info = self.get_metadata('dJ5BDC/media/guid/2198311517/%s' % content_id, content_id) | ||||
|         info.update({ | ||||
|             'id': content_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'series': xpath_text(video_data, 'seriesTitle'), | ||||
|             'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')), | ||||
|             'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')), | ||||
|             'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000), | ||||
|             'thumbnail': xpath_text(video_data, 'previewImageURL'), | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|         }) | ||||
|         return info | ||||
|         content_id = self._match_id(url) | ||||
|         return self._extract_video_info('byGuid=%s' % content_id, content_id) | ||||
|   | ||||
| @@ -30,9 +30,12 @@ class CBSNewsIE(CBSBaseIE): | ||||
|         { | ||||
|             'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/', | ||||
|             'info_dict': { | ||||
|                 'id': 'fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack', | ||||
|                 'id': 'SNJBOYzXiWBOvaLsdzwH8fmtP1SCd91Y', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack', | ||||
|                 'description': 'md5:4a6983e480542d8b333a947bfc64ddc7', | ||||
|                 'upload_date': '19700101', | ||||
|                 'uploader': 'CBSI-NEW', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|                 'duration': 205, | ||||
|                 'subtitles': { | ||||
| @@ -58,30 +61,8 @@ class CBSNewsIE(CBSBaseIE): | ||||
|             webpage, 'video JSON info'), video_id) | ||||
|  | ||||
|         item = video_info['item'] if 'item' in video_info else video_info | ||||
|         title = item.get('articleTitle') or item.get('hed') | ||||
|         duration = item.get('duration') | ||||
|         thumbnail = item.get('mediaImage') or item.get('thumbnail') | ||||
|  | ||||
|         subtitles = {} | ||||
|         formats = [] | ||||
|         for format_id in ['RtmpMobileLow', 'RtmpMobileHigh', 'Hls', 'RtmpDesktop']: | ||||
|             pid = item.get('media' + format_id) | ||||
|             if not pid: | ||||
|                 continue | ||||
|             release_url = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true' % pid | ||||
|             tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % pid) | ||||
|             formats.extend(tp_formats) | ||||
|             subtitles = self._merge_subtitles(subtitles, tp_subtitles) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|         guid = item['mpxRefId'] | ||||
|         return self._extract_video_info('byGuid=%s' % guid, guid) | ||||
|  | ||||
|  | ||||
| class CBSNewsLiveVideoIE(InfoExtractor): | ||||
|   | ||||
| @@ -1,30 +1,28 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .cbs import CBSBaseIE | ||||
|  | ||||
|  | ||||
| class CBSSportsIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)' | ||||
| class CBSSportsIE(CBSBaseIE): | ||||
|     _VALID_URL = r'https?://www\.cbssports\.com/video/player/[^/]+/(?P<id>\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s', | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.cbssports.com/video/player/videos/708337219968/0/ben-simmons-the-next-lebron?-not-so-fast', | ||||
|         'info_dict': { | ||||
|             'id': '_d5_GbO8p1sT', | ||||
|             'ext': 'flv', | ||||
|             'title': 'US Open flashbacks: 1990s', | ||||
|             'description': 'Bill Macatee relives the best moments in US Open history from the 1990s.', | ||||
|             'id': '708337219968', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Ben Simmons the next LeBron? Not so fast', | ||||
|             'description': 'md5:854294f627921baba1f4b9a990d87197', | ||||
|             'timestamp': 1466293740, | ||||
|             'upload_date': '20160618', | ||||
|             'uploader': 'CBSI-NEW', | ||||
|         }, | ||||
|     } | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         section = mobj.group('section') | ||||
|         video_id = mobj.group('id') | ||||
|         all_videos = self._download_json( | ||||
|             'http://www.cbssports.com/data/video/player/getVideos/%s?as=json' % section, | ||||
|             video_id) | ||||
|         # The json file contains the info of all the videos in the section | ||||
|         video_info = next(v for v in all_videos if v['pcid'] == video_id) | ||||
|         return self.url_result('theplatform:%s' % video_info['pid'], 'ThePlatform') | ||||
|         video_id = self._match_id(url) | ||||
|         return self._extract_video_info('byId=%s' % video_id, video_id) | ||||
|   | ||||
| @@ -53,6 +53,7 @@ from ..utils import ( | ||||
|     mimetype2ext, | ||||
|     update_Request, | ||||
|     update_url_query, | ||||
|     parse_m3u8_attributes, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -748,10 +749,12 @@ class InfoExtractor(object): | ||||
|         return self._og_search_property('url', html, **kargs) | ||||
|  | ||||
|     def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs): | ||||
|         if not isinstance(name, (list, tuple)): | ||||
|             name = [name] | ||||
|         if display_name is None: | ||||
|             display_name = name | ||||
|             display_name = name[0] | ||||
|         return self._html_search_regex( | ||||
|             self._meta_regex(name), | ||||
|             [self._meta_regex(n) for n in name], | ||||
|             html, display_name, fatal=fatal, group='content', **kwargs) | ||||
|  | ||||
|     def _dc_search_uploader(self, html): | ||||
| @@ -875,7 +878,11 @@ class InfoExtractor(object): | ||||
|                 f['ext'] = determine_ext(f['url']) | ||||
|  | ||||
|             if isinstance(field_preference, (list, tuple)): | ||||
|                 return tuple(f.get(field) if f.get(field) is not None else -1 for field in field_preference) | ||||
|                 return tuple( | ||||
|                     f.get(field) | ||||
|                     if f.get(field) is not None | ||||
|                     else ('' if field == 'format_id' else -1) | ||||
|                     for field in field_preference) | ||||
|  | ||||
|             preference = f.get('preference') | ||||
|             if preference is None: | ||||
| @@ -1150,23 +1157,11 @@ class InfoExtractor(object): | ||||
|             }] | ||||
|         last_info = None | ||||
|         last_media = None | ||||
|         kv_rex = re.compile( | ||||
|             r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)') | ||||
|         for line in m3u8_doc.splitlines(): | ||||
|             if line.startswith('#EXT-X-STREAM-INF:'): | ||||
|                 last_info = {} | ||||
|                 for m in kv_rex.finditer(line): | ||||
|                     v = m.group('val') | ||||
|                     if v.startswith('"'): | ||||
|                         v = v[1:-1] | ||||
|                     last_info[m.group('key')] = v | ||||
|                 last_info = parse_m3u8_attributes(line) | ||||
|             elif line.startswith('#EXT-X-MEDIA:'): | ||||
|                 last_media = {} | ||||
|                 for m in kv_rex.finditer(line): | ||||
|                     v = m.group('val') | ||||
|                     if v.startswith('"'): | ||||
|                         v = v[1:-1] | ||||
|                     last_media[m.group('key')] = v | ||||
|                 last_media = parse_m3u8_attributes(line) | ||||
|             elif line.startswith('#') or not line.strip(): | ||||
|                 continue | ||||
|             else: | ||||
|   | ||||
							
								
								
									
										30
									
								
								youtube_dl/extractor/ctv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								youtube_dl/extractor/ctv.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,30 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class CTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?ctv\.ca/video/player\?vid=(?P<id>[0-9.]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.ctv.ca/video/player?vid=706966', | ||||
|         'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0', | ||||
|         'info_dict': { | ||||
|             'id': '706966', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Larry Day and Richard Jutras on the TIFF red carpet of \'Stonewall\'', | ||||
|             'description': 'etalk catches up with Larry Day and Richard Jutras on the TIFF red carpet of "Stonewall”.', | ||||
|             'upload_date': '20150919', | ||||
|             'timestamp': 1442624700, | ||||
|         }, | ||||
|         'expected_warnings': ['HTTP Error 404'], | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'id': video_id, | ||||
|             'url': '9c9media:ctv_web:%s' % video_id, | ||||
|             'ie_key': 'NineCNineMedia', | ||||
|         } | ||||
							
								
								
									
										65
									
								
								youtube_dl/extractor/ctvnews.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										65
									
								
								youtube_dl/extractor/ctvnews.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,65 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import orderedSet | ||||
|  | ||||
|  | ||||
| class CTVNewsIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.ctvnews.ca/video?clipId=901995', | ||||
|         'md5': '10deb320dc0ccb8d01d34d12fc2ea672', | ||||
|         'info_dict': { | ||||
|             'id': '901995', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Extended: \'That person cannot be me\' Johnson says', | ||||
|             'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285', | ||||
|             'timestamp': 1467286284, | ||||
|             'upload_date': '20160630', | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://www.ctvnews.ca/video?playlistId=1.2966224', | ||||
|         'info_dict': | ||||
|         { | ||||
|             'id': '1.2966224', | ||||
|         }, | ||||
|         'playlist_mincount': 19, | ||||
|     }, { | ||||
|         'url': 'http://www.ctvnews.ca/video?binId=1.2876780', | ||||
|         'info_dict': | ||||
|         { | ||||
|             'id': '1.2876780', | ||||
|         }, | ||||
|         'playlist_mincount': 100, | ||||
|     }, { | ||||
|         'url': 'http://www.ctvnews.ca/1.810401', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.ctvnews.ca/canadiens-send-p-k-subban-to-nashville-in-blockbuster-trade-1.2967231', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         page_id = self._match_id(url) | ||||
|  | ||||
|         def ninecninemedia_url_result(clip_id): | ||||
|             return { | ||||
|                 '_type': 'url_transparent', | ||||
|                 'id': clip_id, | ||||
|                 'url': '9c9media:ctvnews_web:%s' % clip_id, | ||||
|                 'ie_key': 'NineCNineMedia', | ||||
|             } | ||||
|  | ||||
|         if page_id.isdigit(): | ||||
|             return ninecninemedia_url_result(page_id) | ||||
|         else: | ||||
|             webpage = self._download_webpage('http://www.ctvnews.ca/%s' % page_id, page_id, query={ | ||||
|                 'ot': 'example.AjaxPageLayout.ot', | ||||
|                 'maxItemsPerPage': 1000000, | ||||
|             }) | ||||
|             entries = [ninecninemedia_url_result(clip_id) for clip_id in orderedSet( | ||||
|                 re.findall(r'clip\.id\s*=\s*(\d+);', webpage))] | ||||
|             return self.playlist_result(entries, page_id) | ||||
| @@ -20,7 +20,7 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class DCNIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         show_id, video_id, season_id = re.match(self._VALID_URL, url).groups() | ||||
| @@ -55,30 +55,32 @@ class DCNBaseIE(InfoExtractor): | ||||
|             'is_live': is_live, | ||||
|         } | ||||
|  | ||||
|     def _extract_video_formats(self, webpage, video_id, entry_protocol): | ||||
|     def _extract_video_formats(self, webpage, video_id, m3u8_entry_protocol): | ||||
|         formats = [] | ||||
|         m3u8_url = self._html_search_regex( | ||||
|             r'file\s*:\s*"([^"]+)', webpage, 'm3u8 url', fatal=False) | ||||
|         if m3u8_url: | ||||
|             formats.extend(self._extract_m3u8_formats( | ||||
|                 m3u8_url, video_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=None)) | ||||
|  | ||||
|         rtsp_url = self._search_regex( | ||||
|             r'<a[^>]+href="(rtsp://[^"]+)"', webpage, 'rtsp url', fatal=False) | ||||
|         if rtsp_url: | ||||
|             formats.append({ | ||||
|                 'url': rtsp_url, | ||||
|                 'format_id': 'rtsp', | ||||
|             }) | ||||
|  | ||||
|         format_url_base = 'http' + self._html_search_regex( | ||||
|             [ | ||||
|                 r'file\s*:\s*"https?(://[^"]+)/playlist.m3u8', | ||||
|                 r'<a[^>]+href="rtsp(://[^"]+)"' | ||||
|             ], webpage, 'format url') | ||||
|         # TODO: Current DASH formats are broken - $Time$ pattern in | ||||
|         # <SegmentTemplate> not implemented yet | ||||
|         # formats.extend(self._extract_mpd_formats( | ||||
|         #     format_url_base + '/manifest.mpd', | ||||
|         #     video_id, mpd_id='dash', fatal=False)) | ||||
|         formats.extend(self._extract_m3u8_formats( | ||||
|             format_url_base + '/playlist.m3u8', video_id, 'mp4', | ||||
|             m3u8_entry_protocol, m3u8_id='hls', fatal=False)) | ||||
|         formats.extend(self._extract_f4m_formats( | ||||
|             format_url_base + '/manifest.f4m', | ||||
|             video_id, f4m_id='hds', fatal=False)) | ||||
|         self._sort_formats(formats) | ||||
|         return formats | ||||
|  | ||||
|  | ||||
| class DCNVideoIE(DCNBaseIE): | ||||
|     IE_NAME = 'dcn:video' | ||||
|     _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/[^/]+|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?(?:video(?:/[^/]+)?|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375', | ||||
|         'info_dict': | ||||
|         { | ||||
| @@ -94,7 +96,10 @@ class DCNVideoIE(DCNBaseIE): | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
| @@ -120,7 +125,7 @@ class DCNVideoIE(DCNBaseIE): | ||||
|  | ||||
| class DCNLiveIE(DCNBaseIE): | ||||
|     IE_NAME = 'dcn:live' | ||||
|     _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?live/(?P<id>\d+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?live/(?P<id>\d+)' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         channel_id = self._match_id(url) | ||||
| @@ -147,7 +152,7 @@ class DCNLiveIE(DCNBaseIE): | ||||
|  | ||||
| class DCNSeasonIE(InfoExtractor): | ||||
|     IE_NAME = 'dcn:season' | ||||
|     _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))' | ||||
|     _TEST = { | ||||
|         'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A', | ||||
|         'info_dict': | ||||
|   | ||||
| @@ -50,6 +50,14 @@ class EaglePlatformIE(InfoExtractor): | ||||
|         'skip': 'Georestricted', | ||||
|     }] | ||||
|  | ||||
|     @staticmethod | ||||
|     def _extract_url(webpage): | ||||
|         # Return the src of the first <iframe> in `webpage` that points at a | ||||
|         # '<something>.media.eagleplatform.com/index/player?...' URL. | ||||
|         # The src may be wrapped in single or double quotes (the closing quote | ||||
|         # must match the opening one via \1) and may be scheme-less ('//host/...'). | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1', | ||||
|             webpage) | ||||
|         # No explicit return on a miss, so the result is None when nothing matches. | ||||
|         if mobj is not None: | ||||
|             return mobj.group('url') | ||||
|  | ||||
|     @staticmethod | ||||
|     def _handle_error(response): | ||||
|         status = int_or_none(response.get('status', 200)) | ||||
|   | ||||
| @@ -20,7 +20,10 @@ from .adobetv import ( | ||||
|     AdobeTVVideoIE, | ||||
| ) | ||||
| from .adultswim import AdultSwimIE | ||||
| from .aenetworks import AENetworksIE | ||||
| from .aenetworks import ( | ||||
|     AENetworksIE, | ||||
|     HistoryTopicIE, | ||||
| ) | ||||
| from .afreecatv import AfreecaTVIE | ||||
| from .aftonbladet import AftonbladetIE | ||||
| from .airmozilla import AirMozillaIE | ||||
| @@ -168,6 +171,8 @@ from .crunchyroll import ( | ||||
| ) | ||||
| from .cspan import CSpanIE | ||||
| from .ctsnews import CtsNewsIE | ||||
| from .ctv import CTVIE | ||||
| from .ctvnews import CTVNewsIE | ||||
| from .cultureunplugged import CultureUnpluggedIE | ||||
| from .cwtv import CWTVIE | ||||
| from .dailymail import DailyMailIE | ||||
| @@ -276,6 +281,7 @@ from .freespeech import FreespeechIE | ||||
| from .freevideo import FreeVideoIE | ||||
| from .funimation import FunimationIE | ||||
| from .funnyordie import FunnyOrDieIE | ||||
| from .fusion import FusionIE | ||||
| from .gameinformer import GameInformerIE | ||||
| from .gamekings import GamekingsIE | ||||
| from .gameone import ( | ||||
| @@ -285,7 +291,6 @@ from .gameone import ( | ||||
| from .gamersyde import GamersydeIE | ||||
| from .gamespot import GameSpotIE | ||||
| from .gamestar import GameStarIE | ||||
| from .gametrailers import GametrailersIE | ||||
| from .gazeta import GazetaIE | ||||
| from .gdcvault import GDCVaultIE | ||||
| from .generic import GenericIE | ||||
| @@ -321,6 +326,10 @@ from .hotnewhiphop import HotNewHipHopIE | ||||
| from .hotstar import HotStarIE | ||||
| from .howcast import HowcastIE | ||||
| from .howstuffworks import HowStuffWorksIE | ||||
| from .hrti import ( | ||||
|     HRTiIE, | ||||
|     HRTiPlaylistIE, | ||||
| ) | ||||
| from .huffpost import HuffPostIE | ||||
| from .hypem import HypemIE | ||||
| from .iconosquare import IconosquareIE | ||||
| @@ -423,6 +432,7 @@ from .makerschannel import MakersChannelIE | ||||
| from .makertv import MakerTVIE | ||||
| from .matchtv import MatchTVIE | ||||
| from .mdr import MDRIE | ||||
| from .meta import METAIE | ||||
| from .metacafe import MetacafeIE | ||||
| from .metacritic import MetacriticIE | ||||
| from .mgoon import MgoonIE | ||||
| @@ -455,6 +465,7 @@ from .motherless import MotherlessIE | ||||
| from .motorsport import MotorsportIE | ||||
| from .movieclips import MovieClipsIE | ||||
| from .moviezine import MoviezineIE | ||||
| from .msn import MSNIE | ||||
| from .mtv import ( | ||||
|     MTVIE, | ||||
|     MTVServicesEmbeddedIE, | ||||
| @@ -481,7 +492,6 @@ from .nbc import ( | ||||
|     NBCNewsIE, | ||||
|     NBCSportsIE, | ||||
|     NBCSportsVPlayerIE, | ||||
|     MSNBCIE, | ||||
| ) | ||||
| from .ndr import ( | ||||
|     NDRIE, | ||||
| @@ -523,6 +533,7 @@ from .nick import ( | ||||
|     NickDeIE, | ||||
| ) | ||||
| from .niconico import NiconicoIE, NiconicoPlaylistIE | ||||
| from .ninecninemedia import NineCNineMediaIE | ||||
| from .ninegag import NineGagIE | ||||
| from .noco import NocoIE | ||||
| from .normalboots import NormalbootsIE | ||||
| @@ -608,6 +619,7 @@ from .pluralsight import ( | ||||
|     PluralsightCourseIE, | ||||
| ) | ||||
| from .podomatic import PodomaticIE | ||||
| from .polskieradio import PolskieRadioIE | ||||
| from .porn91 import Porn91IE | ||||
| from .pornhd import PornHdIE | ||||
| from .pornhub import ( | ||||
| @@ -706,10 +718,12 @@ from .shahid import ShahidIE | ||||
| from .shared import SharedIE | ||||
| from .sharesix import ShareSixIE | ||||
| from .sina import SinaIE | ||||
| from .sixplay import SixPlayIE | ||||
| from .skynewsarabia import ( | ||||
|     SkyNewsArabiaIE, | ||||
|     SkyNewsArabiaArticleIE, | ||||
| ) | ||||
| from .skysports import SkySportsIE | ||||
| from .slideshare import SlideshareIE | ||||
| from .slutload import SlutloadIE | ||||
| from .smotri import ( | ||||
| @@ -891,6 +905,7 @@ from .udn import UDNEmbedIE | ||||
| from .digiteka import DigitekaIE | ||||
| from .unistra import UnistraIE | ||||
| from .urort import UrortIE | ||||
| from .urplay import URPlayIE | ||||
| from .usatoday import USATodayIE | ||||
| from .ustream import UstreamIE, UstreamChannelIE | ||||
| from .ustudio import ( | ||||
| @@ -917,6 +932,7 @@ from .vice import ( | ||||
|     ViceIE, | ||||
|     ViceShowIE, | ||||
| ) | ||||
| from .vidbit import VidbitIE | ||||
| from .viddler import ViddlerIE | ||||
| from .videodetective import VideoDetectiveIE | ||||
| from .videofyme import VideofyMeIE | ||||
|   | ||||
| @@ -129,6 +129,21 @@ class FacebookIE(InfoExtractor): | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     @staticmethod | ||||
|     def _extract_url(webpage): | ||||
|         # Return the URL of the first Facebook video embed found in `webpage`, | ||||
|         # or None (implicit) when neither embed style below is present. | ||||
|         # | ||||
|         # First style: an <iframe> whose src starts with | ||||
|         # 'https://www.facebook.com/video/embed'. Only that exact scheme and | ||||
|         # host prefix are accepted by this pattern. | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage) | ||||
|         if mobj is not None: | ||||
|             return mobj.group('url') | ||||
|  | ||||
|         # Facebook API embed | ||||
|         # see https://developers.facebook.com/docs/plugins/embedded-video-player | ||||
|         # Second style: a <div> whose class attribute contains the word | ||||
|         # 'fb-video' and whose data-href holds a facebook.com URL (http, https | ||||
|         # or scheme-less, 'www.' optional). The pattern requires the class | ||||
|         # attribute to appear before data-href inside the tag. | ||||
|         # NOTE(review): the dot in 'facebook.com' is not escaped in this | ||||
|         # pattern, so it matches any single character at that position. | ||||
|         mobj = re.search(r'''(?x)<div[^>]+ | ||||
|                 class=(?P<q1>[\'"])[^\'"]*\bfb-video\b[^\'"]*(?P=q1)[^>]+ | ||||
|                 data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''', webpage) | ||||
|         if mobj is not None: | ||||
|             return mobj.group('url') | ||||
|  | ||||
|     def _login(self): | ||||
|         (useremail, password) = self._get_login_info() | ||||
|         if useremail is None: | ||||
| @@ -239,6 +254,8 @@ class FacebookIE(InfoExtractor): | ||||
|  | ||||
|         formats = [] | ||||
|         for format_id, f in video_data.items(): | ||||
|             if f and isinstance(f, dict): | ||||
|                 f = [f] | ||||
|             if not f or not isinstance(f, list): | ||||
|                 continue | ||||
|             for quality in ('sd', 'hd'): | ||||
|   | ||||
| @@ -1,7 +1,10 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import smuggle_url | ||||
| from ..utils import ( | ||||
|     smuggle_url, | ||||
|     update_url_query, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class FoxSportsIE(InfoExtractor): | ||||
| @@ -9,11 +12,15 @@ class FoxSportsIE(InfoExtractor): | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.foxsports.com/video?vid=432609859715', | ||||
|         'md5': 'b49050e955bebe32c301972e4012ac17', | ||||
|         'info_dict': { | ||||
|             'id': 'gA0bHB3Ladz3', | ||||
|             'ext': 'flv', | ||||
|             'id': 'i0qKWsk3qJaM', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Courtney Lee on going up 2-0 in series vs. Blazers', | ||||
|             'description': 'Courtney Lee talks about Memphis being focused.', | ||||
|             'upload_date': '20150423', | ||||
|             'timestamp': 1429761109, | ||||
|             'uploader': 'NEWA-FNG-FOXSPORTS', | ||||
|         }, | ||||
|         'add_ie': ['ThePlatform'], | ||||
|     } | ||||
| @@ -28,5 +35,8 @@ class FoxSportsIE(InfoExtractor): | ||||
|                 r"data-player-config='([^']+)'", webpage, 'data player config'), | ||||
|             video_id) | ||||
|  | ||||
|         return self.url_result(smuggle_url( | ||||
|             config['releaseURL'] + '&manifest=f4m', {'force_smil_url': True})) | ||||
|         return self.url_result(smuggle_url(update_url_query( | ||||
|             config['releaseURL'], { | ||||
|                 'mbr': 'true', | ||||
|                 'switch': 'http', | ||||
|             }), {'force_smil_url': True})) | ||||
|   | ||||
							
								
								
									
										35
									
								
								youtube_dl/extractor/fusion.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										35
									
								
								youtube_dl/extractor/fusion.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,35 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .ooyala import OoyalaIE | ||||
|  | ||||
|  | ||||
| class FusionIE(InfoExtractor): | ||||
|     # Extractor for fusion.net video pages: it reads the video code embedded | ||||
|     # in the page markup and hands it over to OoyalaIE. | ||||
|     # | ||||
|     # Only the numeric id after '/video/' is captured; the pattern does not | ||||
|     # constrain what follows it, so URLs with or without a trailing slug match | ||||
|     # (both forms appear in _TESTS). | ||||
|     _VALID_URL = r'https?://(?:www\.)?fusion\.net/video/(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://fusion.net/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/', | ||||
|         'info_dict': { | ||||
|             'id': 'ZpcWNoMTE6x6uVIIWYpHh0qQDjxBuq5P', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'U.S. and Panamanian forces work together to stop a vessel smuggling drugs', | ||||
|             'description': 'md5:0cc84a9943c064c0f46b128b41b1b0d7', | ||||
|             'duration': 140.0, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'add_ie': ['Ooyala'], | ||||
|     }, { | ||||
|         'url': 'http://fusion.net/video/201781', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         # The numeric id from the URL is used only as a display id for the | ||||
|         # page download; the returned result is built from the embedded code. | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         # Value of the first data-video-id="..." attribute (either quote | ||||
|         # style; the closing quote must match the opening one). No default is | ||||
|         # passed here. NOTE(review): presumably a missing attribute makes | ||||
|         # _search_regex raise -- confirm against InfoExtractor. | ||||
|         ooyala_code = self._search_regex( | ||||
|             r'data-video-id=(["\'])(?P<code>.+?)\1', | ||||
|             webpage, 'ooyala code', group='code') | ||||
|  | ||||
|         # _build_url_result is defined in the ooyala module (outside this | ||||
|         # view); presumably it returns a url result handled by OoyalaIE. | ||||
|         return OoyalaIE._build_url_result(ooyala_code) | ||||
| @@ -1,19 +1,19 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .once import OnceIE | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse_unquote, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     unescapeHTML, | ||||
|     url_basename, | ||||
|     dict_get, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class GameSpotIE(InfoExtractor): | ||||
| class GameSpotIE(OnceIE): | ||||
|     _VALID_URL = r'https?://(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/', | ||||
| @@ -39,29 +39,73 @@ class GameSpotIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, page_id) | ||||
|         data_video_json = self._search_regex( | ||||
|             r'data-video=["\'](.*?)["\']', webpage, 'data video') | ||||
|         data_video = json.loads(unescapeHTML(data_video_json)) | ||||
|         data_video = self._parse_json(unescapeHTML(data_video_json), page_id) | ||||
|         streams = data_video['videoStreams'] | ||||
|  | ||||
|         manifest_url = None | ||||
|         formats = [] | ||||
|         f4m_url = streams.get('f4m_stream') | ||||
|         if f4m_url is not None: | ||||
|             # Transform the manifest url to a link to the mp4 files | ||||
|             # they are used in mobile devices. | ||||
|             f4m_path = compat_urlparse.urlparse(f4m_url).path | ||||
|             QUALITIES_RE = r'((,\d+)+,?)' | ||||
|             qualities = self._search_regex(QUALITIES_RE, f4m_path, 'qualities').strip(',').split(',') | ||||
|             http_path = f4m_path[1:].split('/', 1)[1] | ||||
|             http_template = re.sub(QUALITIES_RE, r'%s', http_path) | ||||
|             http_template = http_template.replace('.csmil/manifest.f4m', '') | ||||
|             http_template = compat_urlparse.urljoin( | ||||
|                 'http://video.gamespotcdn.com/', http_template) | ||||
|             for q in qualities: | ||||
|                 formats.append({ | ||||
|                     'url': http_template % q, | ||||
|                     'ext': 'mp4', | ||||
|                     'format_id': q, | ||||
|                 }) | ||||
|         else: | ||||
|         if f4m_url: | ||||
|             manifest_url = f4m_url | ||||
|             formats.extend(self._extract_f4m_formats( | ||||
|                 f4m_url + '?hdcore=3.7.0', page_id, f4m_id='hds', fatal=False)) | ||||
|         m3u8_url = streams.get('m3u8_stream') | ||||
|         if m3u8_url: | ||||
|             manifest_url = m3u8_url | ||||
|             m3u8_formats = self._extract_m3u8_formats( | ||||
|                 m3u8_url, page_id, 'mp4', 'm3u8_native', | ||||
|                 m3u8_id='hls', fatal=False) | ||||
|             formats.extend(m3u8_formats) | ||||
|         progressive_url = dict_get( | ||||
|             streams, ('progressive_hd', 'progressive_high', 'progressive_low')) | ||||
|         if progressive_url and manifest_url: | ||||
|             qualities_basename = self._search_regex( | ||||
|                 '/([^/]+)\.csmil/', | ||||
|                 manifest_url, 'qualities basename', default=None) | ||||
|             if qualities_basename: | ||||
|                 QUALITIES_RE = r'((,\d+)+,?)' | ||||
|                 qualities = self._search_regex( | ||||
|                     QUALITIES_RE, qualities_basename, | ||||
|                     'qualities', default=None) | ||||
|                 if qualities: | ||||
|                     qualities = list(map(lambda q: int(q), qualities.strip(',').split(','))) | ||||
|                     qualities.sort() | ||||
|                     http_template = re.sub(QUALITIES_RE, r'%d', qualities_basename) | ||||
|                     http_url_basename = url_basename(progressive_url) | ||||
|                     if m3u8_formats: | ||||
|                         self._sort_formats(m3u8_formats) | ||||
|                         m3u8_formats = list(filter( | ||||
|                             lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple', | ||||
|                             m3u8_formats)) | ||||
|                     if len(qualities) == len(m3u8_formats): | ||||
|                         for q, m3u8_format in zip(qualities, m3u8_formats): | ||||
|                             f = m3u8_format.copy() | ||||
|                             f.update({ | ||||
|                                 'url': progressive_url.replace( | ||||
|                                     http_url_basename, http_template % q), | ||||
|                                 'format_id': f['format_id'].replace('hls', 'http'), | ||||
|                                 'protocol': 'http', | ||||
|                             }) | ||||
|                             formats.append(f) | ||||
|                     else: | ||||
|                         for q in qualities: | ||||
|                             formats.append({ | ||||
|                                 'url': progressive_url.replace( | ||||
|                                     http_url_basename, http_template % q), | ||||
|                                 'ext': 'mp4', | ||||
|                                 'format_id': 'http-%d' % q, | ||||
|                                 'tbr': q, | ||||
|                             }) | ||||
|  | ||||
|         onceux_json = self._search_regex( | ||||
|             r'data-onceux-options=["\'](.*?)["\']', webpage, 'data video', default=None) | ||||
|         if onceux_json: | ||||
|             onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri') | ||||
|             if onceux_url: | ||||
|                 formats.extend(self._extract_once_formats(re.sub( | ||||
|                     r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url).replace('ads/vmap/', ''))) | ||||
|  | ||||
|         if not formats: | ||||
|             for quality in ['sd', 'hd']: | ||||
|                 # It's actually a link to a flv file | ||||
|                 flv_url = streams.get('f4m_{0}'.format(quality)) | ||||
| @@ -71,6 +115,7 @@ class GameSpotIE(InfoExtractor): | ||||
|                         'ext': 'flv', | ||||
|                         'format_id': quality, | ||||
|                     }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': data_video['guid'], | ||||
|   | ||||
| @@ -1,62 +0,0 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_age_limit, | ||||
|     url_basename, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class GametrailersIE(InfoExtractor): | ||||
|     # Extractor for gametrailers.com video pages. The enclosing hunk header | ||||
|     # ('-1,62 +0,0') shows this whole file being removed in the compared range. | ||||
|     # | ||||
|     # The captured id is everything after the second path segment following | ||||
|     # '/videos/view/'; it is used as the display id, not the final video id. | ||||
|     _VALID_URL = r'https?://www\.gametrailers\.com/videos/view/[^/]+/(?P<id>.+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.gametrailers.com/videos/view/gametrailers-com/116437-Just-Cause-3-Review', | ||||
|         'md5': 'f28c4efa0bdfaf9b760f6507955b6a6a', | ||||
|         'info_dict': { | ||||
|             'id': '2983958', | ||||
|             'ext': 'mp4', | ||||
|             'display_id': '116437-Just-Cause-3-Review', | ||||
|             'title': 'Just Cause 3 - Review', | ||||
|             'description': 'It\'s a lot of fun to shoot at things and then watch them explode in Just Cause 3, but should there be more to the experience than that?', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         # Two downloads: the public page (title, description, embed URL) and | ||||
|         # then the embed page, whose 'embedVars' JSON carries the media list. | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         # Title is the text of <title> up to the first '|', stripped. | ||||
|         title = self._html_search_regex( | ||||
|             r'<title>(.+?)\|', webpage, 'title').strip() | ||||
|         # The embed src on the page is scheme-less ('//embed...'); | ||||
|         # _proto_relative_url is called with scheme='http:' (presumably to | ||||
|         # prefix it -- helper defined outside this view). | ||||
|         embed_url = self._proto_relative_url( | ||||
|             self._search_regex( | ||||
|                 r'src=\'(//embed.gametrailers.com/embed/[^\']+)\'', webpage, | ||||
|                 'embed url'), | ||||
|             scheme='http:') | ||||
|         # The video id is derived from the embed URL rather than the page URL. | ||||
|         video_id = url_basename(embed_url) | ||||
|         embed_page = self._download_webpage(embed_url, video_id) | ||||
|         # Grab the object literal assigned to 'embedVars', up to the closing | ||||
|         # brace that directly precedes </script>. | ||||
|         embed_vars_json = self._search_regex( | ||||
|             r'(?s)var embedVars = (\{.*?\})\s*</script>', embed_page, | ||||
|             'embed vars') | ||||
|         info = self._parse_json(embed_vars_json, video_id) | ||||
|  | ||||
|         # Keep only media entries whose 'mediaPurpose' is 'play'. | ||||
|         formats = [] | ||||
|         for media in info['media']: | ||||
|             if media['mediaPurpose'] == 'play': | ||||
|                 # NOTE(review): the key below is spelled 'width:' (with a | ||||
|                 # trailing colon) while its sibling is 'height' -- this looks | ||||
|                 # like a typo for 'width'. | ||||
|                 formats.append({ | ||||
|                     'url': media['uri'], | ||||
|                     'height': media['height'], | ||||
|                     'width:': media['width'], | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         # Optional fields are read with .get(), so they may be None when the | ||||
|         # embed data lacks them; 'media' above is accessed as required. | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'formats': formats, | ||||
|             'thumbnail': info.get('thumbUri'), | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'duration': int_or_none(info.get('videoLengthInSeconds')), | ||||
|             'age_limit': parse_age_limit(info.get('audienceRating')), | ||||
|         } | ||||
| @@ -64,6 +64,9 @@ from .liveleak import LiveLeakIE | ||||
| from .threeqsdn import ThreeQSDNIE | ||||
| from .theplatform import ThePlatformIE | ||||
| from .vessel import VesselIE | ||||
| from .kaltura import KalturaIE | ||||
| from .eagleplatform import EaglePlatformIE | ||||
| from .facebook import FacebookIE | ||||
|  | ||||
|  | ||||
| class GenericIE(InfoExtractor): | ||||
| @@ -920,6 +923,24 @@ class GenericIE(InfoExtractor): | ||||
|             }, | ||||
|             'add_ie': ['Kaltura'], | ||||
|         }, | ||||
|         { | ||||
|             # Kaltura embedded via quoted entry_id | ||||
|             'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures', | ||||
|             'info_dict': { | ||||
|                 'id': '0_utuok90b', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': '06_matthew_brender_raj_dutt', | ||||
|                 'timestamp': 1466638791, | ||||
|                 'upload_date': '20160622', | ||||
|             }, | ||||
|             'add_ie': ['Kaltura'], | ||||
|             'expected_warnings': [ | ||||
|                 'Could not send HEAD request' | ||||
|             ], | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             } | ||||
|         }, | ||||
|         # Eagle.Platform embed (generic URL) | ||||
|         { | ||||
|             'url': 'http://lenta.ru/news/2015/03/06/navalny/', | ||||
| @@ -1091,12 +1112,17 @@ class GenericIE(InfoExtractor): | ||||
|         # Dailymotion Cloud video | ||||
|         { | ||||
|             'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910', | ||||
|             'md5': '49444254273501a64675a7e68c502681', | ||||
|             'md5': 'dcaf23ad0c67a256f4278bce6e0bae38', | ||||
|             'info_dict': { | ||||
|                 'id': '5585de919473990de4bee11b', | ||||
|                 'id': 'x2uy8t3', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Le débat', | ||||
|                 'title': 'Sauvons les abeilles ! - Le débat', | ||||
|                 'description': 'md5:d9082128b1c5277987825d684939ca26', | ||||
|                 'thumbnail': 're:^https?://.*\.jpe?g$', | ||||
|                 'timestamp': 1434970506, | ||||
|                 'upload_date': '20150622', | ||||
|                 'uploader': 'Public Sénat', | ||||
|                 'uploader_id': 'xa9gza', | ||||
|             } | ||||
|         }, | ||||
|         # OnionStudios embed | ||||
| @@ -1220,6 +1246,55 @@ class GenericIE(InfoExtractor): | ||||
|                 'uploader': 'www.hudl.com', | ||||
|             }, | ||||
|         }, | ||||
|         # twitter:player embed | ||||
|         { | ||||
|             'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/', | ||||
|             'md5': 'a3e0df96369831de324f0778e126653c', | ||||
|             'info_dict': { | ||||
|                 'id': '4909620399001', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'What Do Black Holes Sound Like?', | ||||
|                 'description': 'what do black holes sound like', | ||||
|                 'upload_date': '20160524', | ||||
|                 'uploader_id': '29913724001', | ||||
|                 'timestamp': 1464107587, | ||||
|                 'uploader': 'TheAtlantic', | ||||
|             }, | ||||
|             'add_ie': ['BrightcoveLegacy'], | ||||
|         }, | ||||
|         # Facebook <iframe> embed | ||||
|         { | ||||
|             'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html', | ||||
|             'md5': 'fbcde74f534176ecb015849146dd3aee', | ||||
|             'info_dict': { | ||||
|                 'id': '599637780109885', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Facebook video #599637780109885', | ||||
|             }, | ||||
|         }, | ||||
|         # Facebook API embed | ||||
|         { | ||||
|             'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/', | ||||
|             'md5': 'a47372ee61b39a7b90287094d447d94e', | ||||
|             'info_dict': { | ||||
|                 'id': '10153467542406923', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Facebook video #10153467542406923', | ||||
|             }, | ||||
|         }, | ||||
|         # Wordpress "YouTube Video Importer" plugin | ||||
|         { | ||||
|             'url': 'http://www.lothype.com/blue-devils-drumline-stanford-lot-2016/', | ||||
|             'md5': 'd16797741b560b485194eddda8121b48', | ||||
|             'info_dict': { | ||||
|                 'id': 'HNTXWDXV9Is', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Blue Devils Drumline Stanford lot 2016', | ||||
|                 'upload_date': '20160627', | ||||
|                 'uploader_id': 'GENOCIDE8GENERAL10', | ||||
|                 'uploader': 'cylus cyrus', | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def report_following_redirect(self, new_url): | ||||
| @@ -1576,6 +1651,13 @@ class GenericIE(InfoExtractor): | ||||
|         if matches: | ||||
|             return _playlist_from_matches(matches, lambda m: unescapeHTML(m)) | ||||
|  | ||||
|         # Look for Wordpress "YouTube Video Importer" plugin | ||||
|         matches = re.findall(r'''(?x)<div[^>]+ | ||||
|             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+ | ||||
|             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage) | ||||
|         if matches: | ||||
|             return _playlist_from_matches(matches, lambda m: m[-1]) | ||||
|  | ||||
|         # Look for embedded Dailymotion player | ||||
|         matches = re.findall( | ||||
|             r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage) | ||||
| @@ -1718,10 +1800,9 @@ class GenericIE(InfoExtractor): | ||||
|             return self.url_result(mobj.group('url')) | ||||
|  | ||||
|         # Look for embedded Facebook player | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'Facebook') | ||||
|         facebook_url = FacebookIE._extract_url(webpage) | ||||
|         if facebook_url is not None: | ||||
|             return self.url_result(facebook_url, 'Facebook') | ||||
|  | ||||
|         # Look for embedded VK player | ||||
|         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage) | ||||
| @@ -1903,18 +1984,14 @@ class GenericIE(InfoExtractor): | ||||
|             return self.url_result(mobj.group('url'), 'Zapiks') | ||||
|  | ||||
|         # Look for Kaltura embeds | ||||
|         mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?(?P<q1>['\"])wid(?P=q1)\s*:\s*(?P<q2>['\"])_?(?P<partner_id>[^'\"]+)(?P=q2),.*?(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*(?P<q4>['\"])(?P<id>[^'\"]+)(?P=q4),", webpage) or | ||||
|                 re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage)) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(smuggle_url( | ||||
|                 'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), | ||||
|                 {'source_url': url}), 'Kaltura') | ||||
|         kaltura_url = KalturaIE._extract_url(webpage) | ||||
|         if kaltura_url: | ||||
|             return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key()) | ||||
|  | ||||
|         # Look for Eagle.Platform embeds | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'EaglePlatform') | ||||
|         eagleplatform_url = EaglePlatformIE._extract_url(webpage) | ||||
|         if eagleplatform_url: | ||||
|             return self.url_result(eagleplatform_url, EaglePlatformIE.ie_key()) | ||||
|  | ||||
|         # Look for ClipYou (uses Eagle.Platform) embeds | ||||
|         mobj = re.search( | ||||
| @@ -2060,6 +2137,11 @@ class GenericIE(InfoExtractor): | ||||
|                 'uploader': video_uploader, | ||||
|             } | ||||
|  | ||||
|         # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser | ||||
|         embed_url = self._html_search_meta('twitter:player', webpage, default=None) | ||||
|         if embed_url: | ||||
|             return self.url_result(embed_url) | ||||
|  | ||||
|         def check_video(vurl): | ||||
|             if YoutubeIE.suitable(vurl): | ||||
|                 return True | ||||
|   | ||||
							
								
								
									
										202
									
								
								youtube_dl/extractor/hrti.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										202
									
								
								youtube_dl/extractor/hrti.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,202 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_HTTPError | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     parse_age_limit, | ||||
|     sanitized_Request, | ||||
|     try_get, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class HRTiBaseIE(InfoExtractor): | ||||
|     """ | ||||
|         Base Information Extractor for Croatian Radiotelevision | ||||
|         video on demand site https://hrti.hrt.hr | ||||
|         Reverse engineered from the JavaScript app in app.min.js | ||||
|     """ | ||||
|     _NETRC_MACHINE = 'hrti' | ||||
|  | ||||
|     _APP_LANGUAGE = 'hr' | ||||
|     _APP_VERSION = '1.1' | ||||
|     _APP_PUBLICATION_ID = 'all_in_one' | ||||
|     _API_URL = 'http://clientapi.hrt.hr/client_api.php/config/identify/format/json' | ||||
|  | ||||
|     def _initialize_api(self): | ||||
|         init_data = { | ||||
|             'application_publication_id': self._APP_PUBLICATION_ID | ||||
|         } | ||||
|  | ||||
|         uuid = self._download_json( | ||||
|             self._API_URL, None, note='Downloading uuid', | ||||
|             errnote='Unable to download uuid', | ||||
|             data=json.dumps(init_data).encode('utf-8'))['uuid'] | ||||
|  | ||||
|         app_data = { | ||||
|             'uuid': uuid, | ||||
|             'application_publication_id': self._APP_PUBLICATION_ID, | ||||
|             'application_version': self._APP_VERSION | ||||
|         } | ||||
|  | ||||
|         req = sanitized_Request(self._API_URL, data=json.dumps(app_data).encode('utf-8')) | ||||
|         req.get_method = lambda: 'PUT' | ||||
|  | ||||
|         resources = self._download_json( | ||||
|             req, None, note='Downloading session information', | ||||
|             errnote='Unable to download session information') | ||||
|  | ||||
|         self._session_id = resources['session_id'] | ||||
|  | ||||
|         modules = resources['modules'] | ||||
|  | ||||
|         self._search_url = modules['vod_catalog']['resources']['search']['uri'].format( | ||||
|             language=self._APP_LANGUAGE, | ||||
|             application_id=self._APP_PUBLICATION_ID) | ||||
|  | ||||
|         self._login_url = (modules['user']['resources']['login']['uri'] + | ||||
|                            '/format/json').format(session_id=self._session_id) | ||||
|  | ||||
|         self._logout_url = modules['user']['resources']['logout']['uri'] | ||||
|  | ||||
|     def _login(self): | ||||
|         (username, password) = self._get_login_info() | ||||
|         # TODO: figure out authentication with cookies | ||||
|         if username is None or password is None: | ||||
|             self.raise_login_required() | ||||
|  | ||||
|         auth_data = { | ||||
|             'username': username, | ||||
|             'password': password, | ||||
|         } | ||||
|  | ||||
|         try: | ||||
|             auth_info = self._download_json( | ||||
|                 self._login_url, None, note='Logging in', errnote='Unable to log in', | ||||
|                 data=json.dumps(auth_data).encode('utf-8')) | ||||
|         except ExtractorError as e: | ||||
|             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 406: | ||||
|                 auth_info = self._parse_json(e.cause.read().encode('utf-8'), None) | ||||
|             else: | ||||
|                 raise | ||||
|  | ||||
|         error_message = auth_info.get('error', {}).get('message') | ||||
|         if error_message: | ||||
|             raise ExtractorError( | ||||
|                 '%s said: %s' % (self.IE_NAME, error_message), | ||||
|                 expected=True) | ||||
|  | ||||
|         self._token = auth_info['secure_streaming_token'] | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         self._initialize_api() | ||||
|         self._login() | ||||
|  | ||||
|  | ||||
| class HRTiIE(HRTiBaseIE): | ||||
|     _VALID_URL = r'''(?x) | ||||
|                         (?: | ||||
|                             hrti:(?P<short_id>[0-9]+)| | ||||
|                             https?:// | ||||
|                                 hrti\.hrt\.hr/\#/video/show/(?P<id>[0-9]+)/(?P<display_id>[^/]+)? | ||||
|                         ) | ||||
|                     ''' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://hrti.hrt.hr/#/video/show/2181385/republika-dokumentarna-serija-16-hd', | ||||
|         'info_dict': { | ||||
|             'id': '2181385', | ||||
|             'display_id': 'republika-dokumentarna-serija-16-hd', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'REPUBLIKA, dokumentarna serija (1/6) (HD)', | ||||
|             'description': 'md5:48af85f620e8e0e1df4096270568544f', | ||||
|             'duration': 2922, | ||||
|             'view_count': int, | ||||
|             'average_rating': int, | ||||
|             'episode_number': int, | ||||
|             'season_number': int, | ||||
|             'age_limit': 12, | ||||
|         }, | ||||
|         'skip': 'Requires account credentials', | ||||
|     }, { | ||||
|         'url': 'https://hrti.hrt.hr/#/video/show/2181385/', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'hrti:2181385', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('short_id') or mobj.group('id') | ||||
|         display_id = mobj.group('display_id') or video_id | ||||
|  | ||||
|         video = self._download_json( | ||||
|             '%s/video_id/%s/format/json' % (self._search_url, video_id), | ||||
|             display_id, 'Downloading video metadata JSON')['video'][0] | ||||
|  | ||||
|         title_info = video['title'] | ||||
|         title = title_info['title_long'] | ||||
|  | ||||
|         movie = video['video_assets']['movie'][0] | ||||
|         m3u8_url = movie['url'].format(TOKEN=self._token) | ||||
|         formats = self._extract_m3u8_formats( | ||||
|             m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native', | ||||
|             m3u8_id='hls') | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         description = clean_html(title_info.get('summary_long')) | ||||
|         age_limit = parse_age_limit(video.get('parental_control', {}).get('rating')) | ||||
|         view_count = int_or_none(video.get('views')) | ||||
|         average_rating = int_or_none(video.get('user_rating')) | ||||
|         duration = int_or_none(movie.get('duration')) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'duration': duration, | ||||
|             'view_count': view_count, | ||||
|             'average_rating': average_rating, | ||||
|             'age_limit': age_limit, | ||||
|             'formats': formats, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class HRTiPlaylistIE(HRTiBaseIE): | ||||
|     _VALID_URL = r'https?://hrti.hrt.hr/#/video/list/category/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://hrti.hrt.hr/#/video/list/category/212/ekumena', | ||||
|         'info_dict': { | ||||
|             'id': '212', | ||||
|             'title': 'ekumena', | ||||
|         }, | ||||
|         'playlist_mincount': 8, | ||||
|         'skip': 'Requires account credentials', | ||||
|     }, { | ||||
|         'url': 'https://hrti.hrt.hr/#/video/list/category/212/', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         category_id = mobj.group('id') | ||||
|         display_id = mobj.group('display_id') or category_id | ||||
|  | ||||
|         response = self._download_json( | ||||
|             '%s/category_id/%s/format/json' % (self._search_url, category_id), | ||||
|             display_id, 'Downloading video metadata JSON') | ||||
|  | ||||
|         video_ids = try_get( | ||||
|             response, lambda x: x['video_listings'][0]['alternatives'][0]['list'], | ||||
|             list) or [video['id'] for video in response.get('videos', []) if video.get('id')] | ||||
|  | ||||
|         entries = [self.url_result('hrti:%s' % category_id) for category_id in video_ids] | ||||
|  | ||||
|         return self.playlist_result(entries, category_id, display_id) | ||||
| @@ -1,30 +1,25 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import binascii | ||||
| import hashlib | ||||
| import itertools | ||||
| import math | ||||
| import os | ||||
| import random | ||||
| import re | ||||
| import time | ||||
| import uuid | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_parse_qs, | ||||
|     compat_str, | ||||
|     compat_urllib_parse_urlencode, | ||||
|     compat_urllib_parse_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     decode_packed_codes, | ||||
|     ExtractorError, | ||||
|     intlist_to_bytes, | ||||
|     ohdave_rsa_encrypt, | ||||
|     remove_start, | ||||
|     sanitized_Request, | ||||
|     urlencode_postdata, | ||||
|     url_basename, | ||||
|     urshift, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -171,70 +166,21 @@ class IqiyiIE(InfoExtractor): | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.iqiyi.com/v_19rrojlavg.html', | ||||
|         'md5': '2cb594dc2781e6c941a110d8f358118b', | ||||
|         'md5': '470a6c160618577166db1a7aac5a3606', | ||||
|         'info_dict': { | ||||
|             'id': '9c1fb1b99d192b21c559e5a1a2cb3c73', | ||||
|             'ext': 'mp4', | ||||
|             'title': '美国德州空中惊现奇异云团 酷似UFO', | ||||
|             'ext': 'f4v', | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://www.iqiyi.com/v_19rrhnnclk.html', | ||||
|         'md5': 'f09f0a6a59b2da66a26bf4eda669a4cc', | ||||
|         'info_dict': { | ||||
|             'id': 'e3f585b550a280af23c98b6cb2be19fb', | ||||
|             'title': '名侦探柯南第752集', | ||||
|         }, | ||||
|         'playlist': [{ | ||||
|             'info_dict': { | ||||
|                 'id': 'e3f585b550a280af23c98b6cb2be19fb_part1', | ||||
|                 'ext': 'f4v', | ||||
|                 'title': '名侦探柯南第752集', | ||||
|             }, | ||||
|         }, { | ||||
|             'info_dict': { | ||||
|                 'id': 'e3f585b550a280af23c98b6cb2be19fb_part2', | ||||
|                 'ext': 'f4v', | ||||
|                 'title': '名侦探柯南第752集', | ||||
|             }, | ||||
|         }, { | ||||
|             'info_dict': { | ||||
|                 'id': 'e3f585b550a280af23c98b6cb2be19fb_part3', | ||||
|                 'ext': 'f4v', | ||||
|                 'title': '名侦探柯南第752集', | ||||
|             }, | ||||
|         }, { | ||||
|             'info_dict': { | ||||
|                 'id': 'e3f585b550a280af23c98b6cb2be19fb_part4', | ||||
|                 'ext': 'f4v', | ||||
|                 'title': '名侦探柯南第752集', | ||||
|             }, | ||||
|         }, { | ||||
|             'info_dict': { | ||||
|                 'id': 'e3f585b550a280af23c98b6cb2be19fb_part5', | ||||
|                 'ext': 'f4v', | ||||
|                 'title': '名侦探柯南第752集', | ||||
|             }, | ||||
|         }, { | ||||
|             'info_dict': { | ||||
|                 'id': 'e3f585b550a280af23c98b6cb2be19fb_part6', | ||||
|                 'ext': 'f4v', | ||||
|                 'title': '名侦探柯南第752集', | ||||
|             }, | ||||
|         }, { | ||||
|             'info_dict': { | ||||
|                 'id': 'e3f585b550a280af23c98b6cb2be19fb_part7', | ||||
|                 'ext': 'f4v', | ||||
|                 'title': '名侦探柯南第752集', | ||||
|             }, | ||||
|         }, { | ||||
|             'info_dict': { | ||||
|                 'id': 'e3f585b550a280af23c98b6cb2be19fb_part8', | ||||
|                 'ext': 'f4v', | ||||
|                 'title': '名侦探柯南第752集', | ||||
|             }, | ||||
|         }], | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|             'ext': 'mp4', | ||||
|             'title': '名侦探柯南 国语版', | ||||
|         }, | ||||
|         'skip': 'Geo-restricted to China', | ||||
|     }, { | ||||
|         'url': 'http://www.iqiyi.com/w_19rt6o8t9p.html', | ||||
|         'only_matching': True, | ||||
| @@ -287,13 +233,6 @@ class IqiyiIE(InfoExtractor): | ||||
|         ('10', 'h1'), | ||||
|     ] | ||||
|  | ||||
|     AUTH_API_ERRORS = { | ||||
|         # No preview available (不允许试看鉴权失败) | ||||
|         'Q00505': 'This video requires a VIP account', | ||||
|         # End of preview time (试看结束鉴权失败) | ||||
|         'Q00506': 'Needs a VIP account for full video', | ||||
|     } | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         self._login() | ||||
|  | ||||
| @@ -352,177 +291,101 @@ class IqiyiIE(InfoExtractor): | ||||
|  | ||||
|         return True | ||||
|  | ||||
|     def _authenticate_vip_video(self, api_video_url, video_id, tvid, _uuid, do_report_warning): | ||||
|         auth_params = { | ||||
|             # version and platform hard-coded in com/qiyi/player/core/model/remote/AuthenticationRemote.as | ||||
|             'version': '2.0', | ||||
|             'platform': 'b6c13e26323c537d', | ||||
|             'aid': tvid, | ||||
|     @staticmethod | ||||
|     def _gen_sc(tvid, timestamp): | ||||
|         """Compute a hex digest string from tvid and timestamp. | ||||
|  | ||||
|         A 54-byte message is packed into 32-bit words: 13 characters of | ||||
|         str(timestamp - 7), 16 + 16 bytes derived from two embedded hex | ||||
|         strings, and up to 9 characters of tvid. It is then run through | ||||
|         64 mixing steps and the result is rendered as hex digits. | ||||
|  | ||||
|         NOTE(review): the initial state, rotation table, sine-derived | ||||
|         constants and round functions look like a hand-rolled single-block | ||||
|         MD5 - confirm against a reference implementation. | ||||
|         """ | ||||
|         # Initial four-word state; the last two are the bitwise complements | ||||
|         # of the first two | ||||
|         M = [1732584193, -271733879] | ||||
|         M.extend([~M[0], ~M[1]]) | ||||
|         # Rotation amount per step: four values for each group of 16 steps | ||||
|         I_table = [7, 12, 17, 22, 5, 9, 14, 20, 4, 11, 16, 23, 6, 10, 15, 21] | ||||
|         # Message words. 8388608 == 0x80 << 16 sets byte 54 (just past the | ||||
|         # message) and 432 == 54 * 8. Only 15 words are stored; a missing | ||||
|         # 16th word is read as None and treated as 0 by L() | ||||
|         C_base = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8388608, 432] | ||||
|  | ||||
|         def L(n, t): | ||||
|             # Add n and t with wrap-around to signed 32 bits; None counts as 0 | ||||
|             if t is None: | ||||
|                 t = 0 | ||||
|             return trunc(((n >> 1) + (t >> 1) << 1) + (n & 1) + (t & 1)) | ||||
|  | ||||
|         def trunc(n): | ||||
|             # Reduce n modulo 2**32 and map it into the signed 32-bit range | ||||
|             n = n % 0x100000000 | ||||
|             if n > 0x7fffffff: | ||||
|                 n -= 0x100000000 | ||||
|             return n | ||||
|  | ||||
|         def transform(string, mod): | ||||
|             # Reads i and a from the enclosing scope at call time: takes | ||||
|             # byte (i % 4) of the hex value, XORs it with i % mod and moves | ||||
|             # it to byte position (a & 3) | ||||
|             num = int(string, 16) | ||||
|             return (num >> 8 * (i % 4) & 255 ^ i % mod) << ((a & 3) << 3) | ||||
|  | ||||
|         C = list(C_base) | ||||
|         o = list(M) | ||||
|         k = str(timestamp - 7) | ||||
|         # Message bytes 0-12: the first 13 characters of k (k must be at | ||||
|         # least 13 characters long). Byte a goes into word a >> 2 at byte | ||||
|         # position a % 4 | ||||
|         for i in range(13): | ||||
|             a = i | ||||
|             C[a >> 2] |= ord(k[a]) << 8 * (a % 4) | ||||
|  | ||||
|         # Message bytes 13-28: from the first embedded string, taken in | ||||
|         # reversed 8-digit groups and XORed with i % 7 | ||||
|         for i in range(16): | ||||
|             a = i + 13 | ||||
|             start = (i >> 2) * 8 | ||||
|             r = '03967743b643f66763d623d637e30733' | ||||
|             C[a >> 2] |= transform(''.join(reversed(r[start:start + 8])), 7) | ||||
|  | ||||
|         # Message bytes 29-44: from the second embedded string; mod 1 makes | ||||
|         # the XOR operand always 0 | ||||
|         for i in range(16): | ||||
|             a = i + 29 | ||||
|             start = (i >> 2) * 8 | ||||
|             r = '7038766939776a32776a32706b337139' | ||||
|             C[a >> 2] |= transform(r[start:start + 8], 1) | ||||
|  | ||||
|         # Message bytes 45-53: up to 9 characters of tvid; shorter ids | ||||
|         # leave the remaining bytes zero | ||||
|         for i in range(9): | ||||
|             a = i + 45 | ||||
|             if i < len(tvid): | ||||
|                 C[a >> 2] |= ord(tvid[i]) << 8 * (a % 4) | ||||
|  | ||||
|         # 64 mixing steps in four groups of 16; I selects the group | ||||
|         for a in range(64): | ||||
|             i = a | ||||
|             I = i >> 4 | ||||
|             # Message word used by this step. urshift is presumably an | ||||
|             # unsigned right shift (so 0 here for a < 64) - TODO confirm | ||||
|             C_index = [i, 5 * i + 1, 3 * i + 5, 7 * i][I] % 16 + urshift(a, 6) | ||||
|             m = L(L(o[0], [ | ||||
|                 trunc(o[1] & o[2]) | trunc(~o[1] & o[3]), | ||||
|                 trunc(o[3] & o[1]) | trunc(~o[3] & o[2]), | ||||
|                 o[1] ^ o[2] ^ o[3], | ||||
|                 o[2] ^ trunc(o[1] | ~o[3]) | ||||
|             ][I]), L( | ||||
|                 trunc(int(abs(math.sin(i + 1)) * 4294967296)), | ||||
|                 C[C_index] if C_index < len(C) else None)) | ||||
|             # I is reused from here on as this step's rotation amount | ||||
|             I = I_table[4 * I + i % 4] | ||||
|             o = [o[3], | ||||
|                  L(o[1], trunc(trunc(m << I) | urshift(m, 32 - I))), | ||||
|                  o[1], | ||||
|                  o[2]] | ||||
|  | ||||
|         # Add the initial state back into the mixed state | ||||
|         new_M = [L(o[0], M[0]), L(o[1], M[1]), L(o[2], M[2]), L(o[3], M[3])] | ||||
|         # Pull out 32 four-bit values, 8 per word, swapping each adjacent pair | ||||
|         s = [new_M[a >> 3] >> (1 ^ a & 7) * 4 & 15 for a in range(32)] | ||||
|         # Keep every second hex digit of the hexlified bytes; presumably | ||||
|         # that leaves one digit per value in s - verify intlist_to_bytes | ||||
|         return binascii.hexlify(intlist_to_bytes(s))[1::2].decode('ascii') | ||||
|  | ||||
|     def get_raw_data(self, tvid, video_id): | ||||
|         tm = int(time.time() * 1000) | ||||
|  | ||||
|         sc = self._gen_sc(tvid, tm) | ||||
|         params = { | ||||
|             'platForm': 'h5', | ||||
|             'rate': 1, | ||||
|             'tvid': tvid, | ||||
|             'uid': '', | ||||
|             'deviceId': _uuid, | ||||
|             'playType': 'main',  # XXX: always main? | ||||
|             'filename': os.path.splitext(url_basename(api_video_url))[0], | ||||
|         } | ||||
|  | ||||
|         qd_items = compat_parse_qs(compat_urllib_parse_urlparse(api_video_url).query) | ||||
|         for key, val in qd_items.items(): | ||||
|             auth_params[key] = val[0] | ||||
|  | ||||
|         auth_req = sanitized_Request( | ||||
|             'http://api.vip.iqiyi.com/services/ckn.action', | ||||
|             urlencode_postdata(auth_params)) | ||||
|         # iQiyi server throws HTTP 405 error without the following header | ||||
|         auth_req.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|         auth_result = self._download_json( | ||||
|             auth_req, video_id, | ||||
|             note='Downloading video authentication JSON', | ||||
|             errnote='Unable to download video authentication JSON') | ||||
|  | ||||
|         code = auth_result.get('code') | ||||
|         msg = self.AUTH_API_ERRORS.get(code) or auth_result.get('msg') or code | ||||
|         if code == 'Q00506': | ||||
|             if do_report_warning: | ||||
|                 self.report_warning(msg) | ||||
|             return False | ||||
|         if 'data' not in auth_result: | ||||
|             if msg is not None: | ||||
|                 raise ExtractorError('%s said: %s' % (self.IE_NAME, msg), expected=True) | ||||
|             raise ExtractorError('Unexpected error from Iqiyi auth API') | ||||
|  | ||||
|         return auth_result['data'] | ||||
|  | ||||
|     def construct_video_urls(self, data, video_id, _uuid, tvid): | ||||
|         def do_xor(x, y): | ||||
|             a = y % 3 | ||||
|             if a == 1: | ||||
|                 return x ^ 121 | ||||
|             if a == 2: | ||||
|                 return x ^ 72 | ||||
|             return x ^ 103 | ||||
|  | ||||
|         def get_encode_code(l): | ||||
|             a = 0 | ||||
|             b = l.split('-') | ||||
|             c = len(b) | ||||
|             s = '' | ||||
|             for i in range(c - 1, -1, -1): | ||||
|                 a = do_xor(int(b[c - i - 1], 16), i) | ||||
|                 s += chr(a) | ||||
|             return s[::-1] | ||||
|  | ||||
|         def get_path_key(x, format_id, segment_index): | ||||
|             mg = ')(*&^flash@#$%a' | ||||
|             tm = self._download_json( | ||||
|                 'http://data.video.qiyi.com/t?tn=' + str(random.random()), video_id, | ||||
|                 note='Download path key of segment %d for format %s' % (segment_index + 1, format_id) | ||||
|             )['t'] | ||||
|             t = str(int(math.floor(int(tm) / (600.0)))) | ||||
|             return md5_text(t + mg + x) | ||||
|  | ||||
|         video_urls_dict = {} | ||||
|         need_vip_warning_report = True | ||||
|         for format_item in data['vp']['tkl'][0]['vs']: | ||||
|             if 0 < int(format_item['bid']) <= 10: | ||||
|                 format_id = self.get_format(format_item['bid']) | ||||
|             else: | ||||
|                 continue | ||||
|  | ||||
|             video_urls = [] | ||||
|  | ||||
|             video_urls_info = format_item['fs'] | ||||
|             if not format_item['fs'][0]['l'].startswith('/'): | ||||
|                 t = get_encode_code(format_item['fs'][0]['l']) | ||||
|                 if t.endswith('mp4'): | ||||
|                     video_urls_info = format_item['flvs'] | ||||
|  | ||||
|             for segment_index, segment in enumerate(video_urls_info): | ||||
|                 vl = segment['l'] | ||||
|                 if not vl.startswith('/'): | ||||
|                     vl = get_encode_code(vl) | ||||
|                 is_vip_video = '/vip/' in vl | ||||
|                 filesize = segment['b'] | ||||
|                 base_url = data['vp']['du'].split('/') | ||||
|                 if not is_vip_video: | ||||
|                     key = get_path_key( | ||||
|                         vl.split('/')[-1].split('.')[0], format_id, segment_index) | ||||
|                     base_url.insert(-1, key) | ||||
|                 base_url = '/'.join(base_url) | ||||
|                 param = { | ||||
|                     'su': _uuid, | ||||
|                     'qyid': uuid.uuid4().hex, | ||||
|                     'client': '', | ||||
|                     'z': '', | ||||
|                     'bt': '', | ||||
|                     'ct': '', | ||||
|                     'tn': str(int(time.time())) | ||||
|                 } | ||||
|                 api_video_url = base_url + vl | ||||
|                 if is_vip_video: | ||||
|                     api_video_url = api_video_url.replace('.f4v', '.hml') | ||||
|                     auth_result = self._authenticate_vip_video( | ||||
|                         api_video_url, video_id, tvid, _uuid, need_vip_warning_report) | ||||
|                     if auth_result is False: | ||||
|                         need_vip_warning_report = False | ||||
|                         break | ||||
|                     param.update({ | ||||
|                         't': auth_result['t'], | ||||
|                         # cid is hard-coded in com/qiyi/player/core/player/RuntimeData.as | ||||
|                         'cid': 'afbe8fd3d73448c9', | ||||
|                         'vid': video_id, | ||||
|                         'QY00001': auth_result['u'], | ||||
|                     }) | ||||
|                 api_video_url += '?' if '?' not in api_video_url else '&' | ||||
|                 api_video_url += compat_urllib_parse_urlencode(param) | ||||
|                 js = self._download_json( | ||||
|                     api_video_url, video_id, | ||||
|                     note='Download video info of segment %d for format %s' % (segment_index + 1, format_id)) | ||||
|                 video_url = js['l'] | ||||
|                 video_urls.append( | ||||
|                     (video_url, filesize)) | ||||
|  | ||||
|             video_urls_dict[format_id] = video_urls | ||||
|         return video_urls_dict | ||||
|  | ||||
|     def get_format(self, bid): | ||||
|         matched_format_ids = [_format_id for _bid, _format_id in self._FORMATS_MAP if _bid == str(bid)] | ||||
|         return matched_format_ids[0] if len(matched_format_ids) else None | ||||
|  | ||||
|     def get_bid(self, format_id): | ||||
|         matched_bids = [_bid for _bid, _format_id in self._FORMATS_MAP if _format_id == format_id] | ||||
|         return matched_bids[0] if len(matched_bids) else None | ||||
|  | ||||
|     def get_raw_data(self, tvid, video_id, enc_key, _uuid): | ||||
|         tm = str(int(time.time())) | ||||
|         tail = tm + tvid | ||||
|         param = { | ||||
|             'key': 'fvip', | ||||
|             'src': md5_text('youtube-dl'), | ||||
|             'tvId': tvid, | ||||
|             'vid': video_id, | ||||
|             'vinfo': 1, | ||||
|             'tm': tm, | ||||
|             'enc': md5_text(enc_key + tail), | ||||
|             'qyid': _uuid, | ||||
|             'tn': random.random(), | ||||
|             # In iQiyi's flash player, um is set to 1 if there's a logged user | ||||
|             # Some 1080P formats are only available with a logged user. | ||||
|             # Here force um=1 to trick the iQiyi server | ||||
|             'um': 1, | ||||
|             'authkey': md5_text(md5_text('') + tail), | ||||
|             'k_tag': 1, | ||||
|             'cupid': 'qc_100001_100186', | ||||
|             'type': 'mp4', | ||||
|             'nolimit': 0, | ||||
|             'agenttype': 13, | ||||
|             'src': 'd846d0c32d664d32b6b54ea48997a589', | ||||
|             'sc': sc, | ||||
|             't': tm - 7, | ||||
|             '__jsT': None, | ||||
|         } | ||||
|  | ||||
|         api_url = 'http://cache.video.qiyi.com/vms' + '?' + \ | ||||
|             compat_urllib_parse_urlencode(param) | ||||
|         raw_data = self._download_json(api_url, video_id) | ||||
|         return raw_data | ||||
|  | ||||
|     def get_enc_key(self, video_id): | ||||
|         # TODO: automatic key extraction | ||||
|         # last update at 2016-01-22 for Zombie::bite | ||||
|         enc_key = '4a1caba4b4465345366f28da7c117d20' | ||||
|         return enc_key | ||||
|         headers = {} | ||||
|         cn_verification_proxy = self._downloader.params.get('cn_verification_proxy') | ||||
|         if cn_verification_proxy: | ||||
|             headers['Ytdl-request-proxy'] = cn_verification_proxy | ||||
|         return self._download_json( | ||||
|             'http://cache.m.iqiyi.com/jp/tmts/%s/%s/' % (tvid, video_id), | ||||
|             video_id, transform_source=lambda s: remove_start(s, 'var tvInfoJs='), | ||||
|             query=params, headers=headers) | ||||
|  | ||||
|     def _extract_playlist(self, webpage): | ||||
|         PAGE_SIZE = 50 | ||||
| @@ -571,58 +434,27 @@ class IqiyiIE(InfoExtractor): | ||||
|             r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid') | ||||
|         video_id = self._search_regex( | ||||
|             r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id') | ||||
|         _uuid = uuid.uuid4().hex | ||||
|  | ||||
|         enc_key = self.get_enc_key(video_id) | ||||
|         for _ in range(5): | ||||
|             raw_data = self.get_raw_data(tvid, video_id) | ||||
|  | ||||
|         raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid) | ||||
|             if raw_data['code'] != 'A00000': | ||||
|                 if raw_data['code'] == 'A00111': | ||||
|                     self.raise_geo_restricted() | ||||
|                 raise ExtractorError('Unable to load data. Error code: ' + raw_data['code']) | ||||
|  | ||||
|         if raw_data['code'] != 'A000000': | ||||
|             raise ExtractorError('Unable to load data. Error code: ' + raw_data['code']) | ||||
|             data = raw_data['data'] | ||||
|  | ||||
|         data = raw_data['data'] | ||||
|             # iQiYi sometimes returns Ads | ||||
|             if not isinstance(data['playInfo'], dict): | ||||
|                 self._sleep(5, video_id) | ||||
|                 continue | ||||
|  | ||||
|         title = data['vi']['vn'] | ||||
|             title = data['playInfo']['an'] | ||||
|             break | ||||
|  | ||||
|         # generate video_urls_dict | ||||
|         video_urls_dict = self.construct_video_urls( | ||||
|             data, video_id, _uuid, tvid) | ||||
|  | ||||
|         # construct info | ||||
|         entries = [] | ||||
|         for format_id in video_urls_dict: | ||||
|             video_urls = video_urls_dict[format_id] | ||||
|             for i, video_url_info in enumerate(video_urls): | ||||
|                 if len(entries) < i + 1: | ||||
|                     entries.append({'formats': []}) | ||||
|                 entries[i]['formats'].append( | ||||
|                     { | ||||
|                         'url': video_url_info[0], | ||||
|                         'filesize': video_url_info[-1], | ||||
|                         'format_id': format_id, | ||||
|                         'preference': int(self.get_bid(format_id)) | ||||
|                     } | ||||
|                 ) | ||||
|  | ||||
|         for i in range(len(entries)): | ||||
|             self._sort_formats(entries[i]['formats']) | ||||
|             entries[i].update( | ||||
|                 { | ||||
|                     'id': '%s_part%d' % (video_id, i + 1), | ||||
|                     'title': title, | ||||
|                 } | ||||
|             ) | ||||
|  | ||||
|         if len(entries) > 1: | ||||
|             info = { | ||||
|                 '_type': 'multi_video', | ||||
|                 'id': video_id, | ||||
|                 'title': title, | ||||
|                 'entries': entries, | ||||
|             } | ||||
|         else: | ||||
|             info = entries[0] | ||||
|             info['id'] = video_id | ||||
|             info['title'] = title | ||||
|  | ||||
|         return info | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'url': data['m3u'], | ||||
|         } | ||||
|   | ||||
| @@ -64,6 +64,32 @@ class KalturaIE(InfoExtractor): | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|     @staticmethod | ||||
|     def _extract_url(webpage): | ||||
|         mobj = ( | ||||
|             re.search( | ||||
|                 r"""(?xs) | ||||
|                     kWidget\.(?:thumb)?[Ee]mbed\( | ||||
|                     \{.*? | ||||
|                         (?P<q1>['\"])wid(?P=q1)\s*:\s* | ||||
|                         (?P<q2>['\"])_?(?P<partner_id>[^'\"]+)(?P=q2),.*? | ||||
|                         (?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s* | ||||
|                         (?P<q4>['\"])(?P<id>[^'\"]+)(?P=q4), | ||||
|                 """, webpage) or | ||||
|             re.search( | ||||
|                 r'''(?xs) | ||||
|                     (?P<q1>["\']) | ||||
|                         (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*? | ||||
|                     (?P=q1).*? | ||||
|                     (?: | ||||
|                         entry_?[Ii]d| | ||||
|                         (?P<q2>["\'])entry_?[Ii]d(?P=q2) | ||||
|                     )\s*:\s* | ||||
|                     (?P<q3>["\'])(?P<id>.+?)(?P=q3) | ||||
|                 ''', webpage)) | ||||
|         if mobj: | ||||
|             return 'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict() | ||||
|  | ||||
|     def _kaltura_api_call(self, video_id, actions, *args, **kwargs): | ||||
|         params = actions[0] | ||||
|         if len(actions) > 1: | ||||
|   | ||||
| @@ -1,60 +1,74 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     parse_duration, | ||||
|     determine_ext, | ||||
|     js_to_json, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class LA7IE(InfoExtractor): | ||||
|     IE_NAME = 'la7.tv' | ||||
|     _VALID_URL = r'''(?x) | ||||
|         https?://(?:www\.)?la7\.tv/ | ||||
|         (?: | ||||
|             richplayer/\?assetid=| | ||||
|             \?contentId= | ||||
|         ) | ||||
|         (?P<id>[0-9]+)''' | ||||
|     IE_NAME = 'la7.it' | ||||
|     _VALID_URL = r'''(?x)(https?://)?(?: | ||||
|         (?:www\.)?la7\.it/([^/]+)/(?:rivedila7|video)/| | ||||
|         tg\.la7\.it/repliche-tgla7\?id= | ||||
|     )(?P<id>.+)''' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.la7.tv/richplayer/?assetid=50355319', | ||||
|         'md5': 'ec7d1f0224d20ba293ab56cf2259651f', | ||||
|     _TESTS = [{ | ||||
|         # 'src' is a plain URL | ||||
|         'url': 'http://www.la7.it/crozza/video/inccool8-02-10-2015-163722', | ||||
|         'md5': '6054674766e7988d3e02f2148ff92180', | ||||
|         'info_dict': { | ||||
|             'id': '50355319', | ||||
|             'id': 'inccool8-02-10-2015-163722', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'IL DIVO', | ||||
|             'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti  e Flavio Bucci', | ||||
|             'duration': 6254, | ||||
|             'title': 'Inc.Cool8', | ||||
|             'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto  atletico', | ||||
|             'thumbnail': 're:^https?://.*', | ||||
|         }, | ||||
|         'skip': 'Blocked in the US', | ||||
|     } | ||||
|     }, { | ||||
|         # 'src' is a dictionary | ||||
|         'url': 'http://tg.la7.it/repliche-tgla7?id=189080', | ||||
|         'md5': '6b0d8888d286e39870208dfeceaf456b', | ||||
|         'info_dict': { | ||||
|             'id': '189080', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'TG LA7', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.la7.it/omnibus/rivedila7/omnibus-news-02-07-2016-189077', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         xml_url = 'http://www.la7.tv/repliche/content/index.php?contentId=%s' % video_id | ||||
|         doc = self._download_xml(xml_url, video_id) | ||||
|  | ||||
|         video_title = doc.find('title').text | ||||
|         description = doc.find('description').text | ||||
|         duration = parse_duration(doc.find('duration').text) | ||||
|         thumbnail = doc.find('img').text | ||||
|         view_count = int(doc.find('views').text) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         prefix = doc.find('.//fqdn').text.strip().replace('auto:', 'http:') | ||||
|         player_data = self._parse_json( | ||||
|             self._search_regex(r'videoLa7\(({[^;]+})\);', webpage, 'player data'), | ||||
|             video_id, transform_source=js_to_json) | ||||
|  | ||||
|         formats = [{ | ||||
|             'format': vnode.find('quality').text, | ||||
|             'tbr': int(vnode.find('quality').text), | ||||
|             'url': vnode.find('fms').text.strip().replace('mp4:', prefix), | ||||
|         } for vnode in doc.findall('.//videos/video')] | ||||
|         source = player_data['src'] | ||||
|         source_urls = source.values() if isinstance(source, dict) else [source] | ||||
|  | ||||
|         formats = [] | ||||
|         for source_url in source_urls: | ||||
|             ext = determine_ext(source_url) | ||||
|             if ext == 'm3u8': | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     source_url, video_id, ext='mp4', | ||||
|                     entry_protocol='m3u8_native', m3u8_id='hls')) | ||||
|             else: | ||||
|                 formats.append({ | ||||
|                     'url': source_url, | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': video_title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'title': player_data['title'], | ||||
|             'description': self._og_search_description(webpage, default=None), | ||||
|             'thumbnail': player_data.get('poster'), | ||||
|             'formats': formats, | ||||
|             'view_count': view_count, | ||||
|         } | ||||
|   | ||||
| @@ -23,6 +23,7 @@ from ..utils import ( | ||||
|     sanitized_Request, | ||||
|     str_or_none, | ||||
|     url_basename, | ||||
|     urshift, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -74,15 +75,11 @@ class LeIE(InfoExtractor): | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     @staticmethod | ||||
|     def urshift(val, n): | ||||
|         return val >> n if val >= 0 else (val + 0x100000000) >> n | ||||
|  | ||||
|     # ror() and calc_time_key() are reversed from a embedded swf file in KLetvPlayer.swf | ||||
|     def ror(self, param1, param2): | ||||
|         _loc3_ = 0 | ||||
|         while _loc3_ < param2: | ||||
|             param1 = self.urshift(param1, 1) + ((param1 & 1) << 31) | ||||
|             param1 = urshift(param1, 1) + ((param1 & 1) << 31) | ||||
|             _loc3_ += 1 | ||||
|         return param1 | ||||
|  | ||||
|   | ||||
| @@ -1,8 +1,6 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| @@ -23,34 +21,5 @@ class M6IE(InfoExtractor): | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         rss = self._download_xml('http://ws.m6.fr/v1/video/info/m6/bonus/%s' % video_id, video_id, | ||||
|                                  'Downloading video RSS') | ||||
|  | ||||
|         title = rss.find('./channel/item/title').text | ||||
|         description = rss.find('./channel/item/description').text | ||||
|         thumbnail = rss.find('./channel/item/visuel_clip_big').text | ||||
|         duration = int(rss.find('./channel/item/duration').text) | ||||
|         view_count = int(rss.find('./channel/item/nombre_vues').text) | ||||
|  | ||||
|         formats = [] | ||||
|         for format_id in ['lq', 'sd', 'hq', 'hd']: | ||||
|             video_url = rss.find('./channel/item/url_video_%s' % format_id) | ||||
|             if video_url is None: | ||||
|                 continue | ||||
|             formats.append({ | ||||
|                 'url': video_url.text, | ||||
|                 'format_id': format_id, | ||||
|             }) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'view_count': view_count, | ||||
|             'formats': formats, | ||||
|         } | ||||
|         video_id = self._match_id(url) | ||||
|         return self.url_result('6play:%s' % video_id, 'SixPlay', video_id) | ||||
|   | ||||
							
								
								
									
										73
									
								
								youtube_dl/extractor/meta.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										73
									
								
								youtube_dl/extractor/meta.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,73 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .pladform import PladformIE | ||||
| from ..utils import ( | ||||
|     unescapeHTML, | ||||
|     int_or_none, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class METAIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://video\.meta\.ua/(?:iframe/)?(?P<id>[0-9]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://video.meta.ua/5502115.video', | ||||
|         'md5': '71b6f3ee274bef16f1ab410f7f56b476', | ||||
|         'info_dict': { | ||||
|             'id': '5502115', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Sony Xperia Z camera test [HQ]', | ||||
|             'description': 'Xperia Z shoots video in FullHD HDR.', | ||||
|             'uploader_id': 'nomobile', | ||||
|             'uploader': 'CHЁZA.TV', | ||||
|             'upload_date': '20130211', | ||||
|         }, | ||||
|         'add_ie': ['Youtube'], | ||||
|     }, { | ||||
|         'url': 'http://video.meta.ua/iframe/5502115', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # pladform embed | ||||
|         'url': 'http://video.meta.ua/7121015.video', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         st_html5 = self._search_regex( | ||||
|             r"st_html5\s*=\s*'#([^']+)'", webpage, 'uppod html5 st', default=None) | ||||
|  | ||||
|         if st_html5: | ||||
|             # uppod st decryption algorithm is reverse engineered from function un(s) at uppod.js | ||||
|             json_str = '' | ||||
|             for i in range(0, len(st_html5), 3): | ||||
|                 json_str += '&#x0%s;' % st_html5[i:i + 3] | ||||
|             uppod_data = self._parse_json(unescapeHTML(json_str), video_id) | ||||
|             error = uppod_data.get('customnotfound') | ||||
|             if error: | ||||
|                 raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) | ||||
|  | ||||
|             video_url = uppod_data['file'] | ||||
|             info = { | ||||
|                 'id': video_id, | ||||
|                 'url': video_url, | ||||
|                 'title': uppod_data.get('comment') or self._og_search_title(webpage), | ||||
|                 'description': self._og_search_description(webpage, default=None), | ||||
|                 'thumbnail': uppod_data.get('poster') or self._og_search_thumbnail(webpage), | ||||
|                 'duration': int_or_none(self._og_search_property( | ||||
|                     'video:duration', webpage, default=None)), | ||||
|             } | ||||
|             if 'youtube.com/' in video_url: | ||||
|                 info.update({ | ||||
|                     '_type': 'url_transparent', | ||||
|                     'ie_key': 'Youtube', | ||||
|                 }) | ||||
|             return info | ||||
|  | ||||
|         pladform_url = PladformIE._extract_url(webpage) | ||||
|         if pladform_url: | ||||
|             return self.url_result(pladform_url) | ||||
| @@ -102,11 +102,11 @@ class MixcloudIE(InfoExtractor): | ||||
|         description = self._og_search_description(webpage) | ||||
|         like_count = parse_count(self._search_regex( | ||||
|             r'\bbutton-favorite[^>]+>.*?<span[^>]+class=["\']toggle-number[^>]+>\s*([^<]+)', | ||||
|             webpage, 'like count', fatal=False)) | ||||
|             webpage, 'like count', default=None)) | ||||
|         view_count = str_to_int(self._search_regex( | ||||
|             [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"', | ||||
|              r'/listeners/?">([0-9,.]+)</a>'], | ||||
|             webpage, 'play count', fatal=False)) | ||||
|             webpage, 'play count', default=None)) | ||||
|  | ||||
|         return { | ||||
|             'id': track_id, | ||||
|   | ||||
							
								
								
									
										122
									
								
								youtube_dl/extractor/msn.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										122
									
								
								youtube_dl/extractor/msn.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,122 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     unescapeHTML, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class MSNIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?msn\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/[a-z]{2}-(?P<id>[\da-zA-Z]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.msn.com/en-ae/foodanddrink/joinourtable/criminal-minds-shemar-moore-shares-a-touching-goodbye-message/vp-BBqQYNE', | ||||
|         'md5': '8442f66c116cbab1ff7098f986983458', | ||||
|         'info_dict': { | ||||
|             'id': 'BBqQYNE', | ||||
|             'display_id': 'criminal-minds-shemar-moore-shares-a-touching-goodbye-message', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Criminal Minds - Shemar Moore Shares A Touching Goodbye Message', | ||||
|             'description': 'md5:e8e89b897b222eb33a6b5067a8f1bc25', | ||||
|             'duration': 104, | ||||
|             'uploader': 'CBS Entertainment', | ||||
|             'uploader_id': 'IT0X5aoJ6bJgYerJXSDCgFmYPB1__54v', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.msn.com/en-ae/news/offbeat/meet-the-nine-year-old-self-made-millionaire/ar-BBt6ZKf', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.msn.com/en-ae/video/watch/obama-a-lot-of-people-will-be-disappointed/vi-AAhxUMH', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # geo restricted | ||||
|         'url': 'http://www.msn.com/en-ae/foodanddrink/joinourtable/the-first-fart-makes-you-laugh-the-last-fart-makes-you-cry/vp-AAhzIBU', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.msn.com/en-ae/entertainment/bollywood/watch-how-salman-khan-reacted-when-asked-if-he-would-apologize-for-his-‘raped-woman’-comment/vi-AAhvzW6', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id, display_id = mobj.group('id', 'display_id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         video = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'data-metadata\s*=\s*(["\'])(?P<data>.+?)\1', | ||||
|                 webpage, 'video data', default='{}', group='data'), | ||||
|             display_id, transform_source=unescapeHTML) | ||||
|  | ||||
|         if not video: | ||||
|             error = unescapeHTML(self._search_regex( | ||||
|                 r'data-error=(["\'])(?P<error>.+?)\1', | ||||
|                 webpage, 'error', group='error')) | ||||
|             raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) | ||||
|  | ||||
|         title = video['title'] | ||||
|  | ||||
|         formats = [] | ||||
|         for file_ in video.get('videoFiles', []): | ||||
|             format_url = file_.get('url') | ||||
|             if not format_url: | ||||
|                 continue | ||||
|             ext = determine_ext(format_url) | ||||
|             # .ism is not yet supported (see | ||||
|             # https://github.com/rg3/youtube-dl/issues/8118) | ||||
|             if ext == 'ism': | ||||
|                 continue | ||||
|             if 'm3u8' in format_url: | ||||
|                 # m3u8_native should not be used here until | ||||
|                 # https://github.com/rg3/youtube-dl/issues/9913 is fixed | ||||
|                 m3u8_formats = self._extract_m3u8_formats( | ||||
|                     format_url, display_id, 'mp4', | ||||
|                     m3u8_id='hls', fatal=False) | ||||
|                 # Despite metadata in m3u8 all video+audio formats are | ||||
|                 # actually video-only (no audio) | ||||
|                 for f in m3u8_formats: | ||||
|                     if f.get('acodec') != 'none' and f.get('vcodec') != 'none': | ||||
|                         f['acodec'] = 'none' | ||||
|                 formats.extend(m3u8_formats) | ||||
|             else: | ||||
|                 formats.append({ | ||||
|                     'url': format_url, | ||||
|                     'ext': 'mp4', | ||||
|                     'format_id': 'http', | ||||
|                     'width': int_or_none(file_.get('width')), | ||||
|                     'height': int_or_none(file_.get('height')), | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         subtitles = {} | ||||
|         for file_ in video.get('files', []): | ||||
|             format_url = file_.get('url') | ||||
|             format_code = file_.get('formatCode') | ||||
|             if not format_url or not format_code: | ||||
|                 continue | ||||
|             if compat_str(format_code) == '3100': | ||||
|                 subtitles.setdefault(file_.get('culture', 'en'), []).append({ | ||||
|                     'ext': determine_ext(format_url, 'ttml'), | ||||
|                     'url': format_url, | ||||
|                 }) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'description': video.get('description'), | ||||
|             'thumbnail': video.get('headlineImage', {}).get('url'), | ||||
|             'duration': int_or_none(video.get('durationSecs')), | ||||
|             'uploader': video.get('sourceFriendly'), | ||||
|             'uploader_id': video.get('providerId'), | ||||
|             'creator': video.get('creator'), | ||||
|             'subtitles': subtitles, | ||||
|             'formats': formats, | ||||
|         } | ||||
| @@ -9,10 +9,6 @@ from ..utils import ( | ||||
|     lowercase_escape, | ||||
|     smuggle_url, | ||||
|     unescapeHTML, | ||||
|     update_url_query, | ||||
|     int_or_none, | ||||
|     HEADRequest, | ||||
|     parse_iso8601, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -192,9 +188,9 @@ class CSNNEIE(InfoExtractor): | ||||
|  | ||||
|  | ||||
| class NBCNewsIE(ThePlatformIE): | ||||
|     _VALID_URL = r'''(?x)https?://(?:www\.)?(?:nbcnews|today)\.com/ | ||||
|     _VALID_URL = r'''(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/ | ||||
|         (?:video/.+?/(?P<id>\d+)| | ||||
|         ([^/]+/)*(?P<display_id>[^/?]+)) | ||||
|         ([^/]+/)*(?:.*-)?(?P<mpx_id>[^/?]+)) | ||||
|         ''' | ||||
|  | ||||
|     _TESTS = [ | ||||
| @@ -216,13 +212,16 @@ class NBCNewsIE(ThePlatformIE): | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'How Twitter Reacted To The Snowden Interview', | ||||
|                 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64', | ||||
|                 'uploader': 'NBCU-NEWS', | ||||
|                 'timestamp': 1401363060, | ||||
|                 'upload_date': '20140529', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156', | ||||
|             'md5': 'fdbf39ab73a72df5896b6234ff98518a', | ||||
|             'info_dict': { | ||||
|                 'id': 'Wjf9EDR3A_60', | ||||
|                 'id': '529953347624', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'FULL EPISODE: Family Business', | ||||
|                 'description': 'md5:757988edbaae9d7be1d585eb5d55cc04', | ||||
| @@ -237,6 +236,9 @@ class NBCNewsIE(ThePlatformIE): | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Nightly News with Brian Williams Full Broadcast (February 4)', | ||||
|                 'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5', | ||||
|                 'timestamp': 1423104900, | ||||
|                 'uploader': 'NBCU-NEWS', | ||||
|                 'upload_date': '20150205', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
| @@ -245,10 +247,12 @@ class NBCNewsIE(ThePlatformIE): | ||||
|             'info_dict': { | ||||
|                 'id': '529953347624', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Volkswagen U.S. Chief: We \'Totally Screwed Up\'', | ||||
|                 'description': 'md5:d22d1281a24f22ea0880741bb4dd6301', | ||||
|                 'title': 'Volkswagen U.S. Chief:\xa0 We Have Totally Screwed Up', | ||||
|                 'description': 'md5:c8be487b2d80ff0594c005add88d8351', | ||||
|                 'upload_date': '20150922', | ||||
|                 'timestamp': 1442917800, | ||||
|                 'uploader': 'NBCU-NEWS', | ||||
|             }, | ||||
|             'expected_warnings': ['http-6000 is not available'] | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788', | ||||
| @@ -260,6 +264,22 @@ class NBCNewsIE(ThePlatformIE): | ||||
|                 'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1', | ||||
|                 'upload_date': '20160420', | ||||
|                 'timestamp': 1461152093, | ||||
|                 'uploader': 'NBCU-NEWS', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924', | ||||
|             'md5': '6d236bf4f3dddc226633ce6e2c3f814d', | ||||
|             'info_dict': { | ||||
|                 'id': '314487875924', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'The chaotic GOP immigration vote', | ||||
|                 'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|                 'timestamp': 1406937606, | ||||
|                 'upload_date': '20140802', | ||||
|                 'uploader': 'NBCU-NEWS', | ||||
|                 'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'], | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
| @@ -290,105 +310,28 @@ class NBCNewsIE(ThePlatformIE): | ||||
|             } | ||||
|         else: | ||||
|             # "feature" and "nightly-news" pages use theplatform.com | ||||
|             display_id = mobj.group('display_id') | ||||
|             webpage = self._download_webpage(url, display_id) | ||||
|             info = None | ||||
|             bootstrap_json = self._search_regex( | ||||
|                 [r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$', | ||||
|                  r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"'], | ||||
|                 webpage, 'bootstrap json', default=None) | ||||
|             bootstrap = self._parse_json( | ||||
|                 bootstrap_json, display_id, transform_source=unescapeHTML) | ||||
|             if 'results' in bootstrap: | ||||
|                 info = bootstrap['results'][0]['video'] | ||||
|             elif 'video' in bootstrap: | ||||
|                 info = bootstrap['video'] | ||||
|             else: | ||||
|                 info = bootstrap | ||||
|             video_id = info['mpxId'] | ||||
|             title = info['title'] | ||||
|  | ||||
|             subtitles = {} | ||||
|             caption_links = info.get('captionLinks') | ||||
|             if caption_links: | ||||
|                 for (sub_key, sub_ext) in (('smpte-tt', 'ttml'), ('web-vtt', 'vtt'), ('srt', 'srt')): | ||||
|                     sub_url = caption_links.get(sub_key) | ||||
|                     if sub_url: | ||||
|                         subtitles.setdefault('en', []).append({ | ||||
|                             'url': sub_url, | ||||
|                             'ext': sub_ext, | ||||
|                         }) | ||||
|  | ||||
|             formats = [] | ||||
|             for video_asset in info['videoAssets']: | ||||
|                 video_url = video_asset.get('publicUrl') | ||||
|                 if not video_url: | ||||
|                     continue | ||||
|                 container = video_asset.get('format') | ||||
|                 asset_type = video_asset.get('assetType') or '' | ||||
|                 if container == 'ISM' or asset_type == 'FireTV-Once': | ||||
|                     continue | ||||
|                 elif asset_type == 'OnceURL': | ||||
|                     tp_formats, tp_subtitles = self._extract_theplatform_smil( | ||||
|                         video_url, video_id) | ||||
|                     formats.extend(tp_formats) | ||||
|                     subtitles = self._merge_subtitles(subtitles, tp_subtitles) | ||||
|             video_id = mobj.group('mpx_id') | ||||
|             if not video_id.isdigit(): | ||||
|                 webpage = self._download_webpage(url, video_id) | ||||
|                 info = None | ||||
|                 bootstrap_json = self._search_regex( | ||||
|                     [r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$', | ||||
|                      r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"'], | ||||
|                     webpage, 'bootstrap json', default=None) | ||||
|                 bootstrap = self._parse_json( | ||||
|                     bootstrap_json, video_id, transform_source=unescapeHTML) | ||||
|                 if 'results' in bootstrap: | ||||
|                     info = bootstrap['results'][0]['video'] | ||||
|                 elif 'video' in bootstrap: | ||||
|                     info = bootstrap['video'] | ||||
|                 else: | ||||
|                     tbr = int_or_none(video_asset.get('bitRate') or video_asset.get('bitrate'), 1000) | ||||
|                     format_id = 'http%s' % ('-%d' % tbr if tbr else '') | ||||
|                     video_url = update_url_query( | ||||
|                         video_url, {'format': 'redirect'}) | ||||
|                     # resolve the url so that we can check availability and detect the correct extension | ||||
|                     head = self._request_webpage( | ||||
|                         HEADRequest(video_url), video_id, | ||||
|                         'Checking %s url' % format_id, | ||||
|                         '%s is not available' % format_id, | ||||
|                         fatal=False) | ||||
|                     if head: | ||||
|                         video_url = head.geturl() | ||||
|                         formats.append({ | ||||
|                             'format_id': format_id, | ||||
|                             'url': video_url, | ||||
|                             'width': int_or_none(video_asset.get('width')), | ||||
|                             'height': int_or_none(video_asset.get('height')), | ||||
|                             'tbr': tbr, | ||||
|                             'container': video_asset.get('format'), | ||||
|                         }) | ||||
|             self._sort_formats(formats) | ||||
|                     info = bootstrap | ||||
|                 video_id = info['mpxId'] | ||||
|  | ||||
|             return { | ||||
|                 '_type': 'url_transparent', | ||||
|                 'id': video_id, | ||||
|                 'title': title, | ||||
|                 'description': info.get('description'), | ||||
|                 'thumbnail': info.get('thumbnail'), | ||||
|                 'duration': int_or_none(info.get('duration')), | ||||
|                 'timestamp': parse_iso8601(info.get('pubDate') or info.get('pub_date')), | ||||
|                 'formats': formats, | ||||
|                 'subtitles': subtitles, | ||||
|                 # http://feed.theplatform.com/f/2E2eJC/nbcnews also works | ||||
|                 'url': 'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews?byId=%s' % video_id, | ||||
|                 'ie_key': 'ThePlatformFeed', | ||||
|             } | ||||
|  | ||||
|  | ||||
| class MSNBCIE(InfoExtractor): | ||||
|     # https URLs redirect to corresponding http ones | ||||
|     _VALID_URL = r'https?://www\.msnbc\.com/[^/]+/watch/(?P<id>[^/]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924', | ||||
|         'md5': '6d236bf4f3dddc226633ce6e2c3f814d', | ||||
|         'info_dict': { | ||||
|             'id': 'n_hayes_Aimm_140801_272214', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'The chaotic GOP immigration vote', | ||||
|             'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'timestamp': 1406937606, | ||||
|             'upload_date': '20140802', | ||||
|             'uploader': 'NBCU-NEWS', | ||||
|             'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'], | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         embed_url = self._html_search_meta('embedURL', webpage) | ||||
|         return self.url_result(embed_url) | ||||
|   | ||||
							
								
								
									
										55
									
								
								youtube_dl/extractor/ninecninemedia.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								youtube_dl/extractor/ninecninemedia.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,55 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     parse_iso8601, | ||||
|     parse_duration, | ||||
|     ExtractorError | ||||
| ) | ||||
|  | ||||
|  | ||||
class NineCNineMediaIE(InfoExtractor):
    """Extractor for the internal ``9c9media:<destination_code>:<id>`` scheme.

    The content record is fetched from the capi.9c9media.com API, then the
    single content package and its single stack are resolved and the HLS,
    HDS and progressive-download manifests of that stack are probed.
    """
    _VALID_URL = r'9c9media:(?P<destination_code>[^:]+):(?P<id>\d+)'

    @staticmethod
    def _only_item(items, kind):
        """Return the sole element of *items*.

        Raises ExtractorError when *items* is missing/empty or holds more
        than one element, so that unexpected API responses are reported as
        extraction errors rather than as a bare IndexError.
        """
        if not items:
            raise ExtractorError('no %s' % kind)
        if len(items) > 1:
            raise ExtractorError('multiple %s' % kind)
        return items[0]

    def _real_extract(self, url):
        """Build the info dict for one 9c9media content item.

        Raises ExtractorError if the API does not describe exactly one
        content package with exactly one stack.
        """
        destination_code, video_id = re.match(self._VALID_URL, url).groups()
        api_base_url = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/' % (destination_code, video_id)
        content = self._download_json(api_base_url, video_id, query={
            '$include': '[contentpackages]',
        })
        title = content['Name']

        content_package = self._only_item(
            content.get('ContentPackages'), 'content packages')
        stacks_base_url = api_base_url + 'contentpackages/%s/stacks/' % content_package['Id']
        stack = self._only_item(
            self._download_json(stacks_base_url, video_id).get('Items'),
            'stacks')
        stack_base_url = '%s%s/manifest.' % (stacks_base_url, stack['Id'])

        # Every manifest flavour is optional (fatal=False): whichever ones
        # the service answers for contribute formats.
        formats = []
        formats.extend(self._extract_m3u8_formats(
            stack_base_url + 'm3u8', video_id, 'mp4',
            'm3u8_native', m3u8_id='hls', fatal=False))
        formats.extend(self._extract_f4m_formats(
            stack_base_url + 'f4m', video_id,
            f4m_id='hds', fatal=False))
        # The body of the "pd" response is used directly as the media URL.
        mp4_url = self._download_webpage(stack_base_url + 'pd', video_id, fatal=False)
        if mp4_url:
            formats.append({
                'url': mp4_url,
                'format_id': 'mp4',
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': content.get('Desc') or content.get('ShortDesc'),
            'timestamp': parse_iso8601(content.get('BroadcastDateTime')),
            'duration': parse_duration(content.get('BroadcastTime')),
            'formats': formats,
        }
| @@ -516,9 +516,14 @@ class PBSIE(InfoExtractor): | ||||
|                 # https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications | ||||
|                 if not bitrate or bitrate not in ('400k', '800k', '1200k', '2500k'): | ||||
|                     continue | ||||
|                 f_url = re.sub(r'\d+k|baseline', bitrate, http_url) | ||||
|                 # This may produce invalid links sometimes (e.g. | ||||
|                 # http://www.pbs.org/wgbh/frontline/film/suicide-plan) | ||||
|                 if not self._is_valid_url(f_url, display_id, 'http-%s video' % bitrate): | ||||
|                     continue | ||||
|                 f = m3u8_format.copy() | ||||
|                 f.update({ | ||||
|                     'url': re.sub(r'\d+k|baseline', bitrate, http_url), | ||||
|                     'url': f_url, | ||||
|                     'format_id': m3u8_format['format_id'].replace('hls', 'http'), | ||||
|                     'protocol': 'http', | ||||
|                 }) | ||||
|   | ||||
| @@ -120,9 +120,12 @@ class PeriscopeUserIE(InfoExtractor): | ||||
|         title = user.get('display_name') or user.get('username') | ||||
|         description = user.get('description') | ||||
|  | ||||
|         broadcast_ids = (data_store.get('UserBroadcastHistory', {}).get('broadcastIds') or | ||||
|                          data_store.get('BroadcastCache', {}).get('broadcastIds', [])) | ||||
|  | ||||
|         entries = [ | ||||
|             self.url_result( | ||||
|                 'https://www.periscope.tv/%s/%s' % (user_id, broadcast['id'])) | ||||
|             for broadcast in data_store.get('UserBroadcastHistory', {}).get('broadcasts', [])] | ||||
|                 'https://www.periscope.tv/%s/%s' % (user_id, broadcast_id)) | ||||
|             for broadcast_id in broadcast_ids] | ||||
|  | ||||
|         return self.playlist_result(entries, user_id, title, description) | ||||
|   | ||||
| @@ -49,7 +49,7 @@ class PladformIE(InfoExtractor): | ||||
|     @staticmethod | ||||
|     def _extract_url(webpage): | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+src="(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)"', webpage) | ||||
|             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)\1', webpage) | ||||
|         if mobj: | ||||
|             return mobj.group('url') | ||||
|  | ||||
|   | ||||
							
								
								
									
										95
									
								
								youtube_dl/extractor/polskieradio.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										95
									
								
								youtube_dl/extractor/polskieradio.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,95 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_str, | ||||
|     compat_urllib_parse_unquote, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     strip_or_none, | ||||
|     unified_timestamp, | ||||
| ) | ||||
|  | ||||
|  | ||||
class PolskieRadioIE(InfoExtractor):
    """Extractor for polskieradio.pl article pages.

    An article is returned as a playlist with one entry per distinct media
    item found in the ``data-media`` attributes of its audio section.
    """
    _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+/\d+/Artykul/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943,Prof-Andrzej-Nowak-o-historii-nie-da-sie-myslec-beznamietnie',
        'info_dict': {
            'id': '1587943',
            'title': 'Prof. Andrzej Nowak: o historii nie da się myśleć beznamiętnie',
            'description': 'md5:12f954edbf3120c5e7075e17bf9fc5c5',
        },
        'playlist': [{
            'md5': '2984ee6ce9046d91fc233bc1a864a09a',
            'info_dict': {
                'id': '1540576',
                'ext': 'mp3',
                'title': 'md5:d4623290d4ac983bf924061c75c23a0d',
                'timestamp': 1456594200,
                'upload_date': '20160227',
                'duration': 2364,
            },
        }],
    }, {
        'url': 'http://www.polskieradio.pl/265/5217/Artykul/1635803,Euro-2016-nie-ma-miejsca-na-blad-Polacy-graja-ze-Szwajcaria-o-cwiercfinal',
        'info_dict': {
            'id': '1635803',
            'title': 'Euro 2016: nie ma miejsca na błąd. Polacy grają ze Szwajcarią o ćwierćfinał',
            'description': 'md5:01cb7d0cad58664095d72b51a1ebada2',
        },
        'playlist_mincount': 12,
    }, {
        'url': 'http://polskieradio.pl/9/305/Artykul/1632955,Bardzo-popularne-slowo-remis',
        'only_matching': True,
    }, {
        'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943',
        'only_matching': True,
    }, {
        # with mp4 video
        'url': 'http://www.polskieradio.pl/9/299/Artykul/1634903,Brexit-Leszek-Miller-swiat-sie-nie-zawali-Europa-bedzie-trwac-dalej',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of the media items embedded in the article."""
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        content = self._search_regex(
            r'(?s)<div[^>]+class="audio atarticle"[^>]*>(.+?)<script>',
            webpage, 'content')

        # One page-level timestamp is shared by every entry.
        timestamp = unified_timestamp(self._html_search_regex(
            r'(?s)<span[^>]+id="datetime2"[^>]*>(.+?)</span>',
            webpage, 'timestamp', fatal=False))

        entries = []

        # The same file may be referenced several times; keep the first.
        media_urls = set()

        for data_media in re.findall(r'<[^>]+data-media=({[^>]+})', content):
            media = self._parse_json(data_media, playlist_id, fatal=False)
            # With fatal=False a malformed blob yields None rather than
            # raising; skip it instead of failing on ``None.get``.
            if not media:
                continue
            if not media.get('file') or not media.get('desc'):
                continue
            media_url = self._proto_relative_url(media['file'], 'http:')
            if media_url in media_urls:
                continue
            media_urls.add(media_url)
            entries.append({
                'id': compat_str(media['id']),
                'url': media_url,
                'title': compat_urllib_parse_unquote(media['desc']),
                'duration': int_or_none(media.get('length')),
                'vcodec': 'none' if media.get('provider') == 'audio' else None,
                'timestamp': timestamp,
            })

        title = self._og_search_title(webpage).strip()
        description = strip_or_none(self._og_search_description(webpage))

        return self.playlist_result(entries, playlist_id, title, description)
| @@ -25,7 +25,15 @@ from ..aes import ( | ||||
|  | ||||
|  | ||||
| class PornHubIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)(?P<id>[0-9a-z]+)' | ||||
|     IE_DESC = 'PornHub and Thumbzilla' | ||||
|     _VALID_URL = r'''(?x) | ||||
|                     https?:// | ||||
|                         (?: | ||||
|                             (?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)| | ||||
|                             (?:www\.)?thumbzilla\.com/video/ | ||||
|                         ) | ||||
|                         (?P<id>[0-9a-z]+) | ||||
|                     ''' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', | ||||
|         'md5': '1e19b41231a02eba417839222ac9d58e', | ||||
| @@ -63,8 +71,20 @@ class PornHubIE(InfoExtractor): | ||||
|         'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # removed at the request of cam4.com | ||||
|         'url': 'http://fr.pornhub.com/view_video.php?viewkey=ph55ca2f9760862', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # removed at the request of the copyright owner | ||||
|         'url': 'http://www.pornhub.com/view_video.php?viewkey=788152859', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # removed by uploader | ||||
|         'url': 'http://www.pornhub.com/view_video.php?viewkey=ph572716d15a111', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     @classmethod | ||||
| @@ -87,8 +107,8 @@ class PornHubIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(req, video_id) | ||||
|  | ||||
|         error_msg = self._html_search_regex( | ||||
|             r'(?s)<div class="userMessageSection[^"]*".*?>(.*?)</div>', | ||||
|             webpage, 'error message', default=None) | ||||
|             r'(?s)<div[^>]+class=(["\']).*?\bremoved\b.*?\1[^>]*>(?P<error>.+?)</div>', | ||||
|             webpage, 'error message', default=None, group='error') | ||||
|         if error_msg: | ||||
|             error_msg = re.sub(r'\s+', ' ', error_msg) | ||||
|             raise ExtractorError( | ||||
|   | ||||
| @@ -3,7 +3,7 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import( | ||||
| from ..utils import ( | ||||
|     unified_strdate, | ||||
|     str_to_int, | ||||
| ) | ||||
|   | ||||
| @@ -1,23 +1,23 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     parse_duration, | ||||
|     parse_iso8601, | ||||
|     js_to_json, | ||||
| ) | ||||
| from ..compat import compat_str | ||||
|  | ||||
|  | ||||
| class RDSIE(InfoExtractor): | ||||
|     IE_DESC = 'RDS.ca' | ||||
|     _VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<display_id>[^/]+)-(?P<id>\d+\.\d+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<id>[^/]+)-\d+\.\d+' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.rds.ca/videos/football/nfl/fowler-jr-prend-la-direction-de-jacksonville-3.1132799', | ||||
|         'info_dict': { | ||||
|             'id': '3.1132799', | ||||
|             'id': '604333', | ||||
|             'display_id': 'fowler-jr-prend-la-direction-de-jacksonville', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Fowler Jr. prend la direction de Jacksonville', | ||||
| @@ -33,22 +33,17 @@ class RDSIE(InfoExtractor): | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         display_id = mobj.group('display_id') | ||||
|         display_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         # TODO: extract f4m from 9c9media.com | ||||
|         video_url = self._search_regex( | ||||
|             r'<span[^>]+itemprop="contentURL"[^>]+content="([^"]+)"', | ||||
|             webpage, 'video url') | ||||
|  | ||||
|         title = self._og_search_title(webpage) or self._html_search_meta( | ||||
|         item = self._parse_json(self._search_regex(r'(?s)itemToPush\s*=\s*({.+?});', webpage, 'item'), display_id, js_to_json) | ||||
|         video_id = compat_str(item['id']) | ||||
|         title = item.get('title') or self._og_search_title(webpage) or self._html_search_meta( | ||||
|             'title', webpage, 'title', fatal=True) | ||||
|         description = self._og_search_description(webpage) or self._html_search_meta( | ||||
|             'description', webpage, 'description') | ||||
|         thumbnail = self._og_search_thumbnail(webpage) or self._search_regex( | ||||
|         thumbnail = item.get('urlImageBig') or self._og_search_thumbnail(webpage) or self._search_regex( | ||||
|             [r'<link[^>]+itemprop="thumbnailUrl"[^>]+href="([^"]+)"', | ||||
|              r'<span[^>]+itemprop="thumbnailUrl"[^>]+content="([^"]+)"'], | ||||
|             webpage, 'thumbnail', fatal=False) | ||||
| @@ -61,13 +56,15 @@ class RDSIE(InfoExtractor): | ||||
|         age_limit = self._family_friendly_search(webpage) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'url': video_url, | ||||
|             'url': '9c9media:rds_web:%s' % video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'timestamp': timestamp, | ||||
|             'duration': duration, | ||||
|             'age_limit': age_limit, | ||||
|             'ie_key': 'NineCNineMedia', | ||||
|         } | ||||
|   | ||||
							
								
								
									
										60
									
								
								youtube_dl/extractor/sixplay.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										60
									
								
								youtube_dl/extractor/sixplay.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,60 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     qualities, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
class SixPlayIE(InfoExtractor):
    """Extractor for 6play.fr clips, also reachable as ``6play:<id>``."""
    _VALID_URL = r'(?:6play:|https?://(?:www\.)?6play\.fr/.+?-c_)(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.6play.fr/jamel-et-ses-amis-au-marrakech-du-rire-p_1316/jamel-et-ses-amis-au-marrakech-du-rire-2015-c_11495320',
        'md5': '42310bffe4ba3982db112b9cd3467328',
        'info_dict': {
            'id': '11495320',
            'ext': 'mp4',
            'title': 'Jamel et ses amis au Marrakech du rire 2015',
            'description': 'md5:ba2149d5c321d5201b78070ee839d872',
        },
    }

    def _real_extract(self, url):
        """Fetch the player config for the clip and turn its sources into formats."""
        video_id = self._match_id(url)
        config_url = 'https://player.m6web.fr/v2/video/config/6play-auth/FR/%s.json' % video_id
        config = self._download_json(config_url, video_id)
        info = config['videoInfo']

        # Ranks the progressive MP4 variants from worst to best.
        rank = qualities(['lq', 'sd', 'hq', 'hd'])

        formats = []
        for entry in config['sources']:
            kind = entry.get('type')
            src = entry.get('src')
            # Sources without a URL and 'hls/primetime' ones are ignored.
            if kind == 'hls/primetime' or not src:
                continue

            if kind == 'video/mp4':
                label = entry.get('quality')
                formats.append({
                    'url': src,
                    'format_id': label,
                    'quality': rank(label),
                })
                continue

            if kind != 'application/vnd.apple.mpegURL':
                continue

            # An HLS source is also tried as HDS by swapping the manifest
            # extension; both lookups are non-fatal.
            formats.extend(self._extract_m3u8_formats(
                src, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False))
            formats.extend(self._extract_f4m_formats(
                src.replace('.m3u8', '.f4m'),
                video_id, f4m_id='hds', fatal=False))
        self._sort_formats(formats)

        result = {'id': video_id}
        result['title'] = info['title'].strip()
        result['description'] = info.get('description')
        result['duration'] = int_or_none(info.get('duration'))
        result['series'] = info.get('titlePgm')
        result['formats'] = formats
        return result
| @@ -67,7 +67,7 @@ class SkyNewsArabiaIE(SkyNewsArabiaBaseIE): | ||||
|  | ||||
|  | ||||
| class SkyNewsArabiaArticleIE(SkyNewsArabiaBaseIE): | ||||
|     IE_NAME = 'skynewsarabia:video' | ||||
|     IE_NAME = 'skynewsarabia:article' | ||||
|     _VALID_URL = r'https?://(?:www\.)?skynewsarabia\.com/web/article/(?P<id>[0-9]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.skynewsarabia.com/web/article/794549/%D8%A7%D9%94%D8%AD%D8%AF%D8%A7%D8%AB-%D8%A7%D9%84%D8%B4%D8%B1%D9%82-%D8%A7%D9%84%D8%A7%D9%94%D9%88%D8%B3%D8%B7-%D8%AE%D8%B1%D9%8A%D8%B7%D8%A9-%D8%A7%D9%84%D8%A7%D9%94%D9%84%D8%B9%D8%A7%D8%A8-%D8%A7%D9%84%D8%B0%D9%83%D9%8A%D8%A9', | ||||
|   | ||||
							
								
								
									
										33
									
								
								youtube_dl/extractor/skysports.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								youtube_dl/extractor/skysports.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,33 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
class SkySportsIE(InfoExtractor):
    """Extractor for skysports.com video pages; playback is delegated to Ooyala."""
    _VALID_URL = r'https?://(?:www\.)?skysports\.com/watch/video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.skysports.com/watch/video/10328419/bale-its-our-time-to-shine',
        'md5': 'c44a1db29f27daf9a0003e010af82100',
        'info_dict': {
            'id': '10328419',
            'ext': 'flv',
            'title': 'Bale: Its our time to shine',
            'description': 'md5:9fd1de3614d525f5addda32ac3c482c9',
        },
        'add_ie': ['Ooyala'],
    }

    def _real_extract(self, url):
        """Return a url_transparent result pointing at the page's Ooyala video."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Lookups stay in the original order: Ooyala id, title, description.
        ooyala_id = self._search_regex(
            r'data-video-id="([^"]+)"', webpage, 'ooyala id')
        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': 'ooyala:%s' % ooyala_id,
            'title': title,
            'description': description,
            'ie_key': 'Ooyala',
        }
| @@ -9,6 +9,7 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class SRMediathekIE(ARDMediathekIE): | ||||
|     IE_NAME = 'sr:mediathek' | ||||
|     IE_DESC = 'Saarländischer Rundfunk' | ||||
|     _VALID_URL = r'https?://sr-mediathek\.sr-online\.de/index\.php\?.*?&id=(?P<id>[0-9]+)' | ||||
|  | ||||
|   | ||||
| @@ -6,7 +6,6 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     sanitized_Request, | ||||
|     urlencode_postdata, | ||||
| ) | ||||
|  | ||||
| @@ -45,20 +44,26 @@ class StreamcloudIE(InfoExtractor): | ||||
|             (?:id="[^"]+"\s+)? | ||||
|             value="([^"]*)" | ||||
|             ''', orig_webpage) | ||||
|         post = urlencode_postdata(fields) | ||||
|  | ||||
|         self._sleep(12, video_id) | ||||
|         headers = { | ||||
|             b'Content-Type': b'application/x-www-form-urlencoded', | ||||
|         } | ||||
|         req = sanitized_Request(url, post, headers) | ||||
|  | ||||
|         webpage = self._download_webpage( | ||||
|             req, video_id, note='Downloading video page ...') | ||||
|         title = self._html_search_regex( | ||||
|             r'<h1[^>]*>([^<]+)<', webpage, 'title') | ||||
|         video_url = self._search_regex( | ||||
|             r'file:\s*"([^"]+)"', webpage, 'video URL') | ||||
|             url, video_id, data=urlencode_postdata(fields), headers={ | ||||
|                 b'Content-Type': b'application/x-www-form-urlencoded', | ||||
|             }) | ||||
|  | ||||
|         try: | ||||
|             title = self._html_search_regex( | ||||
|                 r'<h1[^>]*>([^<]+)<', webpage, 'title') | ||||
|             video_url = self._search_regex( | ||||
|                 r'file:\s*"([^"]+)"', webpage, 'video URL') | ||||
|         except ExtractorError: | ||||
|             message = self._html_search_regex( | ||||
|                 r'(?s)<div[^>]+class=(["\']).*?msgboxinfo.*?\1[^>]*>(?P<message>.+?)</div>', | ||||
|                 webpage, 'message', default=None, group='message') | ||||
|             if message: | ||||
|                 raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True) | ||||
|             raise | ||||
|         thumbnail = self._search_regex( | ||||
|             r'image:\s*"([^"]+)"', webpage, 'thumbnail URL', fatal=False) | ||||
|  | ||||
|   | ||||
| @@ -6,17 +6,14 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     dict_get, | ||||
|     int_or_none, | ||||
|     try_get, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class SVTBaseIE(InfoExtractor): | ||||
|     def _extract_video(self, url, video_id): | ||||
|         info = self._download_json(url, video_id) | ||||
|  | ||||
|         title = info['context']['title'] | ||||
|         thumbnail = info['context'].get('thumbnailImage') | ||||
|  | ||||
|         video_info = info['video'] | ||||
|     def _extract_video(self, video_info, video_id): | ||||
|         formats = [] | ||||
|         for vr in video_info['videoReferences']: | ||||
|             player_type = vr.get('playerType') | ||||
| @@ -40,27 +37,49 @@ class SVTBaseIE(InfoExtractor): | ||||
|                     'format_id': player_type, | ||||
|                     'url': vurl, | ||||
|                 }) | ||||
|         if not formats and video_info.get('rights', {}).get('geoBlockedSweden'): | ||||
|             self.raise_geo_restricted('This video is only available in Sweden') | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         subtitles = {} | ||||
|         subtitle_references = video_info.get('subtitleReferences') | ||||
|         subtitle_references = dict_get(video_info, ('subtitles', 'subtitleReferences')) | ||||
|         if isinstance(subtitle_references, list): | ||||
|             for sr in subtitle_references: | ||||
|                 subtitle_url = sr.get('url') | ||||
|                 subtitle_lang = sr.get('language', 'sv') | ||||
|                 if subtitle_url: | ||||
|                     subtitles.setdefault('sv', []).append({'url': subtitle_url}) | ||||
|                     if determine_ext(subtitle_url) == 'm3u8': | ||||
|                         # TODO(yan12125): handle WebVTT in m3u8 manifests | ||||
|                         continue | ||||
|  | ||||
|         duration = video_info.get('materialLength') | ||||
|         age_limit = 18 if video_info.get('inappropriateForChildren') else 0 | ||||
|                     subtitles.setdefault(subtitle_lang, []).append({'url': subtitle_url}) | ||||
|  | ||||
|         title = video_info.get('title') | ||||
|  | ||||
|         series = video_info.get('programTitle') | ||||
|         season_number = int_or_none(video_info.get('season')) | ||||
|         episode = video_info.get('episodeTitle') | ||||
|         episode_number = int_or_none(video_info.get('episodeNumber')) | ||||
|  | ||||
|         duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration'))) | ||||
|         age_limit = None | ||||
|         adult = dict_get( | ||||
|             video_info, ('inappropriateForChildren', 'blockedForChildren'), | ||||
|             skip_false_values=False) | ||||
|         if adult is not None: | ||||
|             age_limit = 18 if adult else 0 | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'age_limit': age_limit, | ||||
|             'series': series, | ||||
|             'season_number': season_number, | ||||
|             'episode': episode, | ||||
|             'episode_number': episode_number, | ||||
|         } | ||||
|  | ||||
|  | ||||
| @@ -68,11 +87,11 @@ class SVTIE(SVTBaseIE): | ||||
|     _VALID_URL = r'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P<widget_id>\d+)&.*?\barticleId=(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.svt.se/wd?widgetId=23991&sectionId=541&articleId=2900353&type=embed&contextSectionId=123&autostart=false', | ||||
|         'md5': '9648197555fc1b49e3dc22db4af51d46', | ||||
|         'md5': '33e9a5d8f646523ce0868ecfb0eed77d', | ||||
|         'info_dict': { | ||||
|             'id': '2900353', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Stjärnorna skojar till det - under SVT-intervjun', | ||||
|             'duration': 27, | ||||
|             'age_limit': 0, | ||||
|         }, | ||||
| @@ -89,15 +108,20 @@ class SVTIE(SVTBaseIE): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         widget_id = mobj.group('widget_id') | ||||
|         article_id = mobj.group('id') | ||||
|         return self._extract_video( | ||||
|  | ||||
|         info = self._download_json( | ||||
|             'http://www.svt.se/wd?widgetId=%s&articleId=%s&format=json&type=embed&output=json' % (widget_id, article_id), | ||||
|             article_id) | ||||
|  | ||||
|         info_dict = self._extract_video(info['video'], article_id) | ||||
|         info_dict['title'] = info['context']['title'] | ||||
|         return info_dict | ||||
|  | ||||
|  | ||||
| class SVTPlayIE(SVTBaseIE): | ||||
|     IE_DESC = 'SVT Play and Öppet arkiv' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?P<host>svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)' | ||||
|     _TEST = { | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp)/(?P<id>[0-9]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2', | ||||
|         'md5': '2b6704fe4a28801e1a098bbf3c5ac611', | ||||
|         'info_dict': { | ||||
| @@ -113,12 +137,50 @@ class SVTPlayIE(SVTBaseIE): | ||||
|                 }] | ||||
|             }, | ||||
|         }, | ||||
|     } | ||||
|     }, { | ||||
|         # geo restricted to Sweden | ||||
|         'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         host = mobj.group('host') | ||||
|         return self._extract_video( | ||||
|             'http://www.%s.se/video/%s?output=json' % (host, video_id), | ||||
|             video_id) | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         data = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'root\["__svtplay"\]\s*=\s*([^;]+);', | ||||
|                 webpage, 'embedded data', default='{}'), | ||||
|             video_id, fatal=False) | ||||
|  | ||||
|         thumbnail = self._og_search_thumbnail(webpage) | ||||
|  | ||||
|         if data: | ||||
|             video_info = try_get( | ||||
|                 data, lambda x: x['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'], | ||||
|                 dict) | ||||
|             if video_info: | ||||
|                 info_dict = self._extract_video(video_info, video_id) | ||||
|                 info_dict.update({ | ||||
|                     'title': data['context']['dispatcher']['stores']['MetaStore']['title'], | ||||
|                     'thumbnail': thumbnail, | ||||
|                 }) | ||||
|                 return info_dict | ||||
|  | ||||
|         video_id = self._search_regex( | ||||
|             r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)', | ||||
|             webpage, 'video id', default=None) | ||||
|  | ||||
|         if video_id: | ||||
|             data = self._download_json( | ||||
|                 'http://www.svt.se/videoplayer-api/video/%s' % video_id, video_id) | ||||
|             info_dict = self._extract_video(data, video_id) | ||||
|             if not info_dict.get('title'): | ||||
|                 info_dict['title'] = re.sub( | ||||
|                     r'\s*\|\s*.+?$', '', | ||||
|                     info_dict.get('episode') or self._og_search_title(webpage)) | ||||
|             return info_dict | ||||
|   | ||||
| @@ -48,6 +48,6 @@ class TF1IE(InfoExtractor): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         wat_id = self._html_search_regex( | ||||
|             r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8}).*?\1', | ||||
|             r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})\1', | ||||
|             webpage, 'wat id', group='id') | ||||
|         return self.url_result('wat:%s' % wat_id, 'Wat') | ||||
|   | ||||
| @@ -277,9 +277,9 @@ class ThePlatformIE(ThePlatformBaseIE): | ||||
|  | ||||
|  | ||||
| class ThePlatformFeedIE(ThePlatformBaseIE): | ||||
|     _URL_TEMPLATE = '%s//feed.theplatform.com/f/%s/%s?form=json&byGuid=%s' | ||||
|     _VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*byGuid=(?P<id>[a-zA-Z0-9_]+)' | ||||
|     _TEST = { | ||||
|     _URL_TEMPLATE = '%s//feed.theplatform.com/f/%s/%s?form=json&%s' | ||||
|     _VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*(?P<filter>by(?:Gui|I)d=(?P<id>[\w-]+))' | ||||
|     _TESTS = [{ | ||||
|         # From http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=n_hardball_5biden_140207 | ||||
|         'url': 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207', | ||||
|         'md5': '6e32495b5073ab414471b615c5ded394', | ||||
| @@ -295,32 +295,38 @@ class ThePlatformFeedIE(ThePlatformBaseIE): | ||||
|             'categories': ['MSNBC/Issues/Democrats', 'MSNBC/Issues/Elections/Election 2016'], | ||||
|             'uploader': 'NBCU-NEWS', | ||||
|         }, | ||||
|     } | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|  | ||||
|         video_id = mobj.group('id') | ||||
|         provider_id = mobj.group('provider_id') | ||||
|         feed_id = mobj.group('feed_id') | ||||
|  | ||||
|         real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, video_id) | ||||
|         feed = self._download_json(real_url, video_id) | ||||
|         entry = feed['entries'][0] | ||||
|     def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}): | ||||
|         real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query) | ||||
|         entry = self._download_json(real_url, video_id)['entries'][0] | ||||
|  | ||||
|         formats = [] | ||||
|         subtitles = {} | ||||
|         first_video_id = None | ||||
|         duration = None | ||||
|         asset_types = [] | ||||
|         for item in entry['media$content']: | ||||
|             smil_url = item['plfile$url'] + '&mbr=true' | ||||
|             smil_url = item['plfile$url'] | ||||
|             cur_video_id = ThePlatformIE._match_id(smil_url) | ||||
|             if first_video_id is None: | ||||
|                 first_video_id = cur_video_id | ||||
|                 duration = float_or_none(item.get('plfile$duration')) | ||||
|             cur_formats, cur_subtitles = self._extract_theplatform_smil(smil_url, video_id, 'Downloading SMIL data for %s' % cur_video_id) | ||||
|             formats.extend(cur_formats) | ||||
|             subtitles = self._merge_subtitles(subtitles, cur_subtitles) | ||||
|             for asset_type in item['plfile$assetTypes']: | ||||
|                 if asset_type in asset_types: | ||||
|                     continue | ||||
|                 asset_types.append(asset_type) | ||||
|                 query = { | ||||
|                     'mbr': 'true', | ||||
|                     'formats': item['plfile$format'], | ||||
|                     'assetTypes': asset_type, | ||||
|                 } | ||||
|                 if asset_type in asset_types_query: | ||||
|                     query.update(asset_types_query[asset_type]) | ||||
|                 cur_formats, cur_subtitles = self._extract_theplatform_smil(update_url_query( | ||||
|                     smil_url, query), video_id, 'Downloading SMIL data for %s' % asset_type) | ||||
|                 formats.extend(cur_formats) | ||||
|                 subtitles = self._merge_subtitles(subtitles, cur_subtitles) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
| @@ -344,5 +350,17 @@ class ThePlatformFeedIE(ThePlatformBaseIE): | ||||
|             'timestamp': timestamp, | ||||
|             'categories': categories, | ||||
|         }) | ||||
|         if custom_fields: | ||||
|             ret.update(custom_fields(entry)) | ||||
|  | ||||
|         return ret | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|  | ||||
|         video_id = mobj.group('id') | ||||
|         provider_id = mobj.group('provider_id') | ||||
|         feed_id = mobj.group('feed_id') | ||||
|         filter_query = mobj.group('filter') | ||||
|  | ||||
|         return self._extract_feed_info(provider_id, feed_id, filter_query, video_id) | ||||
|   | ||||
| @@ -4,6 +4,12 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     clean_html, | ||||
|     get_element_by_attribute, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TVPIE(InfoExtractor): | ||||
| @@ -21,7 +27,7 @@ class TVPIE(InfoExtractor): | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176', | ||||
|         'md5': 'c3b15ed1af288131115ff17a17c19dda', | ||||
|         'md5': 'b0005b542e5b4de643a9690326ab1257', | ||||
|         'info_dict': { | ||||
|             'id': '17916176', | ||||
|             'ext': 'mp4', | ||||
| @@ -53,6 +59,11 @@ class TVPIE(InfoExtractor): | ||||
|         webpage = self._download_webpage( | ||||
|             'http://www.tvp.pl/sess/tvplayer.php?object_id=%s' % video_id, video_id) | ||||
|  | ||||
|         error_massage = get_element_by_attribute('class', 'msg error', webpage) | ||||
|         if error_massage: | ||||
|             raise ExtractorError('%s said: %s' % ( | ||||
|                 self.IE_NAME, clean_html(error_massage)), expected=True) | ||||
|  | ||||
|         title = self._search_regex( | ||||
|             r'name\s*:\s*([\'"])Title\1\s*,\s*value\s*:\s*\1(?P<title>.+?)\1', | ||||
|             webpage, 'title', group='title') | ||||
| @@ -66,24 +77,50 @@ class TVPIE(InfoExtractor): | ||||
|             r"poster\s*:\s*'([^']+)'", webpage, 'thumbnail', default=None) | ||||
|  | ||||
|         video_url = self._search_regex( | ||||
|             r'0:{src:([\'"])(?P<url>.*?)\1', webpage, 'formats', group='url', default=None) | ||||
|         if not video_url: | ||||
|             r'0:{src:([\'"])(?P<url>.*?)\1', webpage, | ||||
|             'formats', group='url', default=None) | ||||
|         if not video_url or 'material_niedostepny.mp4' in video_url: | ||||
|             video_url = self._download_json( | ||||
|                 'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s' % video_id, | ||||
|                 video_id)['video_url'] | ||||
|  | ||||
|         ext = video_url.rsplit('.', 1)[-1] | ||||
|         if ext != 'ism/manifest': | ||||
|             if '/' in ext: | ||||
|                 ext = 'mp4' | ||||
|         formats = [] | ||||
|         video_url_base = self._search_regex( | ||||
|             r'(https?://.+?/video)(?:\.(?:ism|f4m|m3u8)|-\d+\.mp4)', | ||||
|             video_url, 'video base url', default=None) | ||||
|         if video_url_base: | ||||
|             # TODO: Current DASH formats are broken - $Time$ pattern in | ||||
|             # <SegmentTemplate> not implemented yet | ||||
|             # formats.extend(self._extract_mpd_formats( | ||||
|             #     video_url_base + '.ism/video.mpd', | ||||
|             #     video_id, mpd_id='dash', fatal=False)) | ||||
|             formats.extend(self._extract_f4m_formats( | ||||
|                 video_url_base + '.ism/video.f4m', | ||||
|                 video_id, f4m_id='hds', fatal=False)) | ||||
|             m3u8_formats = self._extract_m3u8_formats( | ||||
|                 video_url_base + '.ism/video.m3u8', video_id, | ||||
|                 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) | ||||
|             self._sort_formats(m3u8_formats) | ||||
|             m3u8_formats = list(filter( | ||||
|                 lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple', | ||||
|                 m3u8_formats)) | ||||
|             formats.extend(m3u8_formats) | ||||
|             for i, m3u8_format in enumerate(m3u8_formats, 2): | ||||
|                 http_url = '%s-%d.mp4' % (video_url_base, i) | ||||
|                 if self._is_valid_url(http_url, video_id): | ||||
|                     f = m3u8_format.copy() | ||||
|                     f.update({ | ||||
|                         'url': http_url, | ||||
|                         'format_id': f['format_id'].replace('hls', 'http'), | ||||
|                         'protocol': 'http', | ||||
|                     }) | ||||
|                     formats.append(f) | ||||
|         else: | ||||
|             formats = [{ | ||||
|                 'format_id': 'direct', | ||||
|                 'url': video_url, | ||||
|                 'ext': ext, | ||||
|                 'ext': determine_ext(video_url, 'mp4'), | ||||
|             }] | ||||
|         else: | ||||
|             m3u8_url = re.sub('([^/]*)\.ism/manifest', r'\1.ism/\1.m3u8', video_url) | ||||
|             formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4') | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|   | ||||
| @@ -29,7 +29,7 @@ class TwitchBaseIE(InfoExtractor): | ||||
|     _VALID_URL_BASE = r'https?://(?:www\.)?twitch\.tv' | ||||
|  | ||||
|     _API_BASE = 'https://api.twitch.tv' | ||||
|     _USHER_BASE = 'http://usher.twitch.tv' | ||||
|     _USHER_BASE = 'https://usher.ttvnw.net' | ||||
|     _LOGIN_URL = 'http://www.twitch.tv/login' | ||||
|     _NETRC_MACHINE = 'twitch' | ||||
|  | ||||
|   | ||||
							
								
								
									
youtube_dl/extractor/urplay.py (new file, 67 lines, Normal file)
| @@ -0,0 +1,67 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class URPlayIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?urplay\.se/program/(?P<id>[0-9]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://urplay.se/program/190031-tripp-trapp-trad-sovkudde', | ||||
|         'md5': '15ca67b63fd8fb320ac2bcd854bad7b6', | ||||
|         'info_dict': { | ||||
|             'id': '190031', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Tripp, Trapp, Träd : Sovkudde', | ||||
|             'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         urplayer_data = self._parse_json(self._search_regex( | ||||
|             r'urPlayer\.init\(({.+?})\);', webpage, 'urplayer data'), video_id) | ||||
|         host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect'] | ||||
|  | ||||
|         formats = [] | ||||
|         for quality_attr, quality, preference in (('', 'sd', 0), ('_hd', 'hd', 1)): | ||||
|             file_rtmp = urplayer_data.get('file_rtmp' + quality_attr) | ||||
|             if file_rtmp: | ||||
|                 formats.append({ | ||||
|                     'url': 'rtmp://%s/urplay/mp4:%s' % (host, file_rtmp), | ||||
|                     'format_id': quality + '-rtmp', | ||||
|                     'ext': 'flv', | ||||
|                     'preference': preference, | ||||
|                 }) | ||||
|             file_http = urplayer_data.get('file_http' + quality_attr) or urplayer_data.get('file_http_sub' + quality_attr) | ||||
|             if file_http: | ||||
|                 file_http_base_url = 'http://%s/%s' % (host, file_http) | ||||
|                 formats.extend(self._extract_f4m_formats( | ||||
|                     file_http_base_url + 'manifest.f4m', video_id, | ||||
|                     preference, '%s-hds' % quality, fatal=False)) | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     file_http_base_url + 'playlist.m3u8', video_id, 'mp4', | ||||
|                     'm3u8_native', preference, '%s-hls' % quality, fatal=False)) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         subtitles = {} | ||||
|         for subtitle in urplayer_data.get('subtitles', []): | ||||
|             subtitle_url = subtitle.get('file') | ||||
|             kind = subtitle.get('kind') | ||||
|             if subtitle_url or kind and kind != 'captions': | ||||
|                 continue | ||||
|             subtitles.setdefault(subtitle.get('label', 'Svenska'), []).append({ | ||||
|                 'url': subtitle_url, | ||||
|             }) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': urplayer_data['title'], | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'thumbnail': urplayer_data.get('image'), | ||||
|             'series': urplayer_data.get('series_title'), | ||||
|             'subtitles': subtitles, | ||||
|             'formats': formats, | ||||
|         } | ||||
							
								
								
									
youtube_dl/extractor/vidbit.py (new file, 84 lines, Normal file)
| @@ -0,0 +1,84 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urlparse | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     js_to_json, | ||||
|     remove_end, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class VidbitIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?vidbit\.co/(?:watch|embed)\?.*?\bv=(?P<id>[\da-zA-Z]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.vidbit.co/watch?v=jkL2yDOEq2', | ||||
|         'md5': '1a34b7f14defe3b8fafca9796892924d', | ||||
|         'info_dict': { | ||||
|             'id': 'jkL2yDOEq2', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Intro to VidBit', | ||||
|             'description': 'md5:5e0d6142eec00b766cbf114bfd3d16b7', | ||||
|             'thumbnail': 're:https?://.*\.jpg$', | ||||
|             'upload_date': '20160618', | ||||
|             'view_count': int, | ||||
|             'comment_count': int, | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://www.vidbit.co/embed?v=jkL2yDOEq2&auto=0&water=0', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage( | ||||
|             compat_urlparse.urljoin(url, '/watch?v=%s' % video_id), video_id) | ||||
|  | ||||
|         video_url, title = [None] * 2 | ||||
|  | ||||
|         config = self._parse_json(self._search_regex( | ||||
|             r'(?s)\.setup\(({.+?})\);', webpage, 'setup', default='{}'), | ||||
|             video_id, transform_source=js_to_json) | ||||
|         if config: | ||||
|             if config.get('file'): | ||||
|                 video_url = compat_urlparse.urljoin(url, config['file']) | ||||
|             title = config.get('title') | ||||
|  | ||||
|         if not video_url: | ||||
|             video_url = compat_urlparse.urljoin(url, self._search_regex( | ||||
|                 r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', | ||||
|                 webpage, 'video URL', group='url')) | ||||
|  | ||||
|         if not title: | ||||
|             title = remove_end( | ||||
|                 self._html_search_regex( | ||||
|                     (r'<h1>(.+?)</h1>', r'<title>(.+?)</title>'), | ||||
|                     webpage, 'title', default=None) or self._og_search_title(webpage), | ||||
|                 ' - VidBit') | ||||
|  | ||||
|         description = self._html_search_meta( | ||||
|             ('description', 'og:description', 'twitter:description'), | ||||
|             webpage, 'description') | ||||
|  | ||||
|         upload_date = unified_strdate(self._html_search_meta( | ||||
|             'datePublished', webpage, 'upload date')) | ||||
|  | ||||
|         view_count = int_or_none(self._search_regex( | ||||
|             r'<strong>(\d+)</strong> views', | ||||
|             webpage, 'view count', fatal=False)) | ||||
|         comment_count = int_or_none(self._search_regex( | ||||
|             r'id=["\']cmt_num["\'][^>]*>\((\d+)\)', | ||||
|             webpage, 'comment count', fatal=False)) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'upload_date': upload_date, | ||||
|             'view_count': view_count, | ||||
|             'comment_count': comment_count, | ||||
|         } | ||||
| @@ -16,6 +16,7 @@ from ..utils import ( | ||||
|     ExtractorError, | ||||
|     InAdvancePagedList, | ||||
|     int_or_none, | ||||
|     NO_DEFAULT, | ||||
|     RegexNotFoundError, | ||||
|     sanitized_Request, | ||||
|     smuggle_url, | ||||
| @@ -56,6 +57,26 @@ class VimeoBaseInfoExtractor(InfoExtractor): | ||||
|         self._set_vimeo_cookie('vuid', vuid) | ||||
|         self._download_webpage(login_request, None, False, 'Wrong login info') | ||||
|  | ||||
|     def _verify_video_password(self, url, video_id, webpage): | ||||
|         password = self._downloader.params.get('videopassword') | ||||
|         if password is None: | ||||
|             raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True) | ||||
|         token, vuid = self._extract_xsrft_and_vuid(webpage) | ||||
|         data = urlencode_postdata({ | ||||
|             'password': password, | ||||
|             'token': token, | ||||
|         }) | ||||
|         if url.startswith('http://'): | ||||
|             # vimeo only supports https now, but the user can give an http url | ||||
|             url = url.replace('http://', 'https://') | ||||
|         password_request = sanitized_Request(url + '/password', data) | ||||
|         password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|         password_request.add_header('Referer', url) | ||||
|         self._set_vimeo_cookie('vuid', vuid) | ||||
|         return self._download_webpage( | ||||
|             password_request, video_id, | ||||
|             'Verifying the password', 'Wrong password') | ||||
|  | ||||
|     def _extract_xsrft_and_vuid(self, webpage): | ||||
|         xsrft = self._search_regex( | ||||
|             r'(?:(?P<q1>["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)', | ||||
| @@ -146,7 +167,7 @@ class VimeoIE(VimeoBaseInfoExtractor): | ||||
|                             \. | ||||
|                         )? | ||||
|                         vimeo(?P<pro>pro)?\.com/ | ||||
|                         (?!channels/[^/?#]+/?(?:$|[?#])|[^/]+/review/|(?:album|ondemand)/) | ||||
|                         (?!(?:channels|album)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/) | ||||
|                         (?:.*?/)? | ||||
|                         (?: | ||||
|                             (?: | ||||
| @@ -227,8 +248,6 @@ class VimeoIE(VimeoBaseInfoExtractor): | ||||
|         { | ||||
|             'url': 'http://vimeo.com/channels/keypeele/75629013', | ||||
|             'md5': '2f86a05afe9d7abc0b9126d229bbe15d', | ||||
|             'note': 'Video is freely available via original URL ' | ||||
|                     'and protected with password when accessed via http://vimeo.com/75629013', | ||||
|             'info_dict': { | ||||
|                 'id': '75629013', | ||||
|                 'ext': 'mp4', | ||||
| @@ -272,7 +291,7 @@ class VimeoIE(VimeoBaseInfoExtractor): | ||||
|         { | ||||
|             # contains original format | ||||
|             'url': 'https://vimeo.com/33951933', | ||||
|             'md5': '53c688fa95a55bf4b7293d37a89c5c53', | ||||
|             'md5': '2d9f5475e0537f013d0073e812ab89e6', | ||||
|             'info_dict': { | ||||
|                 'id': '33951933', | ||||
|                 'ext': 'mp4', | ||||
| @@ -284,6 +303,29 @@ class VimeoIE(VimeoBaseInfoExtractor): | ||||
|                 'description': 'md5:ae23671e82d05415868f7ad1aec21147', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             # only available via https://vimeo.com/channels/tributes/6213729 and | ||||
|             # not via https://vimeo.com/6213729 | ||||
|             'url': 'https://vimeo.com/channels/tributes/6213729', | ||||
|             'info_dict': { | ||||
|                 'id': '6213729', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Vimeo Tribute: The Shining', | ||||
|                 'uploader': 'Casey Donahue', | ||||
|                 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/caseydonahue', | ||||
|                 'uploader_id': 'caseydonahue', | ||||
|                 'upload_date': '20090821', | ||||
|                 'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|             'expected_warnings': ['Unable to download JSON metadata'], | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://vimeo.com/moogaloop.swf?clip_id=2539741', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'https://vimeo.com/109815029', | ||||
|             'note': 'Video not completely processed, "failed" seed status', | ||||
| @@ -293,6 +335,10 @@ class VimeoIE(VimeoBaseInfoExtractor): | ||||
|             'url': 'https://vimeo.com/groups/travelhd/videos/22439234', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'https://vimeo.com/album/2632481/video/79010983', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|         { | ||||
|             # source file returns 403: Forbidden | ||||
|             'url': 'https://vimeo.com/7809605', | ||||
| @@ -319,26 +365,6 @@ class VimeoIE(VimeoBaseInfoExtractor): | ||||
|         if mobj: | ||||
|             return mobj.group(1) | ||||
|  | ||||
|     def _verify_video_password(self, url, video_id, webpage): | ||||
|         password = self._downloader.params.get('videopassword') | ||||
|         if password is None: | ||||
|             raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True) | ||||
|         token, vuid = self._extract_xsrft_and_vuid(webpage) | ||||
|         data = urlencode_postdata({ | ||||
|             'password': password, | ||||
|             'token': token, | ||||
|         }) | ||||
|         if url.startswith('http://'): | ||||
|             # vimeo only supports https now, but the user can give an http url | ||||
|             url = url.replace('http://', 'https://') | ||||
|         password_request = sanitized_Request(url + '/password', data) | ||||
|         password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|         password_request.add_header('Referer', url) | ||||
|         self._set_vimeo_cookie('vuid', vuid) | ||||
|         return self._download_webpage( | ||||
|             password_request, video_id, | ||||
|             'Verifying the password', 'Wrong password') | ||||
|  | ||||
|     def _verify_player_video_password(self, url, video_id): | ||||
|         password = self._downloader.params.get('videopassword') | ||||
|         if password is None: | ||||
| @@ -369,7 +395,7 @@ class VimeoIE(VimeoBaseInfoExtractor): | ||||
|         orig_url = url | ||||
|         if mobj.group('pro') or mobj.group('player'): | ||||
|             url = 'https://player.vimeo.com/video/' + video_id | ||||
|         else: | ||||
|         elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')): | ||||
|             url = 'https://vimeo.com/' + video_id | ||||
|  | ||||
|         # Retrieve video webpage to extract further information | ||||
| @@ -630,8 +656,21 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): | ||||
|                 webpage = self._login_list_password(page_url, list_id, webpage) | ||||
|                 yield self._extract_list_title(webpage) | ||||
|  | ||||
|             for video_id in re.findall(r'id="clip_(\d+?)"', webpage): | ||||
|                 yield self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo') | ||||
|             # Try extracting href first since not all videos are available via | ||||
|             # short https://vimeo.com/id URL (e.g. https://vimeo.com/channels/tributes/6213729) | ||||
|             clips = re.findall( | ||||
|                 r'id="clip_(\d+)"[^>]*>\s*<a[^>]+href="(/(?:[^/]+/)*\1)', webpage) | ||||
|             if clips: | ||||
|                 for video_id, video_url in clips: | ||||
|                     yield self.url_result( | ||||
|                         compat_urlparse.urljoin(base_url, video_url), | ||||
|                         VimeoIE.ie_key(), video_id=video_id) | ||||
|             # More relaxed fallback | ||||
|             else: | ||||
|                 for video_id in re.findall(r'id=["\']clip_(\d+)', webpage): | ||||
|                     yield self.url_result( | ||||
|                         'https://vimeo.com/%s' % video_id, | ||||
|                         VimeoIE.ie_key(), video_id=video_id) | ||||
|  | ||||
|             if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: | ||||
|                 break | ||||
| @@ -668,7 +707,7 @@ class VimeoUserIE(VimeoChannelIE): | ||||
|  | ||||
| class VimeoAlbumIE(VimeoChannelIE): | ||||
|     IE_NAME = 'vimeo:album' | ||||
|     _VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)' | ||||
|     _VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)(?:$|[?#]|/(?!video))' | ||||
|     _TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://vimeo.com/album/2632481', | ||||
| @@ -688,6 +727,13 @@ class VimeoAlbumIE(VimeoChannelIE): | ||||
|         'params': { | ||||
|             'videopassword': 'youtube-dl', | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'https://vimeo.com/album/2632481/sort:plays/format:thumbnail', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # TODO: respect page number | ||||
|         'url': 'https://vimeo.com/album/2632481/page:2/sort:plays/format:thumbnail', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _page_url(self, base_url, pagenum): | ||||
| @@ -746,12 +792,39 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'uploader_id': 'user22258446', | ||||
|         } | ||||
|     }, { | ||||
|         'note': 'Password protected', | ||||
|         'url': 'https://vimeo.com/user37284429/review/138823582/c4d865efde', | ||||
|         'info_dict': { | ||||
|             'id': '138823582', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'EFFICIENT PICKUP MASTERCLASS MODULE 1', | ||||
|             'uploader': 'TMB', | ||||
|             'uploader_id': 'user37284429', | ||||
|         }, | ||||
|         'params': { | ||||
|             'videopassword': 'holygrail', | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         self._login() | ||||
|  | ||||
|     def _get_config_url(self, webpage_url, video_id, video_password_verified=False): | ||||
|         webpage = self._download_webpage(webpage_url, video_id) | ||||
|         config_url = self._html_search_regex( | ||||
|             r'data-config-url="([^"]+)"', webpage, 'config URL', | ||||
|             default=NO_DEFAULT if video_password_verified else None) | ||||
|         if config_url is None: | ||||
|             self._verify_video_password(webpage_url, video_id, webpage) | ||||
|             config_url = self._get_config_url( | ||||
|                 webpage_url, video_id, video_password_verified=True) | ||||
|         return config_url | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         config = self._download_json( | ||||
|             'https://player.vimeo.com/video/%s/config' % video_id, video_id) | ||||
|         config_url = self._get_config_url(url, video_id) | ||||
|         config = self._download_json(config_url, video_id) | ||||
|         info_dict = self._parse_config(config, video_id) | ||||
|         self._vimeo_sort_formats(info_dict['formats']) | ||||
|         info_dict['id'] = video_id | ||||
|   | ||||
| @@ -24,6 +24,7 @@ class VineIE(InfoExtractor): | ||||
|             'upload_date': '20130519', | ||||
|             'uploader': 'Jack Dorsey', | ||||
|             'uploader_id': '76', | ||||
|             'view_count': int, | ||||
|             'like_count': int, | ||||
|             'comment_count': int, | ||||
|             'repost_count': int, | ||||
| @@ -39,6 +40,7 @@ class VineIE(InfoExtractor): | ||||
|             'upload_date': '20140815', | ||||
|             'uploader': 'Mars Ruiz', | ||||
|             'uploader_id': '1102363502380728320', | ||||
|             'view_count': int, | ||||
|             'like_count': int, | ||||
|             'comment_count': int, | ||||
|             'repost_count': int, | ||||
| @@ -54,6 +56,7 @@ class VineIE(InfoExtractor): | ||||
|             'upload_date': '20130430', | ||||
|             'uploader': 'Z3k3', | ||||
|             'uploader_id': '936470460173008896', | ||||
|             'view_count': int, | ||||
|             'like_count': int, | ||||
|             'comment_count': int, | ||||
|             'repost_count': int, | ||||
| @@ -71,6 +74,7 @@ class VineIE(InfoExtractor): | ||||
|             'upload_date': '20150705', | ||||
|             'uploader': 'Pimry_zaa', | ||||
|             'uploader_id': '1135760698325307392', | ||||
|             'view_count': int, | ||||
|             'like_count': int, | ||||
|             'comment_count': int, | ||||
|             'repost_count': int, | ||||
| @@ -86,10 +90,12 @@ class VineIE(InfoExtractor): | ||||
|  | ||||
|         data = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'window\.POST_DATA\s*=\s*{\s*%s\s*:\s*({.+?})\s*};\s*</script>' % video_id, | ||||
|                 r'window\.POST_DATA\s*=\s*({.+?});\s*</script>', | ||||
|                 webpage, 'vine data'), | ||||
|             video_id) | ||||
|  | ||||
|         data = data[list(data.keys())[0]] | ||||
|  | ||||
|         formats = [{ | ||||
|             'format_id': '%(format)s-%(rate)s' % f, | ||||
|             'vcodec': f.get('format'), | ||||
| @@ -109,6 +115,7 @@ class VineIE(InfoExtractor): | ||||
|             'upload_date': unified_strdate(data.get('created')), | ||||
|             'uploader': username, | ||||
|             'uploader_id': data.get('userIdStr'), | ||||
|             'view_count': int_or_none(data.get('loops', {}).get('count')), | ||||
|             'like_count': int_or_none(data.get('likes', {}).get('count')), | ||||
|             'comment_count': int_or_none(data.get('comments', {}).get('count')), | ||||
|             'repost_count': int_or_none(data.get('reposts', {}).get('count')), | ||||
|   | ||||
| @@ -3,6 +3,7 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
| import sys | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| @@ -10,7 +11,6 @@ from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     orderedSet, | ||||
|     sanitized_Request, | ||||
|     str_to_int, | ||||
|     unescapeHTML, | ||||
|     unified_strdate, | ||||
| @@ -27,12 +27,12 @@ class VKIE(InfoExtractor): | ||||
|                     https?:// | ||||
|                         (?: | ||||
|                             (?: | ||||
|                                 (?:m\.)?vk\.com/video_| | ||||
|                                 (?:(?:m|new)\.)?vk\.com/video_| | ||||
|                                 (?:www\.)?daxab.com/ | ||||
|                             ) | ||||
|                             ext\.php\?(?P<embed_query>.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+).*)| | ||||
|                             (?: | ||||
|                                 (?:m\.)?vk\.com/(?:.+?\?.*?z=)?video| | ||||
|                                 (?:(?:m|new)\.)?vk\.com/(?:.+?\?.*?z=)?video| | ||||
|                                 (?:www\.)?daxab.com/embed/ | ||||
|                             ) | ||||
|                             (?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>[\da-f]+))? | ||||
| @@ -182,6 +182,10 @@ class VKIE(InfoExtractor): | ||||
|             # pladform embed | ||||
|             'url': 'https://vk.com/video-76116461_171554880', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://new.vk.com/video205387401_165548505', | ||||
|             'only_matching': True, | ||||
|         } | ||||
|     ] | ||||
|  | ||||
| @@ -190,7 +194,7 @@ class VKIE(InfoExtractor): | ||||
|         if username is None: | ||||
|             return | ||||
|  | ||||
|         login_page = self._download_webpage( | ||||
|         login_page, url_handle = self._download_webpage_handle( | ||||
|             'https://vk.com', None, 'Downloading login page') | ||||
|  | ||||
|         login_form = self._hidden_inputs(login_page) | ||||
| @@ -200,11 +204,26 @@ class VKIE(InfoExtractor): | ||||
|             'pass': password.encode('cp1251'), | ||||
|         }) | ||||
|  | ||||
|         request = sanitized_Request( | ||||
|             'https://login.vk.com/?act=login', | ||||
|             urlencode_postdata(login_form)) | ||||
|         # https://new.vk.com/ serves two remixlhk cookies in the Set-Cookie header | ||||
|         # and expects the first one to be set rather than the second (see | ||||
|         # https://github.com/rg3/youtube-dl/issues/9841#issuecomment-227871201). | ||||
|         # Per RFC 6265 the newer cookie should be stored in the cookie store, | ||||
|         # which is what actually happens. | ||||
|         # We work around this VK issue by manually resetting the remixlhk cookie to | ||||
|         # the first one. | ||||
|         cookies = url_handle.headers.get('Set-Cookie') | ||||
|         if sys.version_info[0] >= 3: | ||||
|             cookies = cookies.encode('iso-8859-1') | ||||
|         cookies = cookies.decode('utf-8') | ||||
|         remixlhk = re.search(r'remixlhk=(.+?);.*?\bdomain=(.+?)(?:[,;]|$)', cookies) | ||||
|         if remixlhk: | ||||
|             value, domain = remixlhk.groups() | ||||
|             self._set_cookie(domain, 'remixlhk', value) | ||||
|  | ||||
|         login_page = self._download_webpage( | ||||
|             request, None, note='Logging in as %s' % username) | ||||
|             'https://login.vk.com/?act=login', None, | ||||
|             note='Logging in as %s' % username, | ||||
|             data=urlencode_postdata(login_form)) | ||||
|  | ||||
|         if re.search(r'onLoginFailed', login_page): | ||||
|             raise ExtractorError( | ||||
| @@ -339,7 +358,7 @@ class VKIE(InfoExtractor): | ||||
| class VKUserVideosIE(InfoExtractor): | ||||
|     IE_NAME = 'vk:uservideos' | ||||
|     IE_DESC = "VK - User's Videos" | ||||
|     _VALID_URL = r'https?://vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&]|$)' | ||||
|     _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&]|$)' | ||||
|     _TEMPLATE_URL = 'https://vk.com/videos' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://vk.com/videos205387401', | ||||
| @@ -354,6 +373,12 @@ class VKUserVideosIE(InfoExtractor): | ||||
|     }, { | ||||
|         'url': 'http://vk.com/videos-97664626?section=all', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://m.vk.com/videos205387401', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://new.vk.com/videos205387401', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
| @@ -25,7 +25,8 @@ class VRTIE(InfoExtractor): | ||||
|                 'timestamp': 1414271750.949, | ||||
|                 'upload_date': '20141025', | ||||
|                 'duration': 929, | ||||
|             } | ||||
|             }, | ||||
|             'skip': 'HTTP Error 404: Not Found', | ||||
|         }, | ||||
|         # sporza.be | ||||
|         { | ||||
| @@ -39,7 +40,8 @@ class VRTIE(InfoExtractor): | ||||
|                 'timestamp': 1413835980.560, | ||||
|                 'upload_date': '20141020', | ||||
|                 'duration': 3238, | ||||
|             } | ||||
|             }, | ||||
|             'skip': 'HTTP Error 404: Not Found', | ||||
|         }, | ||||
|         # cobra.be | ||||
|         { | ||||
| @@ -53,16 +55,39 @@ class VRTIE(InfoExtractor): | ||||
|                 'timestamp': 1413967500.494, | ||||
|                 'upload_date': '20141022', | ||||
|                 'duration': 661, | ||||
|             } | ||||
|             }, | ||||
|             'skip': 'HTTP Error 404: Not Found', | ||||
|         }, | ||||
|         { | ||||
|             # YouTube video | ||||
|             'url': 'http://deredactie.be/cm/vrtnieuws/videozone/nieuws/cultuurenmedia/1.2622957', | ||||
|             'only_matching': True, | ||||
|             'md5': 'b8b93da1df1cea6c8556255a796b7d61', | ||||
|             'info_dict': { | ||||
|                 'id': 'Wji-BZ0oCwg', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'ROGUE ONE: A STAR WARS STORY Official Teaser Trailer', | ||||
|                 'description': 'md5:8e468944dce15567a786a67f74262583', | ||||
|                 'uploader': 'Star Wars', | ||||
|                 'uploader_id': 'starwars', | ||||
|                 'upload_date': '20160407', | ||||
|             }, | ||||
|             'add_ie': ['Youtube'], | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://cobra.canvas.be/cm/cobra/videozone/rubriek/film-videozone/1.2377055', | ||||
|             'only_matching': True, | ||||
|             'md5': '', | ||||
|             'info_dict': { | ||||
|                 'id': '2377055', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Cafe Derby', | ||||
|                 'description': 'Lenny Van Wesemael debuteert met de langspeelfilm Café Derby. Een waar gebeurd maar ook verzonnen verhaal.', | ||||
|                 'upload_date': '20150626', | ||||
|                 'timestamp': 1435305240.769, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # m3u8 download | ||||
|                 'skip_download': True, | ||||
|             } | ||||
|         } | ||||
|     ] | ||||
|  | ||||
| @@ -98,6 +123,32 @@ class VRTIE(InfoExtractor): | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     src, video_id, 'mp4', entry_protocol='m3u8_native', | ||||
|                     m3u8_id='hls', fatal=False)) | ||||
|                 formats.extend(self._extract_f4m_formats( | ||||
|                     src.replace('playlist.m3u8', 'manifest.f4m'), | ||||
|                     video_id, f4m_id='hds', fatal=False)) | ||||
|                 if 'data-video-geoblocking="true"' not in webpage: | ||||
|                     rtmp_formats = self._extract_smil_formats( | ||||
|                         src.replace('playlist.m3u8', 'jwplayer.smil'), | ||||
|                         video_id, fatal=False) | ||||
|                     formats.extend(rtmp_formats) | ||||
|                     for rtmp_format in rtmp_formats: | ||||
|                         rtmp_format_c = rtmp_format.copy() | ||||
|                         rtmp_format_c['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path']) | ||||
|                         del rtmp_format_c['play_path'] | ||||
|                         del rtmp_format_c['ext'] | ||||
|                         http_format = rtmp_format_c.copy() | ||||
|                         http_format.update({ | ||||
|                             'url': rtmp_format_c['url'].replace('rtmp://', 'http://').replace('vod.', 'download.').replace('/_definst_/', '/').replace('mp4:', ''), | ||||
|                             'format_id': rtmp_format['format_id'].replace('rtmp', 'http'), | ||||
|                             'protocol': 'http', | ||||
|                         }) | ||||
|                         rtsp_format = rtmp_format_c.copy() | ||||
|                         rtsp_format.update({ | ||||
|                             'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'), | ||||
|                             'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'), | ||||
|                             'protocol': 'rtsp', | ||||
|                         }) | ||||
|                         formats.extend([http_format, rtsp_format]) | ||||
|             else: | ||||
|                 formats.extend(self._extract_f4m_formats( | ||||
|                     '%s/manifest.f4m' % src, video_id, f4m_id='hds', fatal=False)) | ||||
|   | ||||
| @@ -6,17 +6,23 @@ from ..compat import compat_urllib_parse_unquote | ||||
|  | ||||
|  | ||||
| class XNXXIE(InfoExtractor): | ||||
|     _VALID_URL = r'^https?://(?:video|www)\.xnxx\.com/video(?P<id>[0-9]+)/(.*)' | ||||
|     _TEST = { | ||||
|         'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_', | ||||
|         'md5': '0831677e2b4761795f68d417e0b7b445', | ||||
|     _VALID_URL = r'https?://(?:video|www)\.xnxx\.com/video-?(?P<id>[0-9a-z]+)/' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.xnxx.com/video-55awb78/skyrim_test_video', | ||||
|         'md5': 'ef7ecee5af78f8b03dca2cf31341d3a0', | ||||
|         'info_dict': { | ||||
|             'id': '1135332', | ||||
|             'id': '55awb78', | ||||
|             'ext': 'flv', | ||||
|             'title': 'lida » Naked Funny Actress  (5)', | ||||
|             'title': 'Skyrim Test Video', | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|     } | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.xnxx.com/video-55awb78/', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|   | ||||
| @@ -4,17 +4,23 @@ import itertools | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urllib_parse_unquote | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     orderedSet, | ||||
|     parse_duration, | ||||
|     sanitized_Request, | ||||
|     str_to_int, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class XTubeIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:xtube:|https?://(?:www\.)?xtube\.com/(?:watch\.php\?.*\bv=|video-watch/(?P<display_id>[^/]+)-))(?P<id>[^/?&#]+)' | ||||
|     _VALID_URL = r'''(?x) | ||||
|                         (?: | ||||
|                             xtube:| | ||||
|                             https?://(?:www\.)?xtube\.com/(?:watch\.php\?.*\bv=|video-watch/(?P<display_id>[^/]+)-) | ||||
|                         ) | ||||
|                         (?P<id>[^/?&#]+) | ||||
|                     ''' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         # old URL schema | ||||
| @@ -27,6 +33,8 @@ class XTubeIE(InfoExtractor): | ||||
|             'description': 'contains:an ET kind of thing', | ||||
|             'uploader': 'greenshowers', | ||||
|             'duration': 450, | ||||
|             'view_count': int, | ||||
|             'comment_count': int, | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|     }, { | ||||
| @@ -51,21 +59,30 @@ class XTubeIE(InfoExtractor): | ||||
|         req.add_header('Cookie', 'age_verified=1; cookiesAccepted=1') | ||||
|         webpage = self._download_webpage(req, display_id) | ||||
|  | ||||
|         flashvars = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'xt\.playerOps\s*=\s*({.+?});', webpage, 'player ops'), | ||||
|             video_id)['flashvars'] | ||||
|         sources = self._parse_json(self._search_regex( | ||||
|             r'sources\s*:\s*({.+?}),', webpage, 'sources'), video_id) | ||||
|  | ||||
|         title = flashvars.get('title') or self._search_regex( | ||||
|             r'<h1>([^<]+)</h1>', webpage, 'title') | ||||
|         video_url = compat_urllib_parse_unquote(flashvars['video_url']) | ||||
|         duration = int_or_none(flashvars.get('video_duration')) | ||||
|         formats = [] | ||||
|         for format_id, format_url in sources.items(): | ||||
|             formats.append({ | ||||
|                 'url': format_url, | ||||
|                 'format_id': format_id, | ||||
|                 'height': int_or_none(format_id), | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         uploader = self._search_regex( | ||||
|             r'<input[^>]+name="contentOwnerId"[^>]+value="([^"]+)"', | ||||
|             webpage, 'uploader', fatal=False) | ||||
|         title = self._search_regex( | ||||
|             (r'<h1>(?P<title>[^<]+)</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'), | ||||
|             webpage, 'title', group='title') | ||||
|         description = self._search_regex( | ||||
|             r'</h1>\s*<p>([^<]+)', webpage, 'description', fatal=False) | ||||
|         uploader = self._search_regex( | ||||
|             (r'<input[^>]+name="contentOwnerId"[^>]+value="([^"]+)"', | ||||
|              r'<span[^>]+class="nickname"[^>]*>([^<]+)'), | ||||
|             webpage, 'uploader', fatal=False) | ||||
|         duration = parse_duration(self._search_regex( | ||||
|             r'<dt>Runtime:</dt>\s*<dd>([^<]+)</dd>', | ||||
|             webpage, 'duration', fatal=False)) | ||||
|         view_count = str_to_int(self._search_regex( | ||||
|             r'<dt>Views:</dt>\s*<dd>([\d,\.]+)</dd>', | ||||
|             webpage, 'view count', fatal=False)) | ||||
| @@ -76,7 +93,6 @@ class XTubeIE(InfoExtractor): | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'url': video_url, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'uploader': uploader, | ||||
| @@ -84,6 +100,7 @@ class XTubeIE(InfoExtractor): | ||||
|             'view_count': view_count, | ||||
|             'comment_count': comment_count, | ||||
|             'age_limit': 18, | ||||
|             'formats': formats, | ||||
|         } | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -501,6 +501,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                 'youtube_include_dash_manifest': True, | ||||
|                 'format': '141', | ||||
|             }, | ||||
|             'skip': 'format 141 not served anymore', | ||||
|         }, | ||||
|         # DASH manifest with encrypted signature | ||||
|         { | ||||
| @@ -517,7 +518,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|             }, | ||||
|             'params': { | ||||
|                 'youtube_include_dash_manifest': True, | ||||
|                 'format': '141', | ||||
|                 'format': '141/bestaudio[ext=m4a]', | ||||
|             }, | ||||
|         }, | ||||
|         # JS player signature function name containing $ | ||||
| @@ -537,7 +538,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|             }, | ||||
|             'params': { | ||||
|                 'youtube_include_dash_manifest': True, | ||||
|                 'format': '141', | ||||
|                 'format': '141/bestaudio[ext=m4a]', | ||||
|             }, | ||||
|         }, | ||||
|         # Controversy video | ||||
| @@ -618,7 +619,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/olympic', | ||||
|                 'license': 'Standard YouTube License', | ||||
|                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games', | ||||
|                 'uploader': 'Olympics', | ||||
|                 'uploader': 'Olympic', | ||||
|                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games', | ||||
|             }, | ||||
|             'params': { | ||||
| @@ -671,7 +672,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/dorappi2000', | ||||
|                 'uploader': 'dorappi2000', | ||||
|                 'license': 'Standard YouTube License', | ||||
|                 'formats': 'mincount:33', | ||||
|                 'formats': 'mincount:32', | ||||
|             }, | ||||
|         }, | ||||
|         # DASH manifest with segment_list | ||||
| @@ -691,7 +692,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|             'params': { | ||||
|                 'youtube_include_dash_manifest': True, | ||||
|                 'format': '135',  # bestvideo | ||||
|             } | ||||
|             }, | ||||
|             'skip': 'This live event has ended.', | ||||
|         }, | ||||
|         { | ||||
|             # Multifeed videos (multiple cameras), URL is for Main Camera | ||||
| @@ -762,6 +764,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30', | ||||
|             }, | ||||
|             'playlist_count': 2, | ||||
|             'skip': 'Not multifeed anymore', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://vid.plus/FlRa-iH7PGw', | ||||
| @@ -814,6 +817,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|             'skip': 'This video does not exist.', | ||||
|         }, | ||||
|         { | ||||
|             # Video licensed under Creative Commons | ||||
| @@ -1331,7 +1335,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                     (?:[a-zA-Z-]+="[^"]*"\s+)*? | ||||
|                     (?:title|href)="([^"]+)"\s+ | ||||
|                     (?:[a-zA-Z-]+="[^"]*"\s+)*? | ||||
|                     class="(?:yt-uix-redirect-link|yt-uix-sessionlink[^"]*)"[^>]*> | ||||
|                     class="[^"]*"[^>]*> | ||||
|                 [^<]+\.{3}\s* | ||||
|                 </a> | ||||
|             ''', r'\1', video_description) | ||||
|   | ||||
| @@ -131,8 +131,9 @@ class JSInterpreter(object): | ||||
|             if variable in local_vars: | ||||
|                 obj = local_vars[variable] | ||||
|             else: | ||||
|                 obj = self._objects.setdefault( | ||||
|                     variable, self.extract_object(variable)) | ||||
|                 if variable not in self._objects: | ||||
|                     self._objects[variable] = self.extract_object(variable) | ||||
|                 obj = self._objects[variable] | ||||
|  | ||||
|             if arg_str is None: | ||||
|                 # Member access | ||||
| @@ -203,7 +204,8 @@ class JSInterpreter(object): | ||||
|             argvals = tuple([ | ||||
|                 int(v) if v.isdigit() else local_vars[v] | ||||
|                 for v in m.group('args').split(',')]) | ||||
|             self._functions.setdefault(fname, self.extract_function(fname)) | ||||
|             if fname not in self._functions: | ||||
|                 self._functions[fname] = self.extract_function(fname) | ||||
|             return self._functions[fname](argvals) | ||||
|  | ||||
|         raise ExtractorError('Unsupported JS expression %r' % expr) | ||||
| @@ -230,7 +232,7 @@ class JSInterpreter(object): | ||||
|     def extract_function(self, funcname): | ||||
|         func_m = re.search( | ||||
|             r'''(?x) | ||||
|                 (?:function\s+%s|[{;,]%s\s*=\s*function|var\s+%s\s*=\s*function)\s* | ||||
|                 (?:function\s+%s|[{;,]\s*%s\s*=\s*function|var\s+%s\s*=\s*function)\s* | ||||
|                 \((?P<args>[^)]*)\)\s* | ||||
|                 \{(?P<code>[^}]+)\}''' % ( | ||||
|                 re.escape(funcname), re.escape(funcname), re.escape(funcname)), | ||||
|   | ||||
| @@ -26,9 +26,7 @@ def parseOpts(overrideArguments=None): | ||||
|         except IOError: | ||||
|             return default  # silently skip if file is not present | ||||
|         try: | ||||
|             res = [] | ||||
|             for l in optionf: | ||||
|                 res += compat_shlex_split(l, comments=True) | ||||
|             res = compat_shlex_split(optionf.read(), comments=True) | ||||
|         finally: | ||||
|             optionf.close() | ||||
|         return res | ||||
|   | ||||
| @@ -76,7 +76,7 @@ class Socks4Error(ProxyError): | ||||
|  | ||||
|     CODES = { | ||||
|         91: 'request rejected or failed', | ||||
|         92: 'request rejected becasue SOCKS server cannot connect to identd on the client', | ||||
|         92: 'request rejected because SOCKS server cannot connect to identd on the client', | ||||
|         93: 'request rejected because the client program and identd report different user-ids' | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -110,6 +110,49 @@ ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙ | ||||
|                         itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'], | ||||
|                                         'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy'))) | ||||
|  | ||||
# strptime() patterns that do not depend on day/month ordering.
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%b %d %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
)

# The common patterns followed by the numeric layouts that put the day first.
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS) + [
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
]

# The common patterns followed by the numeric layouts that put the month first.
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS) + [
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
]
|  | ||||
|  | ||||
| def preferredencoding(): | ||||
|     """Get preferred encoding. | ||||
| @@ -975,6 +1018,24 @@ class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor): | ||||
|     https_response = http_response | ||||
|  | ||||
|  | ||||
def extract_timezone(date_str):
    """Split a trailing 'Z' or numeric UTC offset off date_str.

    Returns a (timezone, date_str) tuple. timezone is a datetime.timedelta
    holding the offset; it is zero for a trailing 'Z' or when no suffix is
    recognized. date_str is the input with the recognized suffix removed
    (unchanged when nothing matched).
    """
    # At least 8 leading characters are required before the suffix, so a
    # short date such as '25-12-2014' is not mistaken for an offset.
    tz_match = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if tz_match is None:
        return datetime.timedelta(), date_str

    stripped = date_str[:-len(tz_match.group('tz'))]
    sign_char = tz_match.group('sign')
    if not sign_char:
        # Matched the bare 'Z' designator: UTC, no offset.
        return datetime.timedelta(), stripped

    direction = -1 if sign_char != '+' else 1
    offset = datetime.timedelta(
        hours=direction * int(tz_match.group('hours')),
        minutes=direction * int(tz_match.group('minutes')))
    return offset, stripped
|  | ||||
|  | ||||
| def parse_iso8601(date_str, delimiter='T', timezone=None): | ||||
|     """ Return a UNIX timestamp from the given date """ | ||||
|  | ||||
| @@ -984,20 +1045,8 @@ def parse_iso8601(date_str, delimiter='T', timezone=None): | ||||
|     date_str = re.sub(r'\.[0-9]+', '', date_str) | ||||
|  | ||||
|     if timezone is None: | ||||
|         m = re.search( | ||||
|             r'(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)', | ||||
|             date_str) | ||||
|         if not m: | ||||
|             timezone = datetime.timedelta() | ||||
|         else: | ||||
|             date_str = date_str[:-len(m.group(0))] | ||||
|             if not m.group('sign'): | ||||
|                 timezone = datetime.timedelta() | ||||
|             else: | ||||
|                 sign = 1 if m.group('sign') == '+' else -1 | ||||
|                 timezone = datetime.timedelta( | ||||
|                     hours=sign * int(m.group('hours')), | ||||
|                     minutes=sign * int(m.group('minutes'))) | ||||
|         timezone, date_str = extract_timezone(date_str) | ||||
|  | ||||
|     try: | ||||
|         date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter) | ||||
|         dt = datetime.datetime.strptime(date_str, date_format) - timezone | ||||
| @@ -1006,6 +1055,10 @@ def parse_iso8601(date_str, delimiter='T', timezone=None): | ||||
|         pass | ||||
|  | ||||
|  | ||||
def date_formats(day_first=True):
    """Return the strptime() pattern list for day-first or month-first dates."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
|  | ||||
|  | ||||
| def unified_strdate(date_str, day_first=True): | ||||
|     """Return a string with the date in the format YYYYMMDD""" | ||||
|  | ||||
| @@ -1014,53 +1067,11 @@ def unified_strdate(date_str, day_first=True): | ||||
|     upload_date = None | ||||
|     # Replace commas | ||||
|     date_str = date_str.replace(',', ' ') | ||||
|     # %z (UTC offset) is only supported in python>=3.2 | ||||
|     if not re.match(r'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str): | ||||
|         date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str) | ||||
|     # Remove AM/PM + timezone | ||||
|     date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str) | ||||
|     _, date_str = extract_timezone(date_str) | ||||
|  | ||||
|     format_expressions = [ | ||||
|         '%d %B %Y', | ||||
|         '%d %b %Y', | ||||
|         '%B %d %Y', | ||||
|         '%b %d %Y', | ||||
|         '%b %dst %Y %I:%M', | ||||
|         '%b %dnd %Y %I:%M', | ||||
|         '%b %dth %Y %I:%M', | ||||
|         '%Y %m %d', | ||||
|         '%Y-%m-%d', | ||||
|         '%Y/%m/%d', | ||||
|         '%Y/%m/%d %H:%M:%S', | ||||
|         '%Y-%m-%d %H:%M:%S', | ||||
|         '%Y-%m-%d %H:%M:%S.%f', | ||||
|         '%d.%m.%Y %H:%M', | ||||
|         '%d.%m.%Y %H.%M', | ||||
|         '%Y-%m-%dT%H:%M:%SZ', | ||||
|         '%Y-%m-%dT%H:%M:%S.%fZ', | ||||
|         '%Y-%m-%dT%H:%M:%S.%f0Z', | ||||
|         '%Y-%m-%dT%H:%M:%S', | ||||
|         '%Y-%m-%dT%H:%M:%S.%f', | ||||
|         '%Y-%m-%dT%H:%M', | ||||
|     ] | ||||
|     if day_first: | ||||
|         format_expressions.extend([ | ||||
|             '%d-%m-%Y', | ||||
|             '%d.%m.%Y', | ||||
|             '%d.%m.%y', | ||||
|             '%d/%m/%Y', | ||||
|             '%d/%m/%y', | ||||
|             '%d/%m/%Y %H:%M:%S', | ||||
|         ]) | ||||
|     else: | ||||
|         format_expressions.extend([ | ||||
|             '%m-%d-%Y', | ||||
|             '%m.%d.%Y', | ||||
|             '%m/%d/%Y', | ||||
|             '%m/%d/%y', | ||||
|             '%m/%d/%Y %H:%M:%S', | ||||
|         ]) | ||||
|     for expression in format_expressions: | ||||
|     for expression in date_formats(day_first): | ||||
|         try: | ||||
|             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') | ||||
|         except ValueError: | ||||
| @@ -1076,6 +1087,29 @@ def unified_strdate(date_str, day_first=True): | ||||
|         return compat_str(upload_date) | ||||
|  | ||||
|  | ||||
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from a free-form date string.

    date_str is tried against each pattern from date_formats(day_first);
    if none fits, email.utils.parsedate_tz() is used as a last resort.
    A trailing UTC offset and an AM/PM marker are taken into account.
    Returns None when date_str is None or cannot be parsed.
    """
    if date_str is None:
        return None

    date_str = date_str.replace(',', ' ')

    # The marker is stripped from the string below, so a PM marker is applied
    # as a flat 12 hour shift instead.
    # NOTE(review): this also shifts 12:xx PM inputs and triggers on any 'pm'
    # substring -- confirm whether that is acceptable.
    pm_delta = datetime.timedelta(hours=12 if re.search(r'(?i)PM', date_str) else 0)
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + pm_delta
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass

    # parsedate_tz() returns a plain 10-tuple (or None), not a datetime, so it
    # is passed to calendar.timegm() as is. Its last item is the UTC offset in
    # seconds, or None when the string carries no zone information.
    parsed = email.utils.parsedate_tz(date_str)
    if parsed:
        # Same correction as the strptime() path, expressed in whole seconds.
        shift = pm_delta - timezone
        return (
            calendar.timegm(parsed) - (parsed[9] or 0) +
            shift.days * 86400 + shift.seconds)
    return None
|  | ||||
|  | ||||
| def determine_ext(url, default_ext='unknown_video'): | ||||
|     if url is None: | ||||
|         return default_ext | ||||
| @@ -1591,6 +1625,11 @@ class HEADRequest(compat_urllib_request.Request): | ||||
|         return 'HEAD' | ||||
|  | ||||
|  | ||||
class PUTRequest(compat_urllib_request.Request):
    """Request subclass whose HTTP method is always reported as PUT."""

    def get_method(self):
        return 'PUT'
|  | ||||
|  | ||||
| def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1): | ||||
|     if get_attr: | ||||
|         if v is not None: | ||||
| @@ -1626,6 +1665,10 @@ def float_or_none(v, scale=1, invscale=1, default=None): | ||||
|         return default | ||||
|  | ||||
|  | ||||
def strip_or_none(v):
    """Return v.strip(), passing None through unchanged."""
    if v is None:
        return None
    return v.strip()
|  | ||||
|  | ||||
| def parse_duration(s): | ||||
|     if not isinstance(s, compat_basestring): | ||||
|         return None | ||||
| @@ -1882,7 +1925,13 @@ def update_Request(req, url=None, data=None, headers={}, query={}): | ||||
|     req_headers.update(headers) | ||||
|     req_data = data or req.data | ||||
|     req_url = update_url_query(url or req.get_full_url(), query) | ||||
|     req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request | ||||
|     req_get_method = req.get_method() | ||||
|     if req_get_method == 'HEAD': | ||||
|         req_type = HEADRequest | ||||
|     elif req_get_method == 'PUT': | ||||
|         req_type = PUTRequest | ||||
|     else: | ||||
|         req_type = compat_urllib_request.Request | ||||
|     new_req = req_type( | ||||
|         req_url, data=req_data, headers=req_headers, | ||||
|         origin_req_host=req.origin_req_host, unverifiable=req.unverifiable) | ||||
| @@ -2852,3 +2901,16 @@ def decode_packed_codes(code): | ||||
|     return re.sub( | ||||
|         r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)], | ||||
|         obfucasted_code) | ||||
|  | ||||
|  | ||||
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list into a dict of attribute name -> value.

    attrib is the comma-separated NAME=value text. Values are returned as
    strings; quoted values are returned without their surrounding double
    quotes and may contain commas. An empty quoted value (NAME="") yields
    an empty string rather than being dropped.
    """
    info = {}
    # "[^"]*" (not "+") so that an empty quoted string is still recognized.
    for key, val in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]*"|[^",]+)(?:,|$)', attrib):
        if val.startswith('"'):
            val = val[1:-1]
        info[key] = val
    return info
|  | ||||
|  | ||||
def urshift(val, n):
    """Shift val right by n bits without sign extension.

    A negative val is first offset by 2**32, i.e. treated as an unsigned
    32-bit quantity.
    """
    if val < 0:
        val += 0x100000000
    return val >> n
|   | ||||
| @@ -1,3 +1,3 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| __version__ = '2016.06.19.1' | ||||
| __version__ = '2016.07.03' | ||||
|   | ||||
		Reference in New Issue
	
	Block a user