Compare commits
	
		
			82 Commits
		
	
	
		
			2014.01.28
			...
			2014.02.03
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | 90159f5561 | ||
|   | 99877772d0 | ||
|   | b0268cb6ce | ||
|   | 4edff4cfa8 | ||
|   | 1eac553e7e | ||
|   | 9d3ac7444d | ||
|   | 588128d054 | ||
|   | 8e93b9b9aa | ||
|   | b4bcffefa3 | ||
|   | 2b39af9b4f | ||
|   | 23fe495feb | ||
|   | b5dbe89bba | ||
|   | dbe80ca7ad | ||
|   | 009a3408f5 | ||
|   | b58e3c8918 | ||
|   | 56b6faf91e | ||
|   | 7ac1f877a7 | ||
|   | d55433bbfd | ||
|   | f0ce2bc1c5 | ||
|   | c3bc00b90e | ||
|   | ff6b7b049b | ||
|   | f46359121f | ||
|   | 37c1525c17 | ||
|   | c85e4cf7b4 | ||
|   | c66dcda287 | ||
|   | 6d845922ab | ||
|   | 2949cbe036 | ||
|   | c3309a7774 | ||
|   | 7aed837595 | ||
|   | 0eb799bae9 | ||
|   | 4baff4a4ae | ||
|   | 45d7bc2f8b | ||
|   | c0c2ddddcd | ||
|   | a96ed91610 | ||
|   | c1206423c4 | ||
|   | 659aa21ba1 | ||
|   | efd02e858a | ||
|   | 3bf8bc7f37 | ||
|   | 8ccda826d5 | ||
|   | b9381e43c2 | ||
|   | fcdea2666d | ||
|   | c4db377cbb | ||
|   | 90dc5e8693 | ||
|   | c81a855b0f | ||
|   | c8d8ec8567 | ||
|   | 4f879a5be0 | ||
|   | 1a0648b4a9 | ||
|   | 3c1b4669d0 | ||
|   | 24b3d5e538 | ||
|   | ab083b08ab | ||
|   | 89acb96927 | ||
|   | 79752e18b1 | ||
|   | 55b41c723c | ||
|   | 9f8928d032 | ||
|   | 3effa7ceaa | ||
|   | ed9cc2f1e0 | ||
|   | 975fa541c2 | ||
|   | 251974e44c | ||
|   | 38a40276ec | ||
|   | 57b6288358 | ||
|   | c3f51436bf | ||
|   | 0c708f11cb | ||
|   | fb2a706d11 | ||
|   | 0b76600deb | ||
|   | 245b612a36 | ||
|   | d882161d5a | ||
|   | d4a21e0b49 | ||
|   | 26a78d4bbf | ||
|   | 8db69786c2 | ||
|   | b11cec4162 | ||
|   | 7eeb5bef24 | ||
|   | 9d2032932c | ||
|   | 6490306017 | ||
|   | ceb2b7d257 | ||
|   | 459a53c2c2 | ||
|   | adc267eebf | ||
|   | ffe8f62d27 | ||
|   | ed85007039 | ||
|   | 5aaca50d60 | ||
|   | 869baf3565 | ||
|   | e299f6d27f | ||
|   | d1b30713fb | 
							
								
								
									
										12
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										12
									
								
								README.md
									
									
									
									
									
								
							| @@ -53,6 +53,12 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|                                      from google videos for  youtube-dl "large | ||||
|                                      apple". By default (with value "auto") | ||||
|                                      youtube-dl guesses. | ||||
|     --ignore-config                  Do not read configuration files. When given | ||||
|                                      in the global configuration file /etc | ||||
|                                      /youtube-dl.conf: do not read the user | ||||
|                                      configuration in ~/.config/youtube-dl.conf | ||||
|                                      (%APPDATA%/youtube-dl/config.txt on | ||||
|                                      Windows) | ||||
|  | ||||
| ## Video Selection: | ||||
|     --playlist-start NUMBER          playlist video to start at (default is 1) | ||||
| @@ -325,7 +331,7 @@ Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unz | ||||
|  | ||||
| To run the exe you first need to install the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29). | ||||
|  | ||||
| # BUILD INSTRUCTIONS | ||||
| # DEVELOPER INSTRUCTIONS | ||||
|  | ||||
| Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution. | ||||
|  | ||||
| @@ -347,6 +353,10 @@ If you want to create a build of youtube-dl yourself, you'll need | ||||
| * zip | ||||
| * nosetests | ||||
|  | ||||
| ### Adding support for a new site | ||||
|  | ||||
| If you want to add support for a new site, copy *any* [recently modified](https://github.com/rg3/youtube-dl/commits/master/youtube_dl/extractor) file in `youtube_dl/extractor` and add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Don't forget to run the tests with `python test/test_download.py Test_Download.test_YourExtractor`! For a detailed tutorial, refer to [this blog post](http://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/). | ||||
|  | ||||
| # BUGS | ||||
|  | ||||
| Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted to do so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. | ||||
|   | ||||
| @@ -34,6 +34,7 @@ from youtube_dl.extractor import ( | ||||
|     KhanAcademyIE, | ||||
|     EveryonesMixtapeIE, | ||||
|     RutubeChannelIE, | ||||
|     GenericIE, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -229,6 +230,16 @@ class TestPlaylists(unittest.TestCase): | ||||
|         self.assertEqual(result['id'], '1409') | ||||
|         self.assertTrue(len(result['entries']) >= 34) | ||||
|  | ||||
|     def test_multiple_brightcove_videos(self): | ||||
|         # https://github.com/rg3/youtube-dl/issues/2283 | ||||
|         dl = FakeYDL() | ||||
|         ie = GenericIE(dl) | ||||
|         result = ie.extract('http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['id'], 'always-never-nuclear-command-and-control') | ||||
|         self.assertEqual(result['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker') | ||||
|         self.assertEqual(len(result['entries']), 3) | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -10,9 +10,11 @@ from test.helper import FakeYDL, md5 | ||||
|  | ||||
|  | ||||
| from youtube_dl.extractor import ( | ||||
|     BlipTVIE, | ||||
|     YoutubeIE, | ||||
|     DailymotionIE, | ||||
|     TEDIE, | ||||
|     VimeoIE, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -202,5 +204,80 @@ class TestTedSubtitles(BaseTestSubtitles): | ||||
|         for lang in langs: | ||||
|             self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang) | ||||
|  | ||||
|  | ||||
| class TestBlipTVSubtitles(BaseTestSubtitles): | ||||
|     url = 'http://blip.tv/a/a-6603250' | ||||
|     IE = BlipTVIE | ||||
|  | ||||
|     def test_list_subtitles(self): | ||||
|         self.DL.expect_warning(u'Automatic Captions not supported by this server') | ||||
|         self.DL.params['listsubtitles'] = True | ||||
|         info_dict = self.getInfoDict() | ||||
|         self.assertEqual(info_dict, None) | ||||
|  | ||||
|     def test_allsubtitles(self): | ||||
|         self.DL.expect_warning(u'Automatic Captions not supported by this server') | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(set(subtitles.keys()), set(['en'])) | ||||
|         self.assertEqual(md5(subtitles['en']), '5b75c300af65fe4476dff79478bb93e4') | ||||
|  | ||||
|  | ||||
| class TestVimeoSubtitles(BaseTestSubtitles): | ||||
|     url = 'http://vimeo.com/76979871' | ||||
|     IE = VimeoIE | ||||
|  | ||||
|     def test_no_writesubtitles(self): | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(subtitles, None) | ||||
|  | ||||
|     def test_subtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888') | ||||
|  | ||||
|     def test_subtitles_lang(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['subtitleslangs'] = ['fr'] | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8') | ||||
|  | ||||
|     def test_allsubtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr'])) | ||||
|  | ||||
|     def test_list_subtitles(self): | ||||
|         self.DL.expect_warning(u'Automatic Captions not supported by this server') | ||||
|         self.DL.params['listsubtitles'] = True | ||||
|         info_dict = self.getInfoDict() | ||||
|         self.assertEqual(info_dict, None) | ||||
|  | ||||
|     def test_automatic_captions(self): | ||||
|         self.DL.expect_warning(u'Automatic Captions not supported by this server') | ||||
|         self.DL.params['writeautomaticsub'] = True | ||||
|         self.DL.params['subtitleslang'] = ['en'] | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertTrue(len(subtitles.keys()) == 0) | ||||
|  | ||||
|     def test_nosubtitles(self): | ||||
|         self.DL.expect_warning(u'video doesn\'t have subtitles') | ||||
|         self.url = 'http://vimeo.com/56015672' | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(len(subtitles), 0) | ||||
|  | ||||
|     def test_multiple_langs(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         langs = ['es', 'fr', 'de'] | ||||
|         self.DL.params['subtitleslangs'] = langs | ||||
|         subtitles = self.getSubtitles() | ||||
|         for lang in langs: | ||||
|             self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang) | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
							
								
								
									
										24
									
								
								youtube-dl.plugin.zsh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										24
									
								
								youtube-dl.plugin.zsh
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,24 @@ | ||||
| # This allows the youtube-dl command to be installed in ZSH using antigen. | ||||
| # Antigen is a bundle manager. It allows you to enhance the functionality of | ||||
| # your zsh session by installing bundles and themes easily. | ||||
|  | ||||
| # Antigen documentation: | ||||
| # http://antigen.sharats.me/ | ||||
| # https://github.com/zsh-users/antigen | ||||
|  | ||||
| # Install youtube-dl: | ||||
| # antigen bundle rg3/youtube-dl | ||||
| # Bundles installed by antigen are available for use immediately. | ||||
|  | ||||
| # Update youtube-dl (and all other antigen bundles): | ||||
| # antigen update | ||||
|  | ||||
| # The antigen command will download the git repository to a folder and then | ||||
| # execute an enabling script (this file). The complete process for loading the | ||||
| # code is documented here: | ||||
| # https://github.com/zsh-users/antigen#notes-on-writing-plugins | ||||
|  | ||||
| # This specific script just aliases youtube-dl to the python script that this | ||||
| # library provides. This requires updating the PYTHONPATH to ensure that the | ||||
| # full set of code can be located. | ||||
| alias youtube-dl="PYTHONPATH=$(dirname $0) $(dirname $0)/bin/youtube-dl" | ||||
| @@ -100,6 +100,43 @@ def parseOpts(overrideArguments=None): | ||||
|             optionf.close() | ||||
|         return res | ||||
|  | ||||
|     def _readUserConf(): | ||||
|         xdg_config_home = os.environ.get('XDG_CONFIG_HOME') | ||||
|         if xdg_config_home: | ||||
|             userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config') | ||||
|             if not os.path.isfile(userConfFile): | ||||
|                 userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf') | ||||
|         else: | ||||
|             userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config') | ||||
|             if not os.path.isfile(userConfFile): | ||||
|                 userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf') | ||||
|         userConf = _readOptions(userConfFile, None) | ||||
|  | ||||
|         if userConf is None: | ||||
|             appdata_dir = os.environ.get('appdata') | ||||
|             if appdata_dir: | ||||
|                 userConf = _readOptions( | ||||
|                     os.path.join(appdata_dir, 'youtube-dl', 'config'), | ||||
|                     default=None) | ||||
|                 if userConf is None: | ||||
|                     userConf = _readOptions( | ||||
|                         os.path.join(appdata_dir, 'youtube-dl', 'config.txt'), | ||||
|                         default=None) | ||||
|  | ||||
|         if userConf is None: | ||||
|             userConf = _readOptions( | ||||
|                 os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'), | ||||
|                 default=None) | ||||
|         if userConf is None: | ||||
|             userConf = _readOptions( | ||||
|                 os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'), | ||||
|                 default=None) | ||||
|  | ||||
|         if userConf is None: | ||||
|             userConf = [] | ||||
|  | ||||
|         return userConf | ||||
|  | ||||
|     def _format_option_string(option): | ||||
|         ''' ('-o', '--option') -> -o, --format METAVAR''' | ||||
|  | ||||
| @@ -203,6 +240,11 @@ def parseOpts(overrideArguments=None): | ||||
|     general.add_option('--default-search', | ||||
|             dest='default_search', metavar='PREFIX', | ||||
|             help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.') | ||||
|     general.add_option( | ||||
|         '--ignore-config', | ||||
|         action='store_true', | ||||
|         help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)') | ||||
|  | ||||
|  | ||||
|     selection.add_option( | ||||
|         '--playlist-start', | ||||
| @@ -457,44 +499,18 @@ def parseOpts(overrideArguments=None): | ||||
|         if opts.verbose: | ||||
|             write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n') | ||||
|     else: | ||||
|         systemConf = _readOptions('/etc/youtube-dl.conf') | ||||
|  | ||||
|         xdg_config_home = os.environ.get('XDG_CONFIG_HOME') | ||||
|         if xdg_config_home: | ||||
|             userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config') | ||||
|             if not os.path.isfile(userConfFile): | ||||
|                 userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf') | ||||
|         else: | ||||
|             userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config') | ||||
|             if not os.path.isfile(userConfFile): | ||||
|                 userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf') | ||||
|         userConf = _readOptions(userConfFile, None) | ||||
|  | ||||
|         if userConf is None: | ||||
|             appdata_dir = os.environ.get('appdata') | ||||
|             if appdata_dir: | ||||
|                 userConf = _readOptions( | ||||
|                     os.path.join(appdata_dir, 'youtube-dl', 'config'), | ||||
|                     default=None) | ||||
|                 if userConf is None: | ||||
|                     userConf = _readOptions( | ||||
|                         os.path.join(appdata_dir, 'youtube-dl', 'config.txt'), | ||||
|                         default=None) | ||||
|  | ||||
|         if userConf is None: | ||||
|             userConf = _readOptions( | ||||
|                 os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'), | ||||
|                 default=None) | ||||
|         if userConf is None: | ||||
|             userConf = _readOptions( | ||||
|                 os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'), | ||||
|                 default=None) | ||||
|  | ||||
|         if userConf is None: | ||||
|             userConf = [] | ||||
|  | ||||
|         commandLineConf = sys.argv[1:] | ||||
|         if '--ignore-config' in commandLineConf: | ||||
|             systemConf = [] | ||||
|             userConf = [] | ||||
|         else: | ||||
|             systemConf = _readOptions('/etc/youtube-dl.conf') | ||||
|             if '--ignore-config' in systemConf: | ||||
|                 userConf = [] | ||||
|             else: | ||||
|                 userConf = _readUserConf() | ||||
|         argv = systemConf + userConf + commandLineConf | ||||
|  | ||||
|         opts, args = parser.parse_args(argv) | ||||
|         if opts.verbose: | ||||
|             write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n') | ||||
|   | ||||
| @@ -110,6 +110,7 @@ from .khanacademy import KhanAcademyIE | ||||
| from .kickstarter import KickStarterIE | ||||
| from .keek import KeekIE | ||||
| from .la7 import LA7IE | ||||
| from .lifenews import LifeNewsIE | ||||
| from .liveleak import LiveLeakIE | ||||
| from .livestream import LivestreamIE, LivestreamOriginalIE | ||||
| from .lynda import ( | ||||
| @@ -141,6 +142,7 @@ from .newgrounds import NewgroundsIE | ||||
| from .nhl import NHLIE, NHLVideocenterIE | ||||
| from .niconico import NiconicoIE | ||||
| from .ninegag import NineGagIE | ||||
| from .normalboots import NormalbootsIE | ||||
| from .novamov import NovamovIE | ||||
| from .nowness import NownessIE | ||||
| from .nowvideo import NowVideoIE | ||||
| @@ -198,6 +200,7 @@ from .ted import TEDIE | ||||
| from .tf1 import TF1IE | ||||
| from .theplatform import ThePlatformIE | ||||
| from .thisav import ThisAVIE | ||||
| from .tinypic import TinyPicIE | ||||
| from .toutv import TouTvIE | ||||
| from .traileraddict import TrailerAddictIE | ||||
| from .trilulilu import TriluliluIE | ||||
| @@ -228,6 +231,7 @@ from .vimeo import ( | ||||
| from .vine import VineIE | ||||
| from .viki import VikiIE | ||||
| from .vk import VKIE | ||||
| from .vube import VubeIE | ||||
| from .wat import WatIE | ||||
| from .weibo import WeiboIE | ||||
| from .wimp import WimpIE | ||||
|   | ||||
| @@ -6,6 +6,7 @@ import re | ||||
| import socket | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_http_client, | ||||
|     compat_str, | ||||
| @@ -17,112 +18,125 @@ from ..utils import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
| class BlipTVIE(InfoExtractor): | ||||
| class BlipTVIE(SubtitlesInfoExtractor): | ||||
|     """Information extractor for blip.tv""" | ||||
|  | ||||
|     _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(.+)$' | ||||
|     _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(?P<presumptive_id>.+)$' | ||||
|  | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352', | ||||
|         'file': '5779306.mov', | ||||
|         'md5': 'c6934ad0b6acf2bd920720ec888eb812', | ||||
|         'info_dict': { | ||||
|             'id': '5779306', | ||||
|             'ext': 'mov', | ||||
|             'upload_date': '20111205', | ||||
|             'description': 'md5:9bc31f227219cde65e47eeec8d2dc596', | ||||
|             'uploader': 'Comic Book Resources - CBR TV', | ||||
|             'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def report_direct_download(self, title): | ||||
|         """Report information extraction.""" | ||||
|         self.to_screen('%s: Direct download detected' % title) | ||||
|     }, { | ||||
|         # https://github.com/rg3/youtube-dl/pull/2274 | ||||
|         'note': 'Video with subtitles', | ||||
|         'url': 'http://blip.tv/play/h6Uag5OEVgI.html', | ||||
|         'md5': '309f9d25b820b086ca163ffac8031806', | ||||
|         'info_dict': { | ||||
|             'id': '6586561', | ||||
|             'ext': 'mp4', | ||||
|             'uploader': 'Red vs. Blue', | ||||
|             'description': 'One-Zero-One', | ||||
|             'upload_date': '20130614', | ||||
|             'title': 'Red vs. Blue Season 11 Episode 1', | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError('Invalid URL: %s' % url) | ||||
|         presumptive_id = mobj.group('presumptive_id') | ||||
|  | ||||
|         # See https://github.com/rg3/youtube-dl/issues/857 | ||||
|         embed_mobj = re.search(r'^(?:https?://)?(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', url) | ||||
|         embed_mobj = re.match(r'https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', url) | ||||
|         if embed_mobj: | ||||
|             info_url = 'http://blip.tv/play/%s.x?p=1' % embed_mobj.group(1) | ||||
|             info_page = self._download_webpage(info_url, embed_mobj.group(1)) | ||||
|             video_id = self._search_regex(r'data-episode-id="(\d+)', info_page,  'video_id') | ||||
|             video_id = self._search_regex( | ||||
|                 r'data-episode-id="([0-9]+)', info_page, 'video_id') | ||||
|             return self.url_result('http://blip.tv/a/a-' + video_id, 'BlipTV') | ||||
|  | ||||
|         if '?' in url: | ||||
|             cchar = '&' | ||||
|         else: | ||||
|             cchar = '?' | ||||
|          | ||||
|         cchar = '&' if '?' in url else '?' | ||||
|         json_url = url + cchar + 'skin=json&version=2&no_wrap=1' | ||||
|         request = compat_urllib_request.Request(json_url) | ||||
|         request.add_header('User-Agent', 'iTunes/10.6.1') | ||||
|         self.report_extraction(mobj.group(1)) | ||||
|         urlh = self._request_webpage(request, None, False, | ||||
|             'unable to download video info webpage') | ||||
|  | ||||
|         try: | ||||
|             json_code_bytes = urlh.read() | ||||
|             json_code = json_code_bytes.decode('utf-8') | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             raise ExtractorError('Unable to read video info webpage: %s' % compat_str(err)) | ||||
|         json_data = self._download_json(request, video_id=presumptive_id) | ||||
|  | ||||
|         try: | ||||
|             json_data = json.loads(json_code) | ||||
|             if 'Post' in json_data: | ||||
|                 data = json_data['Post'] | ||||
|             else: | ||||
|                 data = json_data | ||||
|         if 'Post' in json_data: | ||||
|             data = json_data['Post'] | ||||
|         else: | ||||
|             data = json_data | ||||
|  | ||||
|             upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d') | ||||
|             formats = [] | ||||
|             if 'additionalMedia' in data: | ||||
|                 for f in sorted(data['additionalMedia'], key=lambda f: int(f['media_height'])): | ||||
|                     if not int(f['media_width']): # filter m3u8 | ||||
|                         continue | ||||
|                     formats.append({ | ||||
|                         'url': f['url'], | ||||
|                         'format_id': f['role'], | ||||
|                         'width': int(f['media_width']), | ||||
|                         'height': int(f['media_height']), | ||||
|                     }) | ||||
|             else: | ||||
|         video_id = compat_str(data['item_id']) | ||||
|         upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d') | ||||
|         subtitles = {} | ||||
|         formats = [] | ||||
|         if 'additionalMedia' in data: | ||||
|             for f in data['additionalMedia']: | ||||
|                 if f.get('file_type_srt') == 1: | ||||
|                     LANGS = { | ||||
|                         'english': 'en', | ||||
|                     } | ||||
|                     lang = f['role'].rpartition('-')[-1].strip().lower() | ||||
|                     langcode = LANGS.get(lang, lang) | ||||
|                     subtitles[langcode] = f['url'] | ||||
|                     continue | ||||
|                 if not int(f['media_width']):  # filter m3u8 | ||||
|                     continue | ||||
|                 formats.append({ | ||||
|                     'url': data['media']['url'], | ||||
|                     'width': int(data['media']['width']), | ||||
|                     'height': int(data['media']['height']), | ||||
|                     'url': f['url'], | ||||
|                     'format_id': f['role'], | ||||
|                     'width': int(f['media_width']), | ||||
|                     'height': int(f['media_height']), | ||||
|                 }) | ||||
|         else: | ||||
|             formats.append({ | ||||
|                 'url': data['media']['url'], | ||||
|                 'width': int(data['media']['width']), | ||||
|                 'height': int(data['media']['height']), | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|             self._sort_formats(formats) | ||||
|         # subtitles | ||||
|         video_subtitles = self.extract_subtitles(video_id, subtitles) | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id, subtitles) | ||||
|             return | ||||
|  | ||||
|             return { | ||||
|                 'id': compat_str(data['item_id']), | ||||
|                 'uploader': data['display_name'], | ||||
|                 'upload_date': upload_date, | ||||
|                 'title': data['title'], | ||||
|                 'thumbnail': data['thumbnailUrl'], | ||||
|                 'description': data['description'], | ||||
|                 'user_agent': 'iTunes/10.6.1', | ||||
|                 'formats': formats, | ||||
|             } | ||||
|         except (ValueError, KeyError) as err: | ||||
|             raise ExtractorError('Unable to parse video information: %s' % repr(err)) | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'uploader': data['display_name'], | ||||
|             'upload_date': upload_date, | ||||
|             'title': data['title'], | ||||
|             'thumbnail': data['thumbnailUrl'], | ||||
|             'description': data['description'], | ||||
|             'user_agent': 'iTunes/10.6.1', | ||||
|             'formats': formats, | ||||
|             'subtitles': video_subtitles, | ||||
|         } | ||||
|  | ||||
|     def _download_subtitle_url(self, sub_lang, url): | ||||
|         # For some weird reason, blip.tv serves a video instead of subtitles | ||||
|         # when we request with a common UA | ||||
|         req = compat_urllib_request.Request(url) | ||||
|         req.add_header('Youtubedl-user-agent', 'youtube-dl') | ||||
|         return self._download_webpage(req, None, note=False) | ||||
|  | ||||
|  | ||||
| class BlipTVUserIE(InfoExtractor): | ||||
|     """Information Extractor for blip.tv users.""" | ||||
|  | ||||
|     _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$' | ||||
|     _PAGE_SIZE = 12 | ||||
|     IE_NAME = 'blip.tv:user' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         # Extract username | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError('Invalid URL: %s' % url) | ||||
|  | ||||
|         username = mobj.group(1) | ||||
|  | ||||
|         page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1' | ||||
| @@ -131,7 +145,6 @@ class BlipTVUserIE(InfoExtractor): | ||||
|         mobj = re.search(r'data-users-id="([^"]+)"', page) | ||||
|         page_base = page_base % mobj.group(1) | ||||
|  | ||||
|  | ||||
|         # Download video ids using BlipTV Ajax calls. Result size per | ||||
|         # query is limited (currently to 12 videos) so we need to query | ||||
|         # page by page until there are no video ids - it means we got | ||||
| @@ -142,8 +155,8 @@ class BlipTVUserIE(InfoExtractor): | ||||
|  | ||||
|         while True: | ||||
|             url = page_base + "&page=" + str(pagenum) | ||||
|             page = self._download_webpage(url, username, | ||||
|                                           'Downloading video ids from page %d' % pagenum) | ||||
|             page = self._download_webpage( | ||||
|                 url, username, 'Downloading video ids from page %d' % pagenum) | ||||
|  | ||||
|             # Extract video identifiers | ||||
|             ids_in_page = [] | ||||
| @@ -167,4 +180,4 @@ class BlipTVUserIE(InfoExtractor): | ||||
|  | ||||
|         urls = ['http://blip.tv/%s' % video_id for video_id in video_ids] | ||||
|         url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls] | ||||
|         return [self.playlist_result(url_entries, playlist_title = username)] | ||||
|         return [self.playlist_result(url_entries, playlist_title=username)] | ||||
|   | ||||
| @@ -24,5 +24,5 @@ class BloombergIE(InfoExtractor): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         name = mobj.group('name') | ||||
|         webpage = self._download_webpage(url, name) | ||||
|         ooyala_code = self._search_regex(r'<source src="http://player.ooyala.com/player/[^/]+/([^".]+)', webpage, u'ooyala url') | ||||
|         return OoyalaIE._build_url_result(ooyala_code) | ||||
|         ooyala_url = self._twitter_search_player(webpage) | ||||
|         return self.url_result(ooyala_url, OoyalaIE.ie_key()) | ||||
|   | ||||
| @@ -127,25 +127,28 @@ class BrightcoveIE(InfoExtractor): | ||||
|  | ||||
|     @classmethod | ||||
|     def _extract_brightcove_url(cls, webpage): | ||||
|         """Try to extract the brightcove url from the wepbage, returns None | ||||
|         """Try to extract the brightcove url from the webpage, returns None | ||||
|         if it can't be found | ||||
|         """ | ||||
|         urls = cls._extract_brightcove_urls(webpage) | ||||
|         return urls[0] if urls else None | ||||
|  | ||||
|     @classmethod | ||||
|     def _extract_brightcove_urls(cls, webpage): | ||||
|         """Return a list of all Brightcove URLs from the webpage """ | ||||
|  | ||||
|         url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage) | ||||
|         if url_m: | ||||
|             return url_m.group(1) | ||||
|             return [url_m.group(1)] | ||||
|  | ||||
|         m_brightcove = re.search( | ||||
|         matches = re.findall( | ||||
|             r'''(?sx)<object | ||||
|             (?: | ||||
|                 [^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1 | | ||||
|                 [^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] | | ||||
|                 [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/ | ||||
|             ).+?</object>''', | ||||
|             webpage) | ||||
|         if m_brightcove is not None: | ||||
|             return cls._build_brighcove_url(m_brightcove.group()) | ||||
|         else: | ||||
|             return None | ||||
|         return [cls._build_brighcove_url(m) for m in matches] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         url, smuggled_data = unsmuggle_url(url, {}) | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| @@ -11,38 +11,38 @@ class Channel9IE(InfoExtractor): | ||||
|  | ||||
|     The type of provided URL (video or playlist) is determined according to | ||||
|     meta Search.PageType from web page HTML rather than URL itself, as it is | ||||
|     not always possible to do.     | ||||
|     not always possible to do. | ||||
|     ''' | ||||
|     IE_DESC = u'Channel 9' | ||||
|     IE_NAME = u'channel9' | ||||
|     IE_DESC = 'Channel 9' | ||||
|     IE_NAME = 'channel9' | ||||
|     _VALID_URL = r'^https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             u'url': u'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002', | ||||
|             u'file': u'Events_TechEd_Australia_2013_KOS002.mp4', | ||||
|             u'md5': u'bbd75296ba47916b754e73c3a4bbdf10', | ||||
|             u'info_dict': { | ||||
|                 u'title': u'Developer Kick-Off Session: Stuff We Love', | ||||
|                 u'description': u'md5:c08d72240b7c87fcecafe2692f80e35f', | ||||
|                 u'duration': 4576, | ||||
|                 u'thumbnail': u'http://media.ch9.ms/ch9/9d51/03902f2d-fc97-4d3c-b195-0bfe15a19d51/KOS002_220.jpg', | ||||
|                 u'session_code': u'KOS002', | ||||
|                 u'session_day': u'Day 1', | ||||
|                 u'session_room': u'Arena 1A', | ||||
|                 u'session_speakers': [ u'Ed Blankenship', u'Andrew Coates', u'Brady Gaster', u'Patrick Klug', u'Mads Kristensen' ], | ||||
|             'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002', | ||||
|             'file': 'Events_TechEd_Australia_2013_KOS002.mp4', | ||||
|             'md5': 'bbd75296ba47916b754e73c3a4bbdf10', | ||||
|             'info_dict': { | ||||
|                 'title': 'Developer Kick-Off Session: Stuff We Love', | ||||
|                 'description': 'md5:c08d72240b7c87fcecafe2692f80e35f', | ||||
|                 'duration': 4576, | ||||
|                 'thumbnail': 'http://media.ch9.ms/ch9/9d51/03902f2d-fc97-4d3c-b195-0bfe15a19d51/KOS002_220.jpg', | ||||
|                 'session_code': 'KOS002', | ||||
|                 'session_day': 'Day 1', | ||||
|                 'session_room': 'Arena 1A', | ||||
|                 'session_speakers': [ 'Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug', 'Mads Kristensen' ], | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             u'url': u'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing', | ||||
|             u'file': u'posts_Self-service-BI-with-Power-BI-nuclear-testing.mp4', | ||||
|             u'md5': u'b43ee4529d111bc37ba7ee4f34813e68', | ||||
|             u'info_dict': { | ||||
|                 u'title': u'Self-service BI with Power BI - nuclear testing', | ||||
|                 u'description': u'md5:d1e6ecaafa7fb52a2cacdf9599829f5b', | ||||
|                 u'duration': 1540, | ||||
|                 u'thumbnail': u'http://media.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg', | ||||
|                 u'authors': [ u'Mike Wilmot' ], | ||||
|             'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing', | ||||
|             'file': 'posts_Self-service-BI-with-Power-BI-nuclear-testing.mp4', | ||||
|             'md5': 'b43ee4529d111bc37ba7ee4f34813e68', | ||||
|             'info_dict': { | ||||
|                 'title': 'Self-service BI with Power BI - nuclear testing', | ||||
|                 'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b', | ||||
|                 'duration': 1540, | ||||
|                 'thumbnail': 'http://media.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg', | ||||
|                 'authors': [ 'Mike Wilmot' ], | ||||
|             }, | ||||
|         } | ||||
|     ] | ||||
| @@ -60,7 +60,7 @@ class Channel9IE(InfoExtractor): | ||||
|             return 0 | ||||
|         units = m.group('units') | ||||
|         try: | ||||
|             exponent = [u'B', u'KB', u'MB', u'GB', u'TB', u'PB', u'EB', u'ZB', u'YB'].index(units.upper()) | ||||
|             exponent = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'].index(units.upper()) | ||||
|         except ValueError: | ||||
|             return 0 | ||||
|         size = float(m.group('size')) | ||||
| @@ -80,7 +80,7 @@ class Channel9IE(InfoExtractor): | ||||
|             'url': x.group('url'), | ||||
|             'format_id': x.group('quality'), | ||||
|             'format_note': x.group('note'), | ||||
|             'format': u'%s (%s)' % (x.group('quality'), x.group('note')), | ||||
|             'format': '%s (%s)' % (x.group('quality'), x.group('note')), | ||||
|             'filesize': self._restore_bytes(x.group('filesize')), # File size is approximate | ||||
|             'preference': self._known_formats.index(x.group('quality')), | ||||
|             'vcodec': 'none' if x.group('note') == 'Audio only' else None, | ||||
| @@ -91,10 +91,10 @@ class Channel9IE(InfoExtractor): | ||||
|         return formats | ||||
|  | ||||
|     def _extract_title(self, html): | ||||
|         title = self._html_search_meta(u'title', html, u'title') | ||||
|         title = self._html_search_meta('title', html, 'title') | ||||
|         if title is None:            | ||||
|             title = self._og_search_title(html) | ||||
|             TITLE_SUFFIX = u' (Channel 9)' | ||||
|             TITLE_SUFFIX = ' (Channel 9)' | ||||
|             if title is not None and title.endswith(TITLE_SUFFIX): | ||||
|                 title = title[:-len(TITLE_SUFFIX)] | ||||
|         return title | ||||
| @@ -110,7 +110,7 @@ class Channel9IE(InfoExtractor): | ||||
|         m = re.search(DESCRIPTION_REGEX, html) | ||||
|         if m is not None: | ||||
|             return m.group('description') | ||||
|         return self._html_search_meta(u'description', html, u'description') | ||||
|         return self._html_search_meta('description', html, 'description') | ||||
|  | ||||
|     def _extract_duration(self, html): | ||||
|         m = re.search(r'data-video_duration="(?P<hours>\d{2}):(?P<minutes>\d{2}):(?P<seconds>\d{2})"', html) | ||||
| @@ -172,7 +172,7 @@ class Channel9IE(InfoExtractor): | ||||
|  | ||||
|         # Nothing to download | ||||
|         if len(formats) == 0 and slides is None and zip_ is None: | ||||
|             self._downloader.report_warning(u'None of recording, slides or zip are available for %s' % content_path) | ||||
|             self._downloader.report_warning('None of recording, slides or zip are available for %s' % content_path) | ||||
|             return | ||||
|  | ||||
|         # Extract meta | ||||
| @@ -244,7 +244,7 @@ class Channel9IE(InfoExtractor): | ||||
|         return contents | ||||
|  | ||||
|     def _extract_list(self, content_path): | ||||
|         rss = self._download_xml(self._RSS_URL % content_path, content_path, u'Downloading RSS') | ||||
|         rss = self._download_xml(self._RSS_URL % content_path, content_path, 'Downloading RSS') | ||||
|         entries = [self.url_result(session_url.text, 'Channel9') | ||||
|                    for session_url in rss.findall('./channel/item/link')] | ||||
|         title_text = rss.find('./channel/title').text | ||||
| @@ -254,11 +254,11 @@ class Channel9IE(InfoExtractor): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         content_path = mobj.group('contentpath') | ||||
|  | ||||
|         webpage = self._download_webpage(url, content_path, u'Downloading web page') | ||||
|         webpage = self._download_webpage(url, content_path, 'Downloading web page') | ||||
|  | ||||
|         page_type_m = re.search(r'<meta name="Search.PageType" content="(?P<pagetype>[^"]+)"/>', webpage) | ||||
|         if page_type_m is None: | ||||
|             raise ExtractorError(u'Search.PageType not found, don\'t know how to process this page', expected=True) | ||||
|             raise ExtractorError('Search.PageType not found, don\'t know how to process this page', expected=True) | ||||
|  | ||||
|         page_type = page_type_m.group('pagetype') | ||||
|         if page_type == 'List':         # List page, may contain list of 'item'-like objects | ||||
| @@ -268,4 +268,4 @@ class Channel9IE(InfoExtractor): | ||||
|         elif page_type == 'Session':    # Event session page, may contain downloadable content | ||||
|             return self._extract_session(webpage, content_path) | ||||
|         else: | ||||
|             raise ExtractorError(u'Unexpected Search.PageType %s' % page_type, expected=True) | ||||
|             raise ExtractorError('Unexpected Search.PageType %s' % page_type, expected=True) | ||||
| @@ -1,12 +1,9 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import string | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| translation_table = { | ||||
|     'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n', | ||||
|   | ||||
| @@ -28,7 +28,25 @@ class CollegeHumorIE(InfoExtractor): | ||||
|             'description': 'This video wasn\'t long enough, so we made it double-spaced.', | ||||
|             'age_limit': 10, | ||||
|         }, | ||||
|     }] | ||||
|     }, | ||||
|     # embedded youtube video | ||||
|     { | ||||
|         'url': 'http://www.collegehumor.com/embed/6950457', | ||||
|         'info_dict': { | ||||
|             'id': 'W5gMp3ZjYg4', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]', | ||||
|             'uploader': 'Funnyplox TV', | ||||
|             'uploader_id': 'funnyploxtv', | ||||
|             'description': 'md5:506f69f7a297ed698ced3375f2363b0e', | ||||
|             'upload_date': '20140128', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'add_ie': ['Youtube'], | ||||
|     }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
| @@ -38,6 +56,12 @@ class CollegeHumorIE(InfoExtractor): | ||||
|         data = json.loads(self._download_webpage( | ||||
|             jsonUrl, video_id, 'Downloading info JSON')) | ||||
|         vdata = data['video'] | ||||
|         if vdata.get('youtubeId') is not None: | ||||
|             return { | ||||
|                 '_type': 'url', | ||||
|                 'url': vdata['youtubeId'], | ||||
|                 'ie_key': 'Youtube', | ||||
|             } | ||||
|  | ||||
|         AGE_LIMITS = {'nc17': 18, 'r': 18, 'pg13': 13, 'pg': 10, 'g': 0} | ||||
|         rating = vdata.get('rating') | ||||
| @@ -49,7 +73,7 @@ class CollegeHumorIE(InfoExtractor): | ||||
|         PREFS = {'high_quality': 2, 'low_quality': 0} | ||||
|         formats = [] | ||||
|         for format_key in ('mp4', 'webm'): | ||||
|             for qname, qurl in vdata[format_key].items(): | ||||
|             for qname, qurl in vdata.get(format_key, {}).items(): | ||||
|                 formats.append({ | ||||
|                     'format_id': format_key + '_' + qname, | ||||
|                     'url': qurl, | ||||
|   | ||||
| @@ -14,7 +14,7 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class ComedyCentralIE(MTVServicesInfoExtractor): | ||||
|     _VALID_URL = r'''(?x)https?://(?:www.)?comedycentral.com/ | ||||
|     _VALID_URL = r'''(?x)https?://(?:www\.)?comedycentral\.com/ | ||||
|         (video-clips|episodes|cc-studios|video-collections) | ||||
|         /(?P<title>.*)''' | ||||
|     _FEED_URL = 'http://comedycentral.com/feeds/mrss/' | ||||
| @@ -86,7 +86,7 @@ class ComedyCentralShowsIE(InfoExtractor): | ||||
|  | ||||
|     @staticmethod | ||||
|     def _transform_rtmp_url(rtmp_video_url): | ||||
|         m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url) | ||||
|         m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\.comedystor/.*)$', rtmp_video_url) | ||||
|         if not m: | ||||
|             raise ExtractorError('Cannot transform RTMP url') | ||||
|         base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/' | ||||
|   | ||||
| @@ -399,7 +399,7 @@ class InfoExtractor(object): | ||||
|     # Helper functions for extracting OpenGraph info | ||||
|     @staticmethod | ||||
|     def _og_regexes(prop): | ||||
|         content_re = r'content=(?:"([^>]+?)"|\'(.+?)\')' | ||||
|         content_re = r'content=(?:"([^>]+?)"|\'([^>]+?)\')' | ||||
|         property_re = r'(?:name|property)=[\'"]og:%s[\'"]' % re.escape(prop) | ||||
|         template = r'<meta[^>]+?%s[^>]+?%s' | ||||
|         return [ | ||||
| @@ -465,6 +465,10 @@ class InfoExtractor(object): | ||||
|         } | ||||
|         return RATING_TABLE.get(rating.lower(), None) | ||||
|  | ||||
|     def _twitter_search_player(self, html): | ||||
|         return self._html_search_meta('twitter:player', html, | ||||
|             'twitter card player') | ||||
|  | ||||
|     def _sort_formats(self, formats): | ||||
|         if not formats: | ||||
|             raise ExtractorError(u'No video formats found') | ||||
|   | ||||
| @@ -1,4 +1,6 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re, base64, zlib | ||||
| from hashlib import sha1 | ||||
| from math import pow, sqrt, floor | ||||
| @@ -18,29 +20,29 @@ from ..aes import ( | ||||
| ) | ||||
|  | ||||
| class CrunchyrollIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:https?://)?(?:www\.)?(?P<url>crunchyroll\.com/[^/]*/[^/?&]*?(?P<video_id>[0-9]+))(?:[/?&]|$)' | ||||
|     _VALID_URL = r'(?:https?://)?(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)' | ||||
|     _TESTS = [{ | ||||
|         u'url': u'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', | ||||
|         u'file': u'645513.flv', | ||||
|         #u'md5': u'b1639fd6ddfaa43788c85f6d1dddd412', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!', | ||||
|             u'description': u'md5:2d17137920c64f2f49981a7797d275ef', | ||||
|             u'thumbnail': u'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg', | ||||
|             u'uploader': u'Yomiuri Telecasting Corporation (YTV)', | ||||
|             u'upload_date': u'20131013', | ||||
|         'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', | ||||
|         'file': '645513.flv', | ||||
|         #'md5': 'b1639fd6ddfaa43788c85f6d1dddd412', | ||||
|         'info_dict': { | ||||
|             'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!', | ||||
|             'description': 'md5:2d17137920c64f2f49981a7797d275ef', | ||||
|             'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg', | ||||
|             'uploader': 'Yomiuri Telecasting Corporation (YTV)', | ||||
|             'upload_date': '20131013', | ||||
|         }, | ||||
|         u'params': { | ||||
|         'params': { | ||||
|             # rtmp | ||||
|             u'skip_download': True, | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     _FORMAT_IDS = { | ||||
|         u'360': (u'60', u'106'), | ||||
|         u'480': (u'61', u'106'), | ||||
|         u'720': (u'62', u'106'), | ||||
|         u'1080': (u'80', u'108'), | ||||
|         '360': ('60', '106'), | ||||
|         '480': ('61', '106'), | ||||
|         '720': ('62', '106'), | ||||
|         '1080': ('80', '108'), | ||||
|     } | ||||
|  | ||||
|     def _decrypt_subtitles(self, data, iv, id): | ||||
| @@ -63,7 +65,7 @@ class CrunchyrollIE(InfoExtractor): | ||||
|             num3 = key ^ num1 | ||||
|             num4 = num3 ^ (num3 >> 3) ^ num2 | ||||
|             prefix = intlist_to_bytes(obfuscate_key_aux(20, 97, (1, 2))) | ||||
|             shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode(u'ascii')).digest()) | ||||
|             shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode('ascii')).digest()) | ||||
|             # Extend 160 Bit hash to 256 Bit | ||||
|             return shaHash + [0] * 12 | ||||
|          | ||||
| @@ -79,93 +81,98 @@ class CrunchyrollIE(InfoExtractor): | ||||
|  | ||||
|     def _convert_subtitles_to_srt(self, subtitles): | ||||
|         i=1 | ||||
|         output = u'' | ||||
|         output = '' | ||||
|         for start, end, text in re.findall(r'<event [^>]*?start="([^"]+)" [^>]*?end="([^"]+)" [^>]*?text="([^"]+)"[^>]*?>', subtitles): | ||||
|             start = start.replace(u'.', u',') | ||||
|             end = end.replace(u'.', u',') | ||||
|             start = start.replace('.', ',') | ||||
|             end = end.replace('.', ',') | ||||
|             text = clean_html(text) | ||||
|             text = text.replace(u'\\N', u'\n') | ||||
|             text = text.replace('\\N', '\n') | ||||
|             if not text: | ||||
|                 continue | ||||
|             output += u'%d\n%s --> %s\n%s\n\n' % (i, start, end, text) | ||||
|             output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text) | ||||
|             i+=1 | ||||
|         return output | ||||
|  | ||||
|     def _real_extract(self,url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('video_id') | ||||
|  | ||||
|         webpage_url = u'http://www.' + mobj.group('url') | ||||
|         video_id = mobj.group(u'video_id') | ||||
|         webpage = self._download_webpage(webpage_url, video_id) | ||||
|         note_m = self._html_search_regex(r'<div class="showmedia-trailer-notice">(.+?)</div>', webpage, u'trailer-notice', default=u'') | ||||
|         if mobj.group('prefix') == 'm': | ||||
|             mobile_webpage = self._download_webpage(url, video_id, 'Downloading mobile webpage') | ||||
|             webpage_url = self._search_regex(r'<link rel="canonical" href="([^"]+)" />', mobile_webpage, 'webpage_url') | ||||
|         else: | ||||
|             webpage_url = 'http://www.' + mobj.group('url') | ||||
|  | ||||
|         webpage = self._download_webpage(webpage_url, video_id, 'Downloading webpage') | ||||
|         note_m = self._html_search_regex(r'<div class="showmedia-trailer-notice">(.+?)</div>', webpage, 'trailer-notice', default='') | ||||
|         if note_m: | ||||
|             raise ExtractorError(note_m) | ||||
|  | ||||
|         video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, u'video_title', flags=re.DOTALL) | ||||
|         video_title = re.sub(r' {2,}', u' ', video_title) | ||||
|         video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, u'video_description', default=u'') | ||||
|         video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, 'video_title', flags=re.DOTALL) | ||||
|         video_title = re.sub(r' {2,}', ' ', video_title) | ||||
|         video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='') | ||||
|         if not video_description: | ||||
|             video_description = None | ||||
|         video_upload_date = self._html_search_regex(r'<div>Availability for free users:(.+?)</div>', webpage, u'video_upload_date', fatal=False, flags=re.DOTALL) | ||||
|         video_upload_date = self._html_search_regex(r'<div>Availability for free users:(.+?)</div>', webpage, 'video_upload_date', fatal=False, flags=re.DOTALL) | ||||
|         if video_upload_date: | ||||
|             video_upload_date = unified_strdate(video_upload_date) | ||||
|         video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, u'video_uploader', fatal=False, flags=re.DOTALL) | ||||
|         video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, 'video_uploader', fatal=False, flags=re.DOTALL) | ||||
|  | ||||
|         playerdata_url = compat_urllib_parse.unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, u'playerdata_url')) | ||||
|         playerdata_url = compat_urllib_parse.unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url')) | ||||
|         playerdata_req = compat_urllib_request.Request(playerdata_url) | ||||
|         playerdata_req.data = compat_urllib_parse.urlencode({u'current_page': webpage_url}) | ||||
|         playerdata_req.add_header(u'Content-Type', u'application/x-www-form-urlencoded') | ||||
|         playerdata = self._download_webpage(playerdata_req, video_id, note=u'Downloading media info') | ||||
|         playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url}) | ||||
|         playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|         playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info') | ||||
|          | ||||
|         stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, u'stream_id') | ||||
|         video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, u'thumbnail', fatal=False) | ||||
|         stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, 'stream_id') | ||||
|         video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False) | ||||
|  | ||||
|         formats = [] | ||||
|         for fmt in re.findall(r'\?p([0-9]{3,4})=1', webpage): | ||||
|             stream_quality, stream_format = self._FORMAT_IDS[fmt] | ||||
|             video_format = fmt+u'p' | ||||
|             streamdata_req = compat_urllib_request.Request(u'http://www.crunchyroll.com/xml/') | ||||
|             video_format = fmt+'p' | ||||
|             streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/') | ||||
|             # urlencode doesn't work! | ||||
|             streamdata_req.data = u'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality='+stream_quality+u'&media%5Fid='+stream_id+u'&video%5Fformat='+stream_format | ||||
|             streamdata_req.add_header(u'Content-Type', u'application/x-www-form-urlencoded') | ||||
|             streamdata_req.add_header(u'Content-Length', str(len(streamdata_req.data))) | ||||
|             streamdata = self._download_webpage(streamdata_req, video_id, note=u'Downloading media info for '+video_format) | ||||
|             video_url = self._search_regex(r'<host>([^<]+)', streamdata, u'video_url') | ||||
|             video_play_path = self._search_regex(r'<file>([^<]+)', streamdata, u'video_play_path') | ||||
|             streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality='+stream_quality+'&media%5Fid='+stream_id+'&video%5Fformat='+stream_format | ||||
|             streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|             streamdata_req.add_header('Content-Length', str(len(streamdata_req.data))) | ||||
|             streamdata = self._download_webpage(streamdata_req, video_id, note='Downloading media info for '+video_format) | ||||
|             video_url = self._search_regex(r'<host>([^<]+)', streamdata, 'video_url') | ||||
|             video_play_path = self._search_regex(r'<file>([^<]+)', streamdata, 'video_play_path') | ||||
|             formats.append({ | ||||
|                 u'url': video_url, | ||||
|                 u'play_path':   video_play_path, | ||||
|                 u'ext': 'flv', | ||||
|                 u'format': video_format, | ||||
|                 u'format_id': video_format, | ||||
|                 'url': video_url, | ||||
|                 'play_path':   video_play_path, | ||||
|                 'ext': 'flv', | ||||
|                 'format': video_format, | ||||
|                 'format_id': video_format, | ||||
|             }) | ||||
|  | ||||
|         subtitles = {} | ||||
|         for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage): | ||||
|             sub_page = self._download_webpage(u'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id='+sub_id,\ | ||||
|                                               video_id, note=u'Downloading subtitles for '+sub_name) | ||||
|             id = self._search_regex(r'id=\'([0-9]+)', sub_page, u'subtitle_id', fatal=False) | ||||
|             iv = self._search_regex(r'<iv>([^<]+)', sub_page, u'subtitle_iv', fatal=False) | ||||
|             data = self._search_regex(r'<data>([^<]+)', sub_page, u'subtitle_data', fatal=False) | ||||
|             sub_page = self._download_webpage('http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id='+sub_id,\ | ||||
|                                               video_id, note='Downloading subtitles for '+sub_name) | ||||
|             id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False) | ||||
|             iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False) | ||||
|             data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False) | ||||
|             if not id or not iv or not data: | ||||
|                 continue | ||||
|             id = int(id) | ||||
|             iv = base64.b64decode(iv) | ||||
|             data = base64.b64decode(data) | ||||
|  | ||||
|             subtitle = self._decrypt_subtitles(data, iv, id).decode(u'utf-8') | ||||
|             lang_code = self._search_regex(r'lang_code=\'([^\']+)', subtitle, u'subtitle_lang_code', fatal=False) | ||||
|             subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8') | ||||
|             lang_code = self._search_regex(r'lang_code=\'([^\']+)', subtitle, 'subtitle_lang_code', fatal=False) | ||||
|             if not lang_code: | ||||
|                 continue | ||||
|             subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle) | ||||
|  | ||||
|         return { | ||||
|             u'id':          video_id, | ||||
|             u'title':       video_title, | ||||
|             u'description': video_description, | ||||
|             u'thumbnail':   video_thumbnail, | ||||
|             u'uploader':    video_uploader, | ||||
|             u'upload_date': video_upload_date, | ||||
|             u'subtitles':   subtitles, | ||||
|             u'formats':     formats, | ||||
|             'id':          video_id, | ||||
|             'title':       video_title, | ||||
|             'description': video_description, | ||||
|             'thumbnail':   video_thumbnail, | ||||
|             'uploader':    video_uploader, | ||||
|             'upload_date': video_upload_date, | ||||
|             'subtitles':   subtitles, | ||||
|             'formats':     formats, | ||||
|         } | ||||
|   | ||||
| @@ -1,49 +1,60 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     unescapeHTML, | ||||
|     find_xpath_attr, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class CSpanIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?:www\.)?c-spanvideo\.org/program/(?P<name>.*)' | ||||
|     _VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>\d+)' | ||||
|     IE_DESC = 'C-SPAN' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.c-spanvideo.org/program/HolderonV', | ||||
|         'file': '315139.mp4', | ||||
|         'url': 'http://www.c-span.org/video/?313572-1/HolderonV', | ||||
|         'md5': '8e44ce11f0f725527daccc453f553eb0', | ||||
|         'info_dict': { | ||||
|             'id': '315139', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Attorney General Eric Holder on Voting Rights Act Decision', | ||||
|             'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in [Shelby County v. Holder] in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.', | ||||
|             'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in Shelby County v. Holder in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.', | ||||
|         }, | ||||
|         'skip': 'Regularly fails on travis, for unknown reasons', | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         prog_name = mobj.group('name') | ||||
|         webpage = self._download_webpage(url, prog_name) | ||||
|         video_id = self._search_regex(r'prog(?:ram)?id=(.*?)&', webpage, 'video id') | ||||
|         page_id = mobj.group('id') | ||||
|         webpage = self._download_webpage(url, page_id) | ||||
|         video_id = self._search_regex(r'data-progid=\'(\d+)\'>', webpage, 'video id') | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'<!-- title -->\n\s*<h1[^>]*>(.*?)</h1>', webpage, 'title') | ||||
|         description = self._og_search_description(webpage) | ||||
|         description = self._html_search_regex( | ||||
|             [ | ||||
|                 # The full description | ||||
|                 r'<div class=\'expandable\'>(.*?)<a href=\'#\'', | ||||
|                 # If the description is small enough the other div is not | ||||
|                 # present, otherwise this is a stripped version | ||||
|                 r'<p class=\'initial\'>(.*?)</p>' | ||||
|             ], | ||||
|             webpage, 'description', flags=re.DOTALL) | ||||
|  | ||||
|         info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id | ||||
|         data_json = self._download_webpage( | ||||
|             info_url, video_id, 'Downloading video info') | ||||
|         data = json.loads(data_json) | ||||
|         data = self._download_json(info_url, video_id) | ||||
|  | ||||
|         url = unescapeHTML(data['video']['files'][0]['path']['#text']) | ||||
|  | ||||
|         doc = self._download_xml('http://www.c-span.org/common/services/flashXml.php?programid=' + video_id, | ||||
|             video_id) | ||||
|  | ||||
|         def find_string(s): | ||||
|             return find_xpath_attr(doc, './/string', 'name', s).text | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'title': find_string('title'), | ||||
|             'url': url, | ||||
|             'description': description, | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'thumbnail': find_string('poster'), | ||||
|         } | ||||
|   | ||||
| @@ -1,4 +1,7 @@ | ||||
| # encoding: utf-8 | ||||
|  | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
|  | ||||
| @@ -30,7 +33,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor): | ||||
|  | ||||
|  | ||||
| class PluzzIE(FranceTVBaseInfoExtractor): | ||||
|     IE_NAME = u'pluzz.francetv.fr' | ||||
|     IE_NAME = 'pluzz.francetv.fr' | ||||
|     _VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html' | ||||
|  | ||||
|     # Can't use tests, videos expire in 7 days | ||||
| @@ -44,17 +47,17 @@ class PluzzIE(FranceTVBaseInfoExtractor): | ||||
|  | ||||
|  | ||||
| class FranceTvInfoIE(FranceTVBaseInfoExtractor): | ||||
|     IE_NAME = u'francetvinfo.fr' | ||||
|     IE_NAME = 'francetvinfo.fr' | ||||
|     _VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+)\.html' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html', | ||||
|         u'file': u'84981923.mp4', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Soir 3', | ||||
|         'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html', | ||||
|         'file': '84981923.mp4', | ||||
|         'info_dict': { | ||||
|             'title': 'Soir 3', | ||||
|         }, | ||||
|         u'params': { | ||||
|             u'skip_download': True, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
| @@ -62,13 +65,13 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         page_title = mobj.group('title') | ||||
|         webpage = self._download_webpage(url, page_title) | ||||
|         video_id = self._search_regex(r'id-video=(\d+?)"', webpage, u'video id') | ||||
|         video_id = self._search_regex(r'id-video=(\d+?)[@"]', webpage, 'video id') | ||||
|         return self._extract_video(video_id) | ||||
|  | ||||
|  | ||||
| class FranceTVIE(FranceTVBaseInfoExtractor): | ||||
|     IE_NAME = u'francetv' | ||||
|     IE_DESC = u'France 2, 3, 4, 5 and Ô' | ||||
|     IE_NAME = 'francetv' | ||||
|     IE_DESC = 'France 2, 3, 4, 5 and Ô' | ||||
|     _VALID_URL = r'''(?x)https?://www\.france[2345o]\.fr/ | ||||
|         (?: | ||||
|             emissions/.*?/(videos|emissions)/(?P<id>[^/?]+) | ||||
| @@ -78,73 +81,73 @@ class FranceTVIE(FranceTVBaseInfoExtractor): | ||||
|     _TESTS = [ | ||||
|         # france2 | ||||
|         { | ||||
|             u'url': u'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104', | ||||
|             u'file': u'75540104.mp4', | ||||
|             u'info_dict': { | ||||
|                 u'title': u'13h15, le samedi...', | ||||
|                 u'description': u'md5:2e5b58ba7a2d3692b35c792be081a03d', | ||||
|             'url': 'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104', | ||||
|             'file': '75540104.mp4', | ||||
|             'info_dict': { | ||||
|                 'title': '13h15, le samedi...', | ||||
|                 'description': 'md5:2e5b58ba7a2d3692b35c792be081a03d', | ||||
|             }, | ||||
|             u'params': { | ||||
|             'params': { | ||||
|                 # m3u8 download | ||||
|                 u'skip_download': True, | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         # france3 | ||||
|         { | ||||
|             u'url': u'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575', | ||||
|             u'info_dict': { | ||||
|                 u'id': u'000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au', | ||||
|                 u'ext': u'flv', | ||||
|                 u'title': u'Le scandale du prix des médicaments', | ||||
|                 u'description': u'md5:1384089fbee2f04fc6c9de025ee2e9ce', | ||||
|             'url': 'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575', | ||||
|             'info_dict': { | ||||
|                 'id': '000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Le scandale du prix des médicaments', | ||||
|                 'description': 'md5:1384089fbee2f04fc6c9de025ee2e9ce', | ||||
|             }, | ||||
|             u'params': { | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 u'skip_download': True, | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         # france4 | ||||
|         { | ||||
|             u'url': u'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4', | ||||
|             u'info_dict': { | ||||
|                 u'id': u'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4', | ||||
|                 u'ext': u'flv', | ||||
|                 u'title': u'Hero Corp Making of - Extrait 1', | ||||
|                 u'description': u'md5:c87d54871b1790679aec1197e73d650a', | ||||
|             'url': 'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4', | ||||
|             'info_dict': { | ||||
|                 'id': 'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Hero Corp Making of - Extrait 1', | ||||
|                 'description': 'md5:c87d54871b1790679aec1197e73d650a', | ||||
|             }, | ||||
|             u'params': { | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 u'skip_download': True, | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         # france5 | ||||
|         { | ||||
|             u'url': u'http://www.france5.fr/emissions/c-a-dire/videos/92837968', | ||||
|             u'info_dict': { | ||||
|                 u'id': u'92837968', | ||||
|                 u'ext': u'mp4', | ||||
|                 u'title': u'C à dire ?!', | ||||
|                 u'description': u'md5:fb1db1cbad784dcce7c7a7bd177c8e2f', | ||||
|             'url': 'http://www.france5.fr/emissions/c-a-dire/videos/92837968', | ||||
|             'info_dict': { | ||||
|                 'id': '92837968', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'C à dire ?!', | ||||
|                 'description': 'md5:fb1db1cbad784dcce7c7a7bd177c8e2f', | ||||
|             }, | ||||
|             u'params': { | ||||
|             'params': { | ||||
|                 # m3u8 download | ||||
|                 u'skip_download': True, | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         # franceo | ||||
|         { | ||||
|             u'url': u'http://www.franceo.fr/jt/info-afrique/04-12-2013', | ||||
|             u'info_dict': { | ||||
|                 u'id': u'92327925', | ||||
|                 u'ext': u'mp4', | ||||
|                 u'title': u'Infô-Afrique', | ||||
|                 u'description': u'md5:ebf346da789428841bee0fd2a935ea55', | ||||
|             'url': 'http://www.franceo.fr/jt/info-afrique/04-12-2013', | ||||
|             'info_dict': { | ||||
|                 'id': '92327925', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Infô-Afrique', | ||||
|                 'description': 'md5:ebf346da789428841bee0fd2a935ea55', | ||||
|             }, | ||||
|             u'params': { | ||||
|             'params': { | ||||
|                 # m3u8 download | ||||
|                 u'skip_download': True, | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|             u'skip': u'The id changes frequently', | ||||
|             'skip': 'The id changes frequently', | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
| @@ -160,26 +163,26 @@ class FranceTVIE(FranceTVBaseInfoExtractor): | ||||
|                  '\.fr/\?id-video=([^"/&]+)'), | ||||
|                 (r'<a class="video" id="ftv_player_(.+?)"'), | ||||
|             ] | ||||
|             video_id = self._html_search_regex(id_res, webpage, u'video ID') | ||||
|             video_id = self._html_search_regex(id_res, webpage, 'video ID') | ||||
|         else: | ||||
|             video_id = mobj.group('id') | ||||
|         return self._extract_video(video_id) | ||||
|  | ||||
|  | ||||
| class GenerationQuoiIE(InfoExtractor): | ||||
|     IE_NAME = u'france2.fr:generation-quoi' | ||||
|     IE_NAME = 'france2.fr:generation-quoi' | ||||
|     _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<name>.*)(\?|$)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://generation-quoi.france2.fr/portrait/garde-a-vous', | ||||
|         u'file': u'k7FJX8VBcvvLmX4wA5Q.mp4', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Génération Quoi - Garde à Vous', | ||||
|             u'uploader': u'Génération Quoi', | ||||
|         'url': 'http://generation-quoi.france2.fr/portrait/garde-a-vous', | ||||
|         'file': 'k7FJX8VBcvvLmX4wA5Q.mp4', | ||||
|         'info_dict': { | ||||
|             'title': 'Génération Quoi - Garde à Vous', | ||||
|             'uploader': 'Génération Quoi', | ||||
|         }, | ||||
|         u'params': { | ||||
|         'params': { | ||||
|             # It uses Dailymotion | ||||
|             u'skip_download': True, | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
| @@ -194,20 +197,20 @@ class GenerationQuoiIE(InfoExtractor): | ||||
|  | ||||
|  | ||||
| class CultureboxIE(FranceTVBaseInfoExtractor): | ||||
|     IE_NAME = u'culturebox.francetvinfo.fr' | ||||
|     IE_NAME = 'culturebox.francetvinfo.fr' | ||||
|     _VALID_URL = r'https?://culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813', | ||||
|         u'info_dict': { | ||||
|             u'id': u'EV_6785', | ||||
|             u'ext': u'mp4', | ||||
|             u'title': u'Einstein on the beach au Théâtre du Châtelet', | ||||
|             u'description': u'md5:9ce2888b1efefc617b5e58b3f6200eeb', | ||||
|         'url': 'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813', | ||||
|         'info_dict': { | ||||
|             'id': 'EV_6785', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Einstein on the beach au Théâtre du Châtelet', | ||||
|             'description': 'md5:9ce2888b1efefc617b5e58b3f6200eeb', | ||||
|         }, | ||||
|         u'params': { | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             u'skip_download': True, | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
| @@ -215,5 +218,5 @@ class CultureboxIE(FranceTVBaseInfoExtractor): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         name = mobj.group('name') | ||||
|         webpage = self._download_webpage(url, name) | ||||
|         video_id = self._search_regex(r'"http://videos\.francetv\.fr/video/(.*?)"', webpage, u'video id') | ||||
|         video_id = self._search_regex(r'"http://videos\.francetv\.fr/video/(.*?)"', webpage, 'video id') | ||||
|         return self._extract_video(video_id) | ||||
|   | ||||
| @@ -1,3 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| @@ -6,13 +8,16 @@ from .common import InfoExtractor | ||||
| class FunnyOrDieIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version', | ||||
|         u'file': u'0732f586d7.mp4', | ||||
|         u'md5': u'f647e9e90064b53b6e046e75d0241fbd', | ||||
|         u'info_dict': { | ||||
|             u"description": u"Lyrics changed to match the video. Spoken cameo by Obscurus Lupa (from ThatGuyWithTheGlasses.com). Based on a concept by Dustin McLean (DustFilms.com). Performed, edited, and written by David A. Scott.",  | ||||
|             u"title": u"Heart-Shaped Box: Literal Video Version" | ||||
|         } | ||||
|         'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version', | ||||
|         'file': '0732f586d7.mp4', | ||||
|         'md5': 'f647e9e90064b53b6e046e75d0241fbd', | ||||
|         'info_dict': { | ||||
|             'description': ('Lyrics changed to match the video. Spoken cameo ' | ||||
|                 'by Obscurus Lupa (from ThatGuyWithTheGlasses.com). Based on a ' | ||||
|                 'concept by Dustin McLean (DustFilms.com). Performed, edited, ' | ||||
|                 'and written by David A. Scott.'), | ||||
|             'title': 'Heart-Shaped Box: Literal Video Version', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -23,13 +28,12 @@ class FunnyOrDieIE(InfoExtractor): | ||||
|  | ||||
|         video_url = self._search_regex( | ||||
|             [r'type="video/mp4" src="(.*?)"', r'src="([^>]*?)" type=\'video/mp4\''], | ||||
|             webpage, u'video URL', flags=re.DOTALL) | ||||
|             webpage, 'video URL', flags=re.DOTALL) | ||||
|  | ||||
|         info = { | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'ext': 'mp4', | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'description': self._og_search_description(webpage), | ||||
|         } | ||||
|         return [info] | ||||
|   | ||||
| @@ -38,18 +38,6 @@ class GenericIE(InfoExtractor): | ||||
|                 'title': 'R\u00e9gis plante sa Jeep', | ||||
|             } | ||||
|         }, | ||||
|         # embedded vimeo video | ||||
|         { | ||||
|             'add_ie': ['Vimeo'], | ||||
|             'url': 'http://skillsmatter.com/podcast/home/move-semanticsperfect-forwarding-and-rvalue-references', | ||||
|             'file': '22444065.mp4', | ||||
|             'md5': '2903896e23df39722c33f015af0666e2', | ||||
|             'info_dict': { | ||||
|                 'title': 'ACCU 2011: Move Semantics,Perfect Forwarding, and Rvalue references- Scott Meyers- 13/04/2011', | ||||
|                 'uploader_id': 'skillsmatter', | ||||
|                 'uploader': 'Skills Matter', | ||||
|             } | ||||
|         }, | ||||
|         # bandcamp page with custom domain | ||||
|         { | ||||
|             'add_ie': ['Bandcamp'], | ||||
| @@ -246,15 +234,25 @@ class GenericIE(InfoExtractor): | ||||
|             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader') | ||||
|  | ||||
|         # Look for BrightCove: | ||||
|         bc_url = BrightcoveIE._extract_brightcove_url(webpage) | ||||
|         if bc_url is not None: | ||||
|         bc_urls = BrightcoveIE._extract_brightcove_urls(webpage) | ||||
|         if bc_urls: | ||||
|             self.to_screen('Brightcove video detected.') | ||||
|             surl = smuggle_url(bc_url, {'Referer': url}) | ||||
|             return self.url_result(surl, 'Brightcove') | ||||
|             entries = [{ | ||||
|                 '_type': 'url', | ||||
|                 'url': smuggle_url(bc_url, {'Referer': url}), | ||||
|                 'ie_key': 'Brightcove' | ||||
|             } for bc_url in bc_urls] | ||||
|  | ||||
|             return { | ||||
|                 '_type': 'playlist', | ||||
|                 'title': video_title, | ||||
|                 'id': video_id, | ||||
|                 'entries': entries, | ||||
|             } | ||||
|  | ||||
|         # Look for embedded (iframe) Vimeo player | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+?src="((?:https?:)?//player.vimeo.com/video/.+?)"', webpage) | ||||
|             r'<iframe[^>]+?src="((?:https?:)?//player\.vimeo\.com/video/.+?)"', webpage) | ||||
|         if mobj: | ||||
|             player_url = unescapeHTML(mobj.group(1)) | ||||
|             surl = smuggle_url(player_url, {'Referer': url}) | ||||
| @@ -262,7 +260,7 @@ class GenericIE(InfoExtractor): | ||||
|  | ||||
|         # Look for embedded (swf embed) Vimeo player | ||||
|         mobj = re.search( | ||||
|             r'<embed[^>]+?src="(https?://(?:www\.)?vimeo.com/moogaloop.swf.+?)"', webpage) | ||||
|             r'<embed[^>]+?src="(https?://(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage) | ||||
|         if mobj: | ||||
|             return self.url_result(mobj.group(1), 'Vimeo') | ||||
|  | ||||
| @@ -332,7 +330,7 @@ class GenericIE(InfoExtractor): | ||||
|             return self.url_result(mobj.group(1), 'Aparat') | ||||
|  | ||||
|         # Look for MPORA videos | ||||
|         mobj = re.search(r'<iframe .*?src="(http://mpora\.com/videos/[^"]+)"', webpage) | ||||
|         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group(1), 'Mpora') | ||||
|  | ||||
| @@ -350,7 +348,7 @@ class GenericIE(InfoExtractor): | ||||
|  | ||||
|         # Look for embedded Huffington Post player | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live.huffingtonpost\.com/.+?)\1', webpage) | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'HuffPost') | ||||
|  | ||||
| @@ -358,7 +356,7 @@ class GenericIE(InfoExtractor): | ||||
|         mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) | ||||
|         if mobj is None: | ||||
|             # Look for gorilla-vid style embedding | ||||
|             mobj = re.search(r'(?s)jw_plugins.*?file:\s*["\'](.*?)["\']', webpage) | ||||
|             mobj = re.search(r'(?s)(?:jw_plugins|JWPlayerOptions).*?file\s*:\s*["\'](.*?)["\']', webpage) | ||||
|         if mobj is None: | ||||
|             # Broaden the search a little bit | ||||
|             mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage) | ||||
|   | ||||
| @@ -1,62 +1,55 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import base64 | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class InfoQIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$' | ||||
|     _VALID_URL = r'https?://(?:www\.)?infoq\.com/[^/]+/(?P<id>[^/]+)$' | ||||
|     _TEST = { | ||||
|         u"name": u"InfoQ", | ||||
|         u"url": u"http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things", | ||||
|         u"file": u"12-jan-pythonthings.mp4", | ||||
|         u"info_dict": { | ||||
|             u"description": u"Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.", | ||||
|             u"title": u"A Few of My Favorite [Python] Things" | ||||
|         "name": "InfoQ", | ||||
|         "url": "http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things", | ||||
|         "file": "12-jan-pythonthings.mp4", | ||||
|         "info_dict": { | ||||
|             "description": "Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.", | ||||
|             "title": "A Few of My Favorite [Python] Things", | ||||
|         }, | ||||
|         "params": { | ||||
|             "skip_download": True, | ||||
|         }, | ||||
|         u"params": { | ||||
|             u"skip_download": True | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id=url) | ||||
|         self.report_extraction(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         # Extract video URL | ||||
|         mobj = re.search(r"jsclassref ?= ?'([^']*)'", webpage) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Unable to extract video url') | ||||
|         real_id = compat_urllib_parse.unquote(base64.b64decode(mobj.group(1).encode('ascii')).decode('utf-8')) | ||||
|         encoded_id = self._search_regex(r"jsclassref ?= ?'([^']*)'", webpage, 'encoded id') | ||||
|         real_id = compat_urllib_parse.unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8')) | ||||
|         video_url = 'rtmpe://video.infoq.com/cfx/st/' + real_id | ||||
|  | ||||
|         # Extract title | ||||
|         video_title = self._search_regex(r'contentTitle = "(.*?)";', | ||||
|             webpage, u'title') | ||||
|             webpage, 'title') | ||||
|  | ||||
|         # Extract description | ||||
|         video_description = self._html_search_regex(r'<meta name="description" content="(.*)"(?:\s*/)?>', | ||||
|             webpage, u'description', fatal=False) | ||||
|             webpage, 'description', fatal=False) | ||||
|  | ||||
|         video_filename = video_url.split('/')[-1] | ||||
|         video_id, extension = video_filename.split('.') | ||||
|  | ||||
|         info = { | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'uploader': None, | ||||
|             'upload_date': None, | ||||
|             'title': video_title, | ||||
|             'ext': extension, # Extension is always(?) mp4, but seems to be flv | ||||
|             'thumbnail': None, | ||||
|             'ext': extension,  # Extension is always(?) mp4, but seems to be flv | ||||
|             'description': video_description, | ||||
|         } | ||||
|  | ||||
|         return [info] | ||||
| @@ -1,4 +1,5 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
| @@ -11,38 +12,38 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class IviIE(InfoExtractor): | ||||
|     IE_DESC = u'ivi.ru' | ||||
|     IE_NAME = u'ivi' | ||||
|     IE_DESC = 'ivi.ru' | ||||
|     IE_NAME = 'ivi' | ||||
|     _VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         # Single movie | ||||
|         { | ||||
|             u'url': u'http://www.ivi.ru/watch/53141', | ||||
|             u'file': u'53141.mp4', | ||||
|             u'md5': u'6ff5be2254e796ed346251d117196cf4', | ||||
|             u'info_dict': { | ||||
|                 u'title': u'Иван Васильевич меняет профессию', | ||||
|                 u'description': u'md5:14d8eda24e9d93d29b5857012c6d6346', | ||||
|                 u'duration': 5498, | ||||
|                 u'thumbnail': u'http://thumbs.ivi.ru/f20.vcp.digitalaccess.ru/contents/d/1/c3c885163a082c29bceeb7b5a267a6.jpg', | ||||
|             'url': 'http://www.ivi.ru/watch/53141', | ||||
|             'file': '53141.mp4', | ||||
|             'md5': '6ff5be2254e796ed346251d117196cf4', | ||||
|             'info_dict': { | ||||
|                 'title': 'Иван Васильевич меняет профессию', | ||||
|                 'description': 'md5:b924063ea1677c8fe343d8a72ac2195f', | ||||
|                 'duration': 5498, | ||||
|                 'thumbnail': 'http://thumbs.ivi.ru/f20.vcp.digitalaccess.ru/contents/d/1/c3c885163a082c29bceeb7b5a267a6.jpg', | ||||
|             }, | ||||
|             u'skip': u'Only works from Russia', | ||||
|             'skip': 'Only works from Russia', | ||||
|         }, | ||||
|         # Serial's serie | ||||
|         { | ||||
|             u'url': u'http://www.ivi.ru/watch/dezhurnyi_angel/74791', | ||||
|             u'file': u'74791.mp4', | ||||
|             u'md5': u'3e6cc9a848c1d2ebcc6476444967baa9', | ||||
|             u'info_dict': { | ||||
|                 u'title': u'Дежурный ангел - 1 серия', | ||||
|                 u'duration': 2490, | ||||
|                 u'thumbnail': u'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg', | ||||
|             'url': 'http://www.ivi.ru/watch/dezhurnyi_angel/74791', | ||||
|             'file': '74791.mp4', | ||||
|             'md5': '3e6cc9a848c1d2ebcc6476444967baa9', | ||||
|             'info_dict': { | ||||
|                 'title': 'Дежурный ангел - 1 серия', | ||||
|                 'duration': 2490, | ||||
|                 'thumbnail': 'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg', | ||||
|             }, | ||||
|             u'skip': u'Only works from Russia', | ||||
|             'skip': 'Only works from Russia', | ||||
|          } | ||||
|     ] | ||||
|      | ||||
|  | ||||
|     # Sorted by quality | ||||
|     _known_formats = ['MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi', 'MP4-SHQ'] | ||||
|  | ||||
| @@ -54,7 +55,7 @@ class IviIE(InfoExtractor): | ||||
|         return m.group('description') if m is not None else None | ||||
|  | ||||
|     def _extract_comment_count(self, html): | ||||
|         m = re.search(u'(?s)<a href="#" id="view-comments" class="action-button dim gradient">\s*Комментарии:\s*(?P<commentcount>\d+)\s*</a>', html) | ||||
|         m = re.search('(?s)<a href="#" id="view-comments" class="action-button dim gradient">\s*Комментарии:\s*(?P<commentcount>\d+)\s*</a>', html) | ||||
|         return int(m.group('commentcount')) if m is not None else 0 | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -63,49 +64,49 @@ class IviIE(InfoExtractor): | ||||
|  | ||||
|         api_url = 'http://api.digitalaccess.ru/api/json/' | ||||
|  | ||||
|         data = {u'method': u'da.content.get', | ||||
|                 u'params': [video_id, {u'site': u's183', | ||||
|                                        u'referrer': u'http://www.ivi.ru/watch/%s' % video_id, | ||||
|                                        u'contentid': video_id | ||||
|                                     } | ||||
|                             ] | ||||
|         data = {'method': 'da.content.get', | ||||
|                 'params': [video_id, {'site': 's183', | ||||
|                                       'referrer': 'http://www.ivi.ru/watch/%s' % video_id, | ||||
|                                       'contentid': video_id | ||||
|                                       } | ||||
|                            ] | ||||
|                 } | ||||
|  | ||||
|         request = compat_urllib_request.Request(api_url, json.dumps(data)) | ||||
|  | ||||
|         video_json_page = self._download_webpage(request, video_id, u'Downloading video JSON') | ||||
|         video_json_page = self._download_webpage(request, video_id, 'Downloading video JSON') | ||||
|         video_json = json.loads(video_json_page) | ||||
|  | ||||
|         if u'error' in video_json: | ||||
|             error = video_json[u'error'] | ||||
|             if error[u'origin'] == u'NoRedisValidData': | ||||
|                 raise ExtractorError(u'Video %s does not exist' % video_id, expected=True) | ||||
|             raise ExtractorError(u'Unable to download video %s: %s' % (video_id, error[u'message']), expected=True) | ||||
|         if 'error' in video_json: | ||||
|             error = video_json['error'] | ||||
|             if error['origin'] == 'NoRedisValidData': | ||||
|                 raise ExtractorError('Video %s does not exist' % video_id, expected=True) | ||||
|             raise ExtractorError('Unable to download video %s: %s' % (video_id, error['message']), expected=True) | ||||
|  | ||||
|         result = video_json[u'result'] | ||||
|         result = video_json['result'] | ||||
|  | ||||
|         formats = [{ | ||||
|             'url': x[u'url'], | ||||
|             'format_id': x[u'content_format'], | ||||
|             'preference': self._known_formats.index(x[u'content_format']), | ||||
|         } for x in result[u'files'] if x[u'content_format'] in self._known_formats] | ||||
|             'url': x['url'], | ||||
|             'format_id': x['content_format'], | ||||
|             'preference': self._known_formats.index(x['content_format']), | ||||
|         } for x in result['files'] if x['content_format'] in self._known_formats] | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         if not formats: | ||||
|             raise ExtractorError(u'No media links available for %s' % video_id) | ||||
|             raise ExtractorError('No media links available for %s' % video_id) | ||||
|  | ||||
|         duration = result[u'duration'] | ||||
|         compilation = result[u'compilation'] | ||||
|         title = result[u'title'] | ||||
|         duration = result['duration'] | ||||
|         compilation = result['compilation'] | ||||
|         title = result['title'] | ||||
|  | ||||
|         title = '%s - %s' % (compilation, title) if compilation is not None else title   | ||||
|  | ||||
|         previews = result[u'preview'] | ||||
|         previews = result['preview'] | ||||
|         previews.sort(key=lambda fmt: self._known_thumbnails.index(fmt['content_format'])) | ||||
|         thumbnail = previews[-1][u'url'] if len(previews) > 0 else None | ||||
|         thumbnail = previews[-1]['url'] if len(previews) > 0 else None | ||||
|  | ||||
|         video_page = self._download_webpage(url, video_id, u'Downloading video page') | ||||
|         video_page = self._download_webpage(url, video_id, 'Downloading video page') | ||||
|         description = self._extract_description(video_page) | ||||
|         comment_count = self._extract_comment_count(video_page) | ||||
|  | ||||
| @@ -121,8 +122,8 @@ class IviIE(InfoExtractor): | ||||
|  | ||||
|  | ||||
| class IviCompilationIE(InfoExtractor): | ||||
|     IE_DESC = u'ivi.ru compilations' | ||||
|     IE_NAME = u'ivi:compilation' | ||||
|     IE_DESC = 'ivi.ru compilations' | ||||
|     IE_NAME = 'ivi:compilation' | ||||
|     _VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$' | ||||
|  | ||||
|     def _extract_entries(self, html, compilation_id): | ||||
| @@ -135,22 +136,23 @@ class IviCompilationIE(InfoExtractor): | ||||
|         season_id = mobj.group('seasonid') | ||||
|  | ||||
|         if season_id is not None: # Season link | ||||
|             season_page = self._download_webpage(url, compilation_id, u'Downloading season %s web page' % season_id) | ||||
|             season_page = self._download_webpage(url, compilation_id, 'Downloading season %s web page' % season_id) | ||||
|             playlist_id = '%s/season%s' % (compilation_id, season_id) | ||||
|             playlist_title = self._html_search_meta(u'title', season_page, u'title') | ||||
|             playlist_title = self._html_search_meta('title', season_page, 'title') | ||||
|             entries = self._extract_entries(season_page, compilation_id) | ||||
|         else: # Compilation link             | ||||
|             compilation_page = self._download_webpage(url, compilation_id, u'Downloading compilation web page') | ||||
|             compilation_page = self._download_webpage(url, compilation_id, 'Downloading compilation web page') | ||||
|             playlist_id = compilation_id | ||||
|             playlist_title = self._html_search_meta(u'title', compilation_page, u'title') | ||||
|             playlist_title = self._html_search_meta('title', compilation_page, 'title') | ||||
|             seasons = re.findall(r'<a href="/watch/%s/season(\d+)">[^<]+</a>' % compilation_id, compilation_page) | ||||
|             if len(seasons) == 0: # No seasons in this compilation | ||||
|                 entries = self._extract_entries(compilation_page, compilation_id) | ||||
|             else: | ||||
|                 entries = [] | ||||
|                 for season_id in seasons: | ||||
|                     season_page = self._download_webpage('http://www.ivi.ru/watch/%s/season%s' % (compilation_id, season_id), | ||||
|                                                          compilation_id, u'Downloading season %s web page' % season_id) | ||||
|                     season_page = self._download_webpage( | ||||
|                         'http://www.ivi.ru/watch/%s/season%s' % (compilation_id, season_id), | ||||
|                         compilation_id, 'Downloading season %s web page' % season_id) | ||||
|                     entries.extend(self._extract_entries(season_page, compilation_id)) | ||||
|  | ||||
|         return self.playlist_result(entries, playlist_id, playlist_title) | ||||
| @@ -1,3 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| @@ -5,36 +7,34 @@ from .common import InfoExtractor | ||||
|  | ||||
| class KeekIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)' | ||||
|     IE_NAME = u'keek' | ||||
|     IE_NAME = 'keek' | ||||
|     _TEST = { | ||||
|         u'url': u'https://www.keek.com/ytdl/keeks/NODfbab', | ||||
|         u'file': u'NODfbab.mp4', | ||||
|         u'md5': u'9b0636f8c0f7614afa4ea5e4c6e57e83', | ||||
|         u'info_dict': { | ||||
|             u"uploader": u"ytdl",  | ||||
|             u"title": u"test chars: \"'/\\\u00e4<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de ." | ||||
|         } | ||||
|         'url': 'https://www.keek.com/ytdl/keeks/NODfbab', | ||||
|         'file': 'NODfbab.mp4', | ||||
|         'md5': '9b0636f8c0f7614afa4ea5e4c6e57e83', | ||||
|         'info_dict': { | ||||
|             'uploader': 'ytdl', | ||||
|             'title': 'test chars: "\'/\\\u00e4<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de .', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         m = re.match(self._VALID_URL, url) | ||||
|         video_id = m.group('videoID') | ||||
|  | ||||
|         video_url = u'http://cdn.keek.com/keek/video/%s' % video_id | ||||
|         thumbnail = u'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id | ||||
|         video_url = 'http://cdn.keek.com/keek/video/%s' % video_id | ||||
|         thumbnail = 'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_title = self._og_search_title(webpage) | ||||
|         uploader = self._html_search_regex( | ||||
|             r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>', | ||||
|             webpage, 'uploader', fatal=False) | ||||
|  | ||||
|         uploader = self._html_search_regex(r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>', | ||||
|             webpage, u'uploader', fatal=False) | ||||
|  | ||||
|         info = { | ||||
|                 'id': video_id, | ||||
|                 'url': video_url, | ||||
|                 'ext': 'mp4', | ||||
|                 'title': video_title, | ||||
|                 'thumbnail': thumbnail, | ||||
|                 'uploader': uploader | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'ext': 'mp4', | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'thumbnail': thumbnail, | ||||
|             'uploader': uploader | ||||
|         } | ||||
|         return [info] | ||||
|   | ||||
							
								
								
									
										63
									
								
								youtube_dl/extractor/lifenews.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										63
									
								
								youtube_dl/extractor/lifenews.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,63 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import unified_strdate | ||||
|  | ||||
|  | ||||
| class LifeNewsIE(InfoExtractor): | ||||
|     IE_NAME = 'lifenews' | ||||
|     IE_DESC = 'LIFE | NEWS' | ||||
|     _VALID_URL = r'http://lifenews\.ru/(?:mobile/)?news/(?P<id>\d+)' | ||||
|      | ||||
|     _TEST = { | ||||
|         'url': 'http://lifenews.ru/news/126342', | ||||
|         'file': '126342.mp4', | ||||
|         'md5': 'e1b50a5c5fb98a6a544250f2e0db570a', | ||||
|         'info_dict': { | ||||
|             'title': 'МВД разыскивает мужчин, оставивших в IKEA сумку с автоматом', | ||||
|             'description': 'Камеры наблюдения гипермаркета зафиксировали троих мужчин, спрятавших оружейный арсенал в камере хранения.', | ||||
|             'thumbnail': 'http://lifenews.ru/static/posts/2014/1/126342/.video.jpg', | ||||
|             'upload_date': '20140130', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage('http://lifenews.ru/mobile/news/%s' % video_id, video_id, 'Downloading page') | ||||
|  | ||||
|         video_url = self._html_search_regex( | ||||
|             r'<video.*?src="([^"]+)"></video>', webpage, 'video URL') | ||||
|          | ||||
|         thumbnail = self._html_search_regex( | ||||
|             r'<video.*?poster="([^"]+)".*?"></video>', webpage, 'video thumbnail') | ||||
|  | ||||
|         title = self._og_search_title(webpage) | ||||
|         TITLE_SUFFIX = ' - Первый по срочным новостям — LIFE | NEWS' | ||||
|         if title.endswith(TITLE_SUFFIX): | ||||
|             title = title[:-len(TITLE_SUFFIX)] | ||||
|  | ||||
|         description = self._og_search_description(webpage) | ||||
|  | ||||
|         view_count = self._html_search_regex( | ||||
|             r'<div class=\'views\'>(\d+)</div>', webpage, 'view count') | ||||
|         comment_count = self._html_search_regex( | ||||
|             r'<div class=\'comments\'>(\d+)</div>', webpage, 'comment count') | ||||
|  | ||||
|         upload_date = self._html_search_regex( | ||||
|             r'<time datetime=\'([^\']+)\'>', webpage, 'upload date') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'thumbnail': thumbnail, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'view_count': view_count, | ||||
|             'comment_count': comment_count, | ||||
|             'upload_date': unified_strdate(upload_date), | ||||
|         } | ||||
| @@ -1,52 +1,63 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class LiveLeakIE(InfoExtractor): | ||||
|  | ||||
|     _VALID_URL = r'^(?:http://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)' | ||||
|     IE_NAME = u'liveleak' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.liveleak.com/view?i=757_1364311680', | ||||
|         u'file': u'757_1364311680.mp4', | ||||
|         u'md5': u'0813c2430bea7a46bf13acf3406992f4', | ||||
|         u'info_dict': { | ||||
|             u"description": u"extremely bad day for this guy..!",  | ||||
|             u"uploader": u"ljfriel2",  | ||||
|             u"title": u"Most unlucky car accident" | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.liveleak.com/view?i=757_1364311680', | ||||
|         'file': '757_1364311680.mp4', | ||||
|         'md5': '0813c2430bea7a46bf13acf3406992f4', | ||||
|         'info_dict': { | ||||
|             'description': 'extremely bad day for this guy..!', | ||||
|             'uploader': 'ljfriel2', | ||||
|             'title': 'Most unlucky car accident' | ||||
|         } | ||||
|     } | ||||
|     }, | ||||
|     { | ||||
|         'url': 'http://www.liveleak.com/view?i=f93_1390833151', | ||||
|         'file': 'f93_1390833151.mp4', | ||||
|         'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf', | ||||
|         'info_dict': { | ||||
|             'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.', | ||||
|             'uploader': 'ARD_Stinkt', | ||||
|             'title': 'German Television does first Edward Snowden Interview (ENGLISH)', | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|  | ||||
|         video_id = mobj.group('video_id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         sources_raw = self._search_regex( | ||||
|             r'(?s)sources:\s*(\[.*?\]),', webpage, 'video URLs', default=None) | ||||
|         if sources_raw is None: | ||||
|             sources_raw = '[{ %s}]' % ( | ||||
|                 self._search_regex(r'(file: ".*?"),', webpage, 'video URL')) | ||||
|  | ||||
|         video_url = self._search_regex(r'file: "(.*?)",', | ||||
|             webpage, u'video URL') | ||||
|         sources_json = re.sub(r'\s([a-z]+):\s', r'"\1": ', sources_raw) | ||||
|         sources = json.loads(sources_json) | ||||
|  | ||||
|         formats = [{ | ||||
|             'format_note': s.get('label'), | ||||
|             'url': s['file'], | ||||
|         } for s in sources] | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip() | ||||
|  | ||||
|         video_description = self._og_search_description(webpage) | ||||
|         video_uploader = self._html_search_regex( | ||||
|             r'By:.*?(\w+)</a>', webpage, 'uploader', fatal=False) | ||||
|  | ||||
|         video_uploader = self._html_search_regex(r'By:.*?(\w+)</a>', | ||||
|             webpage, u'uploader', fatal=False) | ||||
|  | ||||
|         info = { | ||||
|             'id':  video_id, | ||||
|             'url': video_url, | ||||
|             'ext': 'mp4', | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': video_title, | ||||
|             'description': video_description, | ||||
|             'uploader': video_uploader | ||||
|             'uploader': video_uploader, | ||||
|             'formats': formats, | ||||
|         } | ||||
|  | ||||
|         return [info] | ||||
|   | ||||
| @@ -16,7 +16,8 @@ class MalemotionIE(InfoExtractor): | ||||
|         'info_dict': { | ||||
|             "title": "Bien dur", | ||||
|             "age_limit": 18, | ||||
|         } | ||||
|         }, | ||||
|         'skip': 'This video has been deleted.' | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
| @@ -119,7 +119,9 @@ class MTVServicesInfoExtractor(InfoExtractor): | ||||
|             if mgid.endswith('.swf'): | ||||
|                 mgid = mgid[:-4] | ||||
|         except RegexNotFoundError: | ||||
|             mgid = self._search_regex(r'data-mgid="(.*?)"', webpage, u'mgid') | ||||
|             mgid = self._search_regex( | ||||
|                 [r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'], | ||||
|                 webpage, u'mgid') | ||||
|         return self._get_videos_info(mgid) | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -1,3 +1,4 @@ | ||||
| from __future__ import unicode_literals | ||||
| import os.path | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| @@ -11,13 +12,13 @@ from ..utils import ( | ||||
| class MySpassIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://www\.myspass\.de/.*' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/', | ||||
|         u'file': u'11741.mp4', | ||||
|         u'md5': u'0b49f4844a068f8b33f4b7c88405862b', | ||||
|         u'info_dict': { | ||||
|             u"description": u"Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?",  | ||||
|             u"title": u"Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2" | ||||
|         } | ||||
|         'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/', | ||||
|         'file': '11741.mp4', | ||||
|         'md5': '0b49f4844a068f8b33f4b7c88405862b', | ||||
|         'info_dict': { | ||||
|             "description": "Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?", | ||||
|             "title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2", | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -37,12 +38,11 @@ class MySpassIE(InfoExtractor): | ||||
|         # extract values from metadata | ||||
|         url_flv_el = metadata.find('url_flv') | ||||
|         if url_flv_el is None: | ||||
|             raise ExtractorError(u'Unable to extract download url') | ||||
|             raise ExtractorError('Unable to extract download url') | ||||
|         video_url = url_flv_el.text | ||||
|         extension = os.path.splitext(video_url)[1][1:] | ||||
|         title_el = metadata.find('title') | ||||
|         if title_el is None: | ||||
|             raise ExtractorError(u'Unable to extract title') | ||||
|             raise ExtractorError('Unable to extract title') | ||||
|         title = title_el.text | ||||
|         format_id_el = metadata.find('format_id') | ||||
|         if format_id_el is None: | ||||
| @@ -59,13 +59,12 @@ class MySpassIE(InfoExtractor): | ||||
|             thumbnail = imagePreview_el.text | ||||
|         else: | ||||
|             thumbnail = None | ||||
|         info = { | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': title, | ||||
|             'ext': extension, | ||||
|             'format': format, | ||||
|             'thumbnail': thumbnail, | ||||
|             'description': description | ||||
|             'description': description, | ||||
|         } | ||||
|         return [info] | ||||
|   | ||||
| @@ -1,48 +1,39 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class NBAIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*?)(?:/index\.html)?(?:\?.*)?$' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html', | ||||
|         u'file': u'0021200253-okc-bkn-recap.nba.mp4', | ||||
|         u'md5': u'c0edcfc37607344e2ff8f13c378c88a4', | ||||
|         u'info_dict': { | ||||
|             u"description": u"Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.",  | ||||
|             u"title": u"Thunder vs. Nets" | ||||
|         } | ||||
|         'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html', | ||||
|         'file': u'0021200253-okc-bkn-recap.nba.mp4', | ||||
|         'md5': u'c0edcfc37607344e2ff8f13c378c88a4', | ||||
|         'info_dict': { | ||||
|             'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.', | ||||
|             'title': 'Thunder vs. Nets', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|  | ||||
|         video_id = mobj.group(1) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_url = u'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4' | ||||
|         video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4' | ||||
|  | ||||
|         shortened_video_id = video_id.rpartition('/')[2] | ||||
|         title = self._og_search_title(webpage, default=shortened_video_id).replace('NBA.com: ', '') | ||||
|  | ||||
|         # It isn't there in the HTML it returns to us | ||||
|         # uploader_date = self._html_search_regex(r'<b>Date:</b> (.*?)</div>', webpage, 'upload_date', fatal=False) | ||||
|  | ||||
|         description = self._html_search_regex(r'<meta name="description" (?:content|value)="(.*?)" />', webpage, 'description', fatal=False) | ||||
|  | ||||
|         info = { | ||||
|         return { | ||||
|             'id': shortened_video_id, | ||||
|             'url': video_url, | ||||
|             'ext': 'mp4', | ||||
|             'title': title, | ||||
|             # 'uploader_date': uploader_date, | ||||
|             'description': description, | ||||
|         } | ||||
|         return [info] | ||||
|   | ||||
| @@ -4,18 +4,18 @@ import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import determine_ext | ||||
|  | ||||
|  | ||||
| class NewgroundsIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:https?://)?(?:www\.)?newgrounds\.com/audio/listen/(?P<id>\d+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/audio/listen/(?P<id>[0-9]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.newgrounds.com/audio/listen/549479', | ||||
|         'file': '549479.mp3', | ||||
|         'md5': 'fe6033d297591288fa1c1f780386f07a', | ||||
|         'info_dict': { | ||||
|             "title": "B7 - BusMode", | ||||
|             "uploader": "Burn7", | ||||
|             'id': '549479', | ||||
|             'ext': 'mp3', | ||||
|             'title': 'B7 - BusMode', | ||||
|             'uploader': 'Burn7', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -1,3 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| @@ -9,13 +11,13 @@ class NineGagIE(InfoExtractor): | ||||
|     _VALID_URL = r'^https?://(?:www\.)?9gag\.tv/v/(?P<id>[0-9]+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u"url": u"http://9gag.tv/v/1912", | ||||
|         u"file": u"1912.mp4", | ||||
|         u"info_dict": { | ||||
|             u"description": u"This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)", | ||||
|             u"title": u"\"People Are Awesome 2013\" Is Absolutely Awesome" | ||||
|         "url": "http://9gag.tv/v/1912", | ||||
|         "file": "1912.mp4", | ||||
|         "info_dict": { | ||||
|             "description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)", | ||||
|             "title": "\"People Are Awesome 2013\" Is Absolutely Awesome" | ||||
|         }, | ||||
|         u'add_ie': [u'Youtube'] | ||||
|         'add_ie': ['Youtube'] | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -25,7 +27,7 @@ class NineGagIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         data_json = self._html_search_regex(r'''(?x) | ||||
|             <div\s*id="tv-video"\s*data-video-source="youtube"\s* | ||||
|                 data-video-meta="([^"]+)"''', webpage, u'video metadata') | ||||
|                 data-video-meta="([^"]+)"''', webpage, 'video metadata') | ||||
|  | ||||
|         data = json.loads(data_json) | ||||
|  | ||||
|   | ||||
							
								
								
									
										61
									
								
								youtube_dl/extractor/normalboots.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										61
									
								
								youtube_dl/extractor/normalboots.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,61 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
| class NormalbootsIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$' | ||||
|     _TEST = { | ||||
|         u'url': u'http://normalboots.com/video/home-alone-games-jontron/', | ||||
|         u'file': u'home-alone-games-jontron.mp4', | ||||
|         u'md5': u'8bf6de238915dd501105b44ef5f1e0f6', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Home Alone Games - JonTron - NormalBoots', | ||||
|             u'description': u'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for \u2018Tense Battle Theme\u2019:\xa0http://www.youtube.com/Kiamet/', | ||||
|             u'uploader': u'JonTron', | ||||
|             u'upload_date': u'20140125', | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|         video_id = mobj.group('videoid') | ||||
|          | ||||
|         info = { | ||||
|             'id': video_id, | ||||
|             'uploader': None, | ||||
|             'upload_date': None, | ||||
|         } | ||||
|          | ||||
|         if url[:4] != 'http': | ||||
|             url = 'http://' + url | ||||
|          | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         video_title = self._og_search_title(webpage) | ||||
|         video_description = self._og_search_description(webpage) | ||||
|         video_thumbnail = self._og_search_thumbnail(webpage) | ||||
|         video_uploader = self._html_search_regex(r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>', | ||||
|             webpage, 'uploader') | ||||
|         raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',  | ||||
|             webpage, 'date') | ||||
|         video_upload_date = unified_strdate(raw_upload_date) | ||||
|         video_upload_date = unified_strdate(raw_upload_date) | ||||
|              | ||||
|         player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url') | ||||
|         player_page = self._download_webpage(player_url, video_id) | ||||
|         video_url = u'http://player.screenwavemedia.com/' + self._html_search_regex(r"'file':\s'(?P<file>[0-9A-Za-z-_\.]+)'", player_page, 'file') | ||||
|          | ||||
|         info['url'] = video_url | ||||
|         info['title'] = video_title | ||||
|         info['description'] = video_description | ||||
|         info['thumbnail'] = video_thumbnail | ||||
|         info['uploader'] = video_uploader | ||||
|         info['upload_date'] = video_upload_date | ||||
|          | ||||
|         return info | ||||
| @@ -5,7 +5,7 @@ from .common import InfoExtractor | ||||
| from ..utils import unescapeHTML | ||||
|  | ||||
| class OoyalaIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://.+?\.ooyala\.com/.*?embedCode=(?P<id>.+?)(&|$)' | ||||
|     _VALID_URL = r'https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=(?P<id>.+?)(&|$)' | ||||
|  | ||||
|     _TEST = { | ||||
|         # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video | ||||
|   | ||||
| @@ -1,3 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| @@ -7,12 +9,12 @@ from ..utils import compat_urllib_parse | ||||
| class PornHdIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<video_id>[0-9]+)/(?P<video_title>.+)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video', | ||||
|         u'file': u'1962.flv', | ||||
|         u'md5': u'35272469887dca97abd30abecc6cdf75', | ||||
|         u'info_dict': { | ||||
|             u"title": u"sierra-day-gets-his-cum-all-over-herself-hd-porn-video", | ||||
|             u"age_limit": 18, | ||||
|         'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video', | ||||
|         'file': '1962.flv', | ||||
|         'md5': '35272469887dca97abd30abecc6cdf75', | ||||
|         'info_dict': { | ||||
|             "title": "sierra-day-gets-his-cum-all-over-herself-hd-porn-video", | ||||
|             "age_limit": 18, | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -24,9 +26,13 @@ class PornHdIE(InfoExtractor): | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_url = self._html_search_regex( | ||||
|             r'&hd=(http.+?)&', webpage, u'video URL') | ||||
|         video_url = compat_urllib_parse.unquote(video_url) | ||||
|         next_url = self._html_search_regex( | ||||
|             r'&hd=(http.+?)&', webpage, 'video URL') | ||||
|         next_url = compat_urllib_parse.unquote(next_url) | ||||
|  | ||||
|         video_url = self._download_webpage( | ||||
|             next_url, video_id, note='Retrieving video URL', | ||||
|             errnote='Could not retrieve video URL') | ||||
|         age_limit = 18 | ||||
|  | ||||
|         return { | ||||
|   | ||||
| @@ -1,10 +1,11 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse_urlparse, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
| @@ -12,16 +13,17 @@ from ..utils import ( | ||||
| class RBMARadioIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011', | ||||
|         u'file': u'ford-lopatin-live-at-primavera-sound-2011.mp3', | ||||
|         u'md5': u'6bc6f9bcb18994b4c983bc3bf4384d95', | ||||
|         u'info_dict': { | ||||
|             u"uploader_id": u"ford-lopatin",  | ||||
|             u"location": u"Spain",  | ||||
|             u"description": u"Joel Ford and Daniel \u2019Oneohtrix Point Never\u2019 Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.",  | ||||
|             u"uploader": u"Ford & Lopatin",  | ||||
|             u"title": u"Live at Primavera Sound 2011" | ||||
|         } | ||||
|         'url': 'http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011', | ||||
|         'md5': '6bc6f9bcb18994b4c983bc3bf4384d95', | ||||
|         'info_dict': { | ||||
|             'id': 'ford-lopatin-live-at-primavera-sound-2011', | ||||
|             'ext': 'mp3', | ||||
|             "uploader_id": "ford-lopatin", | ||||
|             "location": "Spain", | ||||
|             "description": "Joel Ford and Daniel ’Oneohtrix Point Never’ Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.", | ||||
|             "uploader": "Ford & Lopatin", | ||||
|             "title": "Live at Primavera Sound 2011", | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -31,26 +33,23 @@ class RBMARadioIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         json_data = self._search_regex(r'window\.gon.*?gon\.show=(.+?);$', | ||||
|             webpage, u'json data', flags=re.MULTILINE) | ||||
|             webpage, 'json data', flags=re.MULTILINE) | ||||
|  | ||||
|         try: | ||||
|             data = json.loads(json_data) | ||||
|         except ValueError as e: | ||||
|             raise ExtractorError(u'Invalid JSON: ' + str(e)) | ||||
|             raise ExtractorError('Invalid JSON: ' + str(e)) | ||||
|  | ||||
|         video_url = data['akamai_url'] + '&cbr=256' | ||||
|         url_parts = compat_urllib_parse_urlparse(video_url) | ||||
|         video_ext = url_parts.path.rpartition('.')[2] | ||||
|         info = { | ||||
|                 'id': video_id, | ||||
|                 'url': video_url, | ||||
|                 'ext': video_ext, | ||||
|                 'title': data['title'], | ||||
|                 'description': data.get('teaser_text'), | ||||
|                 'location': data.get('country_of_origin'), | ||||
|                 'uploader': data.get('host', {}).get('name'), | ||||
|                 'uploader_id': data.get('host', {}).get('slug'), | ||||
|                 'thumbnail': data.get('image', {}).get('large_url_2x'), | ||||
|                 'duration': data.get('duration'), | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': data['title'], | ||||
|             'description': data.get('teaser_text'), | ||||
|             'location': data.get('country_of_origin'), | ||||
|             'uploader': data.get('host', {}).get('name'), | ||||
|             'uploader_id': data.get('host', {}).get('slug'), | ||||
|             'thumbnail': data.get('image', {}).get('large_url_2x'), | ||||
|             'duration': data.get('duration'), | ||||
|         } | ||||
|         return [info] | ||||
|   | ||||
| @@ -1,3 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| @@ -11,12 +13,12 @@ class Ro220IE(InfoExtractor): | ||||
|     IE_NAME = '220.ro' | ||||
|     _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)' | ||||
|     _TEST = { | ||||
|         u"url": u"http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/", | ||||
|         u'file': u'LYV6doKo7f.mp4', | ||||
|         u'md5': u'03af18b73a07b4088753930db7a34add', | ||||
|         u'info_dict': { | ||||
|             u"title": u"Luati-le Banii sez 4 ep 1", | ||||
|             u"description": u"Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.", | ||||
|         "url": "http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/", | ||||
|         'file': 'LYV6doKo7f.mp4', | ||||
|         'md5': '03af18b73a07b4088753930db7a34add', | ||||
|         'info_dict': { | ||||
|             "title": "Luati-le Banii sez 4 ep 1", | ||||
|             "description": "Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.", | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -27,10 +29,10 @@ class Ro220IE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         flashVars_str = self._search_regex( | ||||
|             r'<param name="flashVars" value="([^"]+)"', | ||||
|             webpage, u'flashVars') | ||||
|             webpage, 'flashVars') | ||||
|         flashVars = compat_parse_qs(flashVars_str) | ||||
|  | ||||
|         info = { | ||||
|         return { | ||||
|             '_type': 'video', | ||||
|             'id': video_id, | ||||
|             'ext': 'mp4', | ||||
| @@ -39,4 +41,3 @@ class Ro220IE(InfoExtractor): | ||||
|             'description': clean_html(flashVars['desc'][0]), | ||||
|             'thumbnail': flashVars['preview'][0], | ||||
|         } | ||||
|         return info | ||||
|   | ||||
| @@ -1,4 +1,5 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import os.path | ||||
| import re | ||||
| @@ -16,76 +17,76 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class SmotriIE(InfoExtractor): | ||||
|     IE_DESC = u'Smotri.com' | ||||
|     IE_NAME = u'smotri' | ||||
|     IE_DESC = 'Smotri.com' | ||||
|     IE_NAME = 'smotri' | ||||
|     _VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/video/view/\?id=(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4}))' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         # real video id 2610366 | ||||
|         { | ||||
|             u'url': u'http://smotri.com/video/view/?id=v261036632ab', | ||||
|             u'file': u'v261036632ab.mp4', | ||||
|             u'md5': u'2a7b08249e6f5636557579c368040eb9', | ||||
|             u'info_dict': { | ||||
|                 u'title': u'катастрофа с камер видеонаблюдения', | ||||
|                 u'uploader': u'rbc2008', | ||||
|                 u'uploader_id': u'rbc08', | ||||
|                 u'upload_date': u'20131118', | ||||
|                 u'description': u'катастрофа с камер видеонаблюдения, видео катастрофа с камер видеонаблюдения', | ||||
|                 u'thumbnail': u'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg', | ||||
|             'url': 'http://smotri.com/video/view/?id=v261036632ab', | ||||
|             'file': 'v261036632ab.mp4', | ||||
|             'md5': '2a7b08249e6f5636557579c368040eb9', | ||||
|             'info_dict': { | ||||
|                 'title': 'катастрофа с камер видеонаблюдения', | ||||
|                 'uploader': 'rbc2008', | ||||
|                 'uploader_id': 'rbc08', | ||||
|                 'upload_date': '20131118', | ||||
|                 'description': 'катастрофа с камер видеонаблюдения, видео катастрофа с камер видеонаблюдения', | ||||
|                 'thumbnail': 'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg', | ||||
|             }, | ||||
|         }, | ||||
|         # real video id 57591 | ||||
|         { | ||||
|             u'url': u'http://smotri.com/video/view/?id=v57591cb20', | ||||
|             u'file': u'v57591cb20.flv', | ||||
|             u'md5': u'830266dfc21f077eac5afd1883091bcd', | ||||
|             u'info_dict': { | ||||
|                 u'title': u'test', | ||||
|                 u'uploader': u'Support Photofile@photofile', | ||||
|                 u'uploader_id': u'support-photofile', | ||||
|                 u'upload_date': u'20070704', | ||||
|                 u'description': u'test, видео test', | ||||
|                 u'thumbnail': u'http://frame4.loadup.ru/03/ed/57591.2.3.jpg', | ||||
|             'url': 'http://smotri.com/video/view/?id=v57591cb20', | ||||
|             'file': 'v57591cb20.flv', | ||||
|             'md5': '830266dfc21f077eac5afd1883091bcd', | ||||
|             'info_dict': { | ||||
|                 'title': 'test', | ||||
|                 'uploader': 'Support Photofile@photofile', | ||||
|                 'uploader_id': 'support-photofile', | ||||
|                 'upload_date': '20070704', | ||||
|                 'description': 'test, видео test', | ||||
|                 'thumbnail': 'http://frame4.loadup.ru/03/ed/57591.2.3.jpg', | ||||
|             }, | ||||
|         }, | ||||
|         # video-password | ||||
|         { | ||||
|             u'url': u'http://smotri.com/video/view/?id=v1390466a13c', | ||||
|             u'file': u'v1390466a13c.mp4', | ||||
|             u'md5': u'f6331cef33cad65a0815ee482a54440b', | ||||
|             u'info_dict': { | ||||
|                 u'title': u'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1', | ||||
|                 u'uploader': u'timoxa40', | ||||
|                 u'uploader_id': u'timoxa40', | ||||
|                 u'upload_date': u'20100404', | ||||
|                 u'thumbnail': u'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg', | ||||
|                 u'description': u'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1, видео TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1', | ||||
|             'url': 'http://smotri.com/video/view/?id=v1390466a13c', | ||||
|             'file': 'v1390466a13c.mp4', | ||||
|             'md5': 'f6331cef33cad65a0815ee482a54440b', | ||||
|             'info_dict': { | ||||
|                 'title': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1', | ||||
|                 'uploader': 'timoxa40', | ||||
|                 'uploader_id': 'timoxa40', | ||||
|                 'upload_date': '20100404', | ||||
|                 'thumbnail': 'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg', | ||||
|                 'description': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1, видео TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1', | ||||
|             }, | ||||
|             u'params': { | ||||
|                 u'videopassword': u'qwerty', | ||||
|             'params': { | ||||
|                 'videopassword': 'qwerty', | ||||
|             }, | ||||
|         }, | ||||
|         # age limit + video-password | ||||
|         { | ||||
|             u'url': u'http://smotri.com/video/view/?id=v15408898bcf', | ||||
|             u'file': u'v15408898bcf.flv', | ||||
|             u'md5': u'91e909c9f0521adf5ee86fbe073aad70', | ||||
|             u'info_dict': { | ||||
|                 u'title': u'этот ролик не покажут по ТВ', | ||||
|                 u'uploader': u'zzxxx', | ||||
|                 u'uploader_id': u'ueggb', | ||||
|                 u'upload_date': u'20101001', | ||||
|                 u'thumbnail': u'http://frame3.loadup.ru/75/75/1540889.1.3.jpg', | ||||
|                 u'age_limit': 18, | ||||
|                 u'description': u'этот ролик не покажут по ТВ, видео этот ролик не покажут по ТВ', | ||||
|             'url': 'http://smotri.com/video/view/?id=v15408898bcf', | ||||
|             'file': 'v15408898bcf.flv', | ||||
|             'md5': '91e909c9f0521adf5ee86fbe073aad70', | ||||
|             'info_dict': { | ||||
|                 'title': 'этот ролик не покажут по ТВ', | ||||
|                 'uploader': 'zzxxx', | ||||
|                 'uploader_id': 'ueggb', | ||||
|                 'upload_date': '20101001', | ||||
|                 'thumbnail': 'http://frame3.loadup.ru/75/75/1540889.1.3.jpg', | ||||
|                 'age_limit': 18, | ||||
|                 'description': 'этот ролик не покажут по ТВ, видео этот ролик не покажут по ТВ', | ||||
|             }, | ||||
|             u'params': { | ||||
|                 u'videopassword': u'333' | ||||
|             'params': { | ||||
|                 'videopassword': '333' | ||||
|             } | ||||
|         } | ||||
|     ] | ||||
|      | ||||
|  | ||||
|     _SUCCESS = 0 | ||||
|     _PASSWORD_NOT_VERIFIED = 1 | ||||
|     _PASSWORD_DETECTED = 2 | ||||
| @@ -106,71 +107,71 @@ class SmotriIE(InfoExtractor): | ||||
|  | ||||
|         # Download video JSON data | ||||
|         video_json_url = 'http://smotri.com/vt.php?id=%s' % real_video_id | ||||
|         video_json_page = self._download_webpage(video_json_url, video_id, u'Downloading video JSON') | ||||
|         video_json_page = self._download_webpage(video_json_url, video_id, 'Downloading video JSON') | ||||
|         video_json = json.loads(video_json_page) | ||||
|          | ||||
|  | ||||
|         status = video_json['status'] | ||||
|         if status == self._VIDEO_NOT_FOUND: | ||||
|             raise ExtractorError(u'Video %s does not exist' % video_id, expected=True) | ||||
|         elif status == self._PASSWORD_DETECTED:  # The video is protected by a password, retry with | ||||
|             raise ExtractorError('Video %s does not exist' % video_id, expected=True) | ||||
|         elif status == self._PASSWORD_DETECTED: # The video is protected by a password, retry with | ||||
|                                                 # video-password set | ||||
|             video_password = self._downloader.params.get('videopassword', None) | ||||
|             if not video_password: | ||||
|                 raise ExtractorError(u'This video is protected by a password, use the --video-password option', expected=True) | ||||
|                 raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True) | ||||
|             video_json_url += '&md5pass=%s' % hashlib.md5(video_password.encode('utf-8')).hexdigest() | ||||
|             video_json_page = self._download_webpage(video_json_url, video_id, u'Downloading video JSON (video-password set)') | ||||
|             video_json_page = self._download_webpage(video_json_url, video_id, 'Downloading video JSON (video-password set)') | ||||
|             video_json = json.loads(video_json_page) | ||||
|             status = video_json['status'] | ||||
|             if status == self._PASSWORD_NOT_VERIFIED: | ||||
|                 raise ExtractorError(u'Video password is invalid', expected=True) | ||||
|          | ||||
|                 raise ExtractorError('Video password is invalid', expected=True) | ||||
|  | ||||
|         if status != self._SUCCESS: | ||||
|             raise ExtractorError(u'Unexpected status value %s' % status) | ||||
|          | ||||
|             raise ExtractorError('Unexpected status value %s' % status) | ||||
|  | ||||
|         # Extract the URL of the video | ||||
|         video_url = video_json['file_data'] | ||||
|          | ||||
|  | ||||
|         # Video JSON does not provide enough meta data | ||||
|         # We will extract some from the video web page instead | ||||
|         video_page_url = 'http://' + mobj.group('url') | ||||
|         video_page = self._download_webpage(video_page_url, video_id, u'Downloading video page') | ||||
|         video_page = self._download_webpage(video_page_url, video_id, 'Downloading video page') | ||||
|  | ||||
|         # Warning if video is unavailable | ||||
|         warning = self._html_search_regex( | ||||
|             r'<div class="videoUnModer">(.*?)</div>', video_page, | ||||
|             u'warning message', default=None) | ||||
|             'warning message', default=None) | ||||
|         if warning is not None: | ||||
|             self._downloader.report_warning( | ||||
|                 u'Video %s may not be available; smotri said: %s ' % | ||||
|                 'Video %s may not be available; smotri said: %s ' % | ||||
|                 (video_id, warning)) | ||||
|  | ||||
|         # Adult content | ||||
|         if re.search(u'EroConfirmText">', video_page) is not None: | ||||
|         if re.search('EroConfirmText">', video_page) is not None: | ||||
|             self.report_age_confirmation() | ||||
|             confirm_string = self._html_search_regex( | ||||
|                 r'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id, | ||||
|                 video_page, u'confirm string') | ||||
|                 video_page, 'confirm string') | ||||
|             confirm_url = video_page_url + '&confirm=%s' % confirm_string | ||||
|             video_page = self._download_webpage(confirm_url, video_id, u'Downloading video page (age confirmed)') | ||||
|             video_page = self._download_webpage(confirm_url, video_id, 'Downloading video page (age confirmed)') | ||||
|             adult_content = True | ||||
|         else: | ||||
|             adult_content = False | ||||
|          | ||||
|  | ||||
|         # Extract the rest of meta data | ||||
|         video_title = self._search_meta(u'name', video_page, u'title') | ||||
|         video_title = self._search_meta('name', video_page, 'title') | ||||
|         if not video_title: | ||||
|             video_title = os.path.splitext(url_basename(video_url))[0] | ||||
|  | ||||
|         video_description = self._search_meta(u'description', video_page) | ||||
|         END_TEXT = u' на сайте Smotri.com' | ||||
|         video_description = self._search_meta('description', video_page) | ||||
|         END_TEXT = ' на сайте Smotri.com' | ||||
|         if video_description and video_description.endswith(END_TEXT): | ||||
|             video_description = video_description[:-len(END_TEXT)] | ||||
|         START_TEXT = u'Смотреть онлайн ролик ' | ||||
|         START_TEXT = 'Смотреть онлайн ролик ' | ||||
|         if video_description and video_description.startswith(START_TEXT): | ||||
|             video_description = video_description[len(START_TEXT):] | ||||
|         video_thumbnail = self._search_meta(u'thumbnail', video_page) | ||||
|         video_thumbnail = self._search_meta('thumbnail', video_page) | ||||
|  | ||||
|         upload_date_str = self._search_meta(u'uploadDate', video_page, u'upload date') | ||||
|         upload_date_str = self._search_meta('uploadDate', video_page, 'upload date') | ||||
|         if upload_date_str: | ||||
|             upload_date_m = re.search(r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str) | ||||
|             video_upload_date = ( | ||||
| @@ -183,8 +184,8 @@ class SmotriIE(InfoExtractor): | ||||
|             ) | ||||
|         else: | ||||
|             video_upload_date = None | ||||
|          | ||||
|         duration_str = self._search_meta(u'duration', video_page) | ||||
|  | ||||
|         duration_str = self._search_meta('duration', video_page) | ||||
|         if duration_str: | ||||
|             duration_m = re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str) | ||||
|             video_duration = ( | ||||
| @@ -197,19 +198,19 @@ class SmotriIE(InfoExtractor): | ||||
|             ) | ||||
|         else: | ||||
|             video_duration = None | ||||
|          | ||||
|  | ||||
|         video_uploader = self._html_search_regex( | ||||
|             u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>', | ||||
|             video_page, u'uploader', fatal=False, flags=re.MULTILINE|re.DOTALL) | ||||
|          | ||||
|             '<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>', | ||||
|             video_page, 'uploader', fatal=False, flags=re.MULTILINE|re.DOTALL) | ||||
|  | ||||
|         video_uploader_id = self._html_search_regex( | ||||
|             u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info\\(.*?\'([^\']+)\'\\);">', | ||||
|             video_page, u'uploader id', fatal=False, flags=re.MULTILINE|re.DOTALL) | ||||
|          | ||||
|             '<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info\\(.*?\'([^\']+)\'\\);">', | ||||
|             video_page, 'uploader id', fatal=False, flags=re.MULTILINE|re.DOTALL) | ||||
|  | ||||
|         video_view_count = self._html_search_regex( | ||||
|             u'Общее количество просмотров.*?<span class="Number">(\\d+)</span>', | ||||
|             video_page, u'view count', fatal=False, flags=re.MULTILINE|re.DOTALL) | ||||
|                  | ||||
|             'Общее количество просмотров.*?<span class="Number">(\\d+)</span>', | ||||
|             video_page, 'view count', fatal=False, flags=re.MULTILINE|re.DOTALL) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
| @@ -227,8 +228,8 @@ class SmotriIE(InfoExtractor): | ||||
|  | ||||
|  | ||||
| class SmotriCommunityIE(InfoExtractor): | ||||
|     IE_DESC = u'Smotri.com community videos' | ||||
|     IE_NAME = u'smotri:community' | ||||
|     IE_DESC = 'Smotri.com community videos' | ||||
|     IE_NAME = 'smotri:community' | ||||
|     _VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)' | ||||
|      | ||||
|     def _real_extract(self, url): | ||||
| @@ -236,21 +237,21 @@ class SmotriCommunityIE(InfoExtractor): | ||||
|         community_id = mobj.group('communityid') | ||||
|  | ||||
|         url = 'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id | ||||
|         rss = self._download_xml(url, community_id, u'Downloading community RSS') | ||||
|         rss = self._download_xml(url, community_id, 'Downloading community RSS') | ||||
|  | ||||
|         entries = [self.url_result(video_url.text, 'Smotri') | ||||
|                    for video_url in rss.findall('./channel/item/link')] | ||||
|  | ||||
|         description_text = rss.find('./channel/description').text | ||||
|         community_title = self._html_search_regex( | ||||
|             u'^Видео сообщества "([^"]+)"$', description_text, u'community title') | ||||
|             '^Видео сообщества "([^"]+)"$', description_text, 'community title') | ||||
|  | ||||
|         return self.playlist_result(entries, community_id, community_title) | ||||
|  | ||||
|  | ||||
| class SmotriUserIE(InfoExtractor): | ||||
|     IE_DESC = u'Smotri.com user videos' | ||||
|     IE_NAME = u'smotri:user' | ||||
|     IE_DESC = 'Smotri.com user videos' | ||||
|     IE_NAME = 'smotri:user' | ||||
|     _VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -258,22 +259,22 @@ class SmotriUserIE(InfoExtractor): | ||||
|         user_id = mobj.group('userid') | ||||
|  | ||||
|         url = 'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id | ||||
|         rss = self._download_xml(url, user_id, u'Downloading user RSS') | ||||
|         rss = self._download_xml(url, user_id, 'Downloading user RSS') | ||||
|  | ||||
|         entries = [self.url_result(video_url.text, 'Smotri') | ||||
|                    for video_url in rss.findall('./channel/item/link')] | ||||
|  | ||||
|         description_text = rss.find('./channel/description').text | ||||
|         user_nickname = self._html_search_regex( | ||||
|             u'^Видео режиссера (.*)$', description_text, | ||||
|             u'user nickname') | ||||
|             '^Видео режиссера (.*)$', description_text, | ||||
|             'user nickname') | ||||
|  | ||||
|         return self.playlist_result(entries, user_id, user_nickname) | ||||
|  | ||||
|  | ||||
| class SmotriBroadcastIE(InfoExtractor): | ||||
|     IE_DESC = u'Smotri.com broadcasts' | ||||
|     IE_NAME = u'smotri:broadcast' | ||||
|     IE_DESC = 'Smotri.com broadcasts' | ||||
|     IE_NAME = 'smotri:broadcast' | ||||
|     _VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<broadcastid>[^/]+))/?.*' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -281,46 +282,40 @@ class SmotriBroadcastIE(InfoExtractor): | ||||
|         broadcast_id = mobj.group('broadcastid') | ||||
|  | ||||
|         broadcast_url = 'http://' + mobj.group('url') | ||||
|         broadcast_page = self._download_webpage(broadcast_url, broadcast_id, u'Downloading broadcast page') | ||||
|         broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page') | ||||
|  | ||||
|         if re.search(u'>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None: | ||||
|             raise ExtractorError(u'Broadcast %s does not exist' % broadcast_id, expected=True) | ||||
|         if re.search('>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None: | ||||
|             raise ExtractorError('Broadcast %s does not exist' % broadcast_id, expected=True) | ||||
|  | ||||
|         # Adult content | ||||
|         if re.search(u'EroConfirmText">', broadcast_page) is not None: | ||||
|         if re.search('EroConfirmText">', broadcast_page) is not None: | ||||
|  | ||||
|             (username, password) = self._get_login_info() | ||||
|             if username is None: | ||||
|                 raise ExtractorError(u'Erotic broadcasts allowed only for registered users, ' | ||||
|                     u'use --username and --password options to provide account credentials.', expected=True) | ||||
|                 raise ExtractorError('Erotic broadcasts allowed only for registered users, ' | ||||
|                     'use --username and --password options to provide account credentials.', expected=True) | ||||
|  | ||||
|             # Log in | ||||
|             login_form_strs = { | ||||
|                 u'login-hint53': '1', | ||||
|                 u'confirm_erotic': '1', | ||||
|                 u'login': username, | ||||
|                 u'password': password, | ||||
|             login_form = { | ||||
|                 'login-hint53': '1', | ||||
|                 'confirm_erotic': '1', | ||||
|                 'login': username, | ||||
|                 'password': password, | ||||
|             } | ||||
|             # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode | ||||
|             # chokes on unicode | ||||
|             login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items()) | ||||
|             login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8') | ||||
|             login_url = broadcast_url + '/?no_redirect=1' | ||||
|             request = compat_urllib_request.Request(login_url, login_data) | ||||
|             request.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|             broadcast_page = self._download_webpage( | ||||
|                 request, broadcast_id, note=u'Logging in and confirming age') | ||||
|  | ||||
|             if re.search(u'>Неверный логин или пароль<', broadcast_page) is not None: | ||||
|                 raise ExtractorError(u'Unable to log in: bad username or password', expected=True) | ||||
|             request = compat_urllib_request.Request(broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form)) | ||||
|             request.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|             broadcast_page = self._download_webpage(request, broadcast_id, 'Logging in and confirming age') | ||||
|  | ||||
|             if re.search('>Неверный логин или пароль<', broadcast_page) is not None: | ||||
|                 raise ExtractorError('Unable to log in: bad username or password', expected=True) | ||||
|  | ||||
|             adult_content = True | ||||
|         else: | ||||
|             adult_content = False | ||||
|  | ||||
|         ticket = self._html_search_regex( | ||||
|             u'window\.broadcast_control\.addFlashVar\\(\'file\', \'([^\']+)\'\\);', | ||||
|             broadcast_page, u'broadcast ticket') | ||||
|             'window\.broadcast_control\.addFlashVar\\(\'file\', \'([^\']+)\'\\);', | ||||
|             broadcast_page, 'broadcast ticket') | ||||
|  | ||||
|         url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket | ||||
|  | ||||
| @@ -328,22 +323,22 @@ class SmotriBroadcastIE(InfoExtractor): | ||||
|         if broadcast_password: | ||||
|             url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest() | ||||
|  | ||||
|         broadcast_json_page = self._download_webpage(url, broadcast_id, u'Downloading broadcast JSON') | ||||
|         broadcast_json_page = self._download_webpage(url, broadcast_id, 'Downloading broadcast JSON') | ||||
|  | ||||
|         try: | ||||
|             broadcast_json = json.loads(broadcast_json_page) | ||||
|  | ||||
|             protected_broadcast = broadcast_json['_pass_protected'] == 1 | ||||
|             if protected_broadcast and not broadcast_password: | ||||
|                 raise ExtractorError(u'This broadcast is protected by a password, use the --video-password option', expected=True) | ||||
|                 raise ExtractorError('This broadcast is protected by a password, use the --video-password option', expected=True) | ||||
|  | ||||
|             broadcast_offline = broadcast_json['is_play'] == 0 | ||||
|             if broadcast_offline: | ||||
|                 raise ExtractorError(u'Broadcast %s is offline' % broadcast_id, expected=True) | ||||
|                 raise ExtractorError('Broadcast %s is offline' % broadcast_id, expected=True) | ||||
|  | ||||
|             rtmp_url = broadcast_json['_server'] | ||||
|             if not rtmp_url.startswith('rtmp://'): | ||||
|                 raise ExtractorError(u'Unexpected broadcast rtmp URL') | ||||
|                 raise ExtractorError('Unexpected broadcast rtmp URL') | ||||
|  | ||||
|             broadcast_playpath = broadcast_json['_streamName'] | ||||
|             broadcast_thumbnail = broadcast_json['_imgURL'] | ||||
| @@ -354,8 +349,8 @@ class SmotriBroadcastIE(InfoExtractor): | ||||
|             rtmp_conn = 'S:%s' % uuid.uuid4().hex | ||||
|         except KeyError: | ||||
|             if protected_broadcast: | ||||
|                 raise ExtractorError(u'Bad broadcast password', expected=True) | ||||
|             raise ExtractorError(u'Unexpected broadcast JSON') | ||||
|                 raise ExtractorError('Bad broadcast password', expected=True) | ||||
|             raise ExtractorError('Unexpected broadcast JSON') | ||||
|  | ||||
|         return { | ||||
|             'id': broadcast_id, | ||||
|   | ||||
| @@ -1,34 +1,36 @@ | ||||
| import re | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .mtv import MTVServicesInfoExtractor | ||||
|  | ||||
|  | ||||
| class SouthParkStudiosIE(MTVServicesInfoExtractor): | ||||
|     IE_NAME = u'southparkstudios.com' | ||||
|     _VALID_URL = r'(https?://)?(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))' | ||||
|     IE_NAME = 'southparkstudios.com' | ||||
|     _VALID_URL = r'https?://(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))' | ||||
|  | ||||
|     _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured', | ||||
|         u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Bat Daded', | ||||
|             u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.', | ||||
|         'url': 'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured', | ||||
|         'info_dict': { | ||||
|             'id': 'a7bff6c2-ed00-11e0-aca6-0026b9414f30', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Bat Daded', | ||||
|             'description': 'Randy disqualifies South Park by getting into a fight with Bat Dad.', | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|  | ||||
| class SouthparkDeIE(SouthParkStudiosIE): | ||||
|     IE_NAME = u'southpark.de' | ||||
|     _VALID_URL = r'(https?://)?(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))' | ||||
|     IE_NAME = 'southpark.de' | ||||
|     _VALID_URL = r'https?://(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))' | ||||
|     _FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         u'url': u'http://www.southpark.de/clips/uygssh/the-government-wont-respect-my-privacy#tab=featured', | ||||
|         u'file': u'85487c96-b3b9-4e39-9127-ad88583d9bf2.mp4', | ||||
|         u'info_dict': { | ||||
|             u'title': u'The Government Won\'t Respect My Privacy', | ||||
|             u'description': u'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.', | ||||
|         'url': 'http://www.southpark.de/clips/uygssh/the-government-wont-respect-my-privacy#tab=featured', | ||||
|         'info_dict': { | ||||
|             'id': '85487c96-b3b9-4e39-9127-ad88583d9bf2', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'The Government Won\'t Respect My Privacy', | ||||
|             'description': 'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.', | ||||
|         }, | ||||
|     }] | ||||
|   | ||||
| @@ -1,3 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| @@ -6,20 +8,20 @@ from .common import InfoExtractor | ||||
| class SpiegelIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$' | ||||
|     _TESTS = [{ | ||||
|         u'url': u'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html', | ||||
|         u'file': u'1259285.mp4', | ||||
|         u'md5': u'2c2754212136f35fb4b19767d242f66e', | ||||
|         u'info_dict': { | ||||
|             u"title": u"Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv" | ||||
|         } | ||||
|         'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html', | ||||
|         'file': '1259285.mp4', | ||||
|         'md5': '2c2754212136f35fb4b19767d242f66e', | ||||
|         'info_dict': { | ||||
|             'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv', | ||||
|         }, | ||||
|     }, | ||||
|     { | ||||
|         u'url': u'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html', | ||||
|         u'file': u'1309159.mp4', | ||||
|         u'md5': u'f2cdf638d7aa47654e251e1aee360af1', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers' | ||||
|         } | ||||
|         'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html', | ||||
|         'file': '1309159.mp4', | ||||
|         'md5': 'f2cdf638d7aa47654e251e1aee360af1', | ||||
|         'info_dict': { | ||||
|             'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers', | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -29,17 +31,17 @@ class SpiegelIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_title = self._html_search_regex( | ||||
|             r'<div class="module-title">(.*?)</div>', webpage, u'title') | ||||
|             r'<div class="module-title">(.*?)</div>', webpage, 'title') | ||||
|  | ||||
|         xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml' | ||||
|         xml_url = 'http://video2.spiegel.de/flash/' + video_id + '.xml' | ||||
|         idoc = self._download_xml( | ||||
|             xml_url, video_id, | ||||
|             note=u'Downloading XML', errnote=u'Failed to download XML') | ||||
|             note='Downloading XML', errnote='Failed to download XML') | ||||
|  | ||||
|         formats = [ | ||||
|             { | ||||
|                 'format_id': n.tag.rpartition('type')[2], | ||||
|                 'url': u'http://video2.spiegel.de/flash/' + n.find('./filename').text, | ||||
|                 'url': 'http://video2.spiegel.de/flash/' + n.find('./filename').text, | ||||
|                 'width': int(n.find('./width').text), | ||||
|                 'height': int(n.find('./height').text), | ||||
|                 'abr': int(n.find('./audiobitrate').text), | ||||
| @@ -55,10 +57,9 @@ class SpiegelIE(InfoExtractor): | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         info = { | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': video_title, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|         } | ||||
|         return info | ||||
|   | ||||
| @@ -62,24 +62,30 @@ class SubtitlesInfoExtractor(InfoExtractor): | ||||
|                 subtitles[sub_lang] = subtitle | ||||
|         return subtitles | ||||
|  | ||||
|     def _download_subtitle_url(self, sub_lang, url): | ||||
|         return self._download_webpage(url, None, note=False) | ||||
|  | ||||
|     def _request_subtitle_url(self, sub_lang, url): | ||||
|         """ makes the http request for the subtitle """ | ||||
|         try: | ||||
|             sub = self._download_webpage(url, None, note=False) | ||||
|             return self._download_subtitle_url(sub_lang, url) | ||||
|         except ExtractorError as err: | ||||
|             self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err))) | ||||
|             return | ||||
|         if not sub: | ||||
|             self._downloader.report_warning(u'Did not fetch video subtitles') | ||||
|             return | ||||
|         return sub | ||||
|  | ||||
|     def _get_available_subtitles(self, video_id, webpage): | ||||
|         """ | ||||
|         returns {sub_lang: url} or {} if not available | ||||
|         Must be redefined by the subclasses | ||||
|         """ | ||||
|         pass | ||||
|  | ||||
|         # By default, allow implementations to simply pass in the result | ||||
|         assert isinstance(webpage, dict), \ | ||||
|             '_get_available_subtitles not implemented' | ||||
|         return webpage | ||||
|  | ||||
|     def _get_available_automatic_caption(self, video_id, webpage): | ||||
|         """ | ||||
|   | ||||
							
								
								
									
										50
									
								
								youtube_dl/extractor/tinypic.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										50
									
								
								youtube_dl/extractor/tinypic.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,50 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from youtube_dl.utils import ExtractorError | ||||
|  | ||||
|  | ||||
| class TinyPicIE(InfoExtractor): | ||||
|     IE_NAME = 'tinypic' | ||||
|     IE_DESC = 'tinypic.com videos' | ||||
|     _VALID_URL = r'http://tinypic\.com/player\.php\?v=(?P<id>[^&]+)&s=\d+' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://tinypic.com/player.php?v=6xw7tc%3E&s=5#.UtqZmbRFCM8', | ||||
|         'md5': '609b74432465364e72727ebc6203f044', | ||||
|         'info_dict': { | ||||
|             'id': '6xw7tc', | ||||
|             'ext': 'flv', | ||||
|             'title': 'shadow phenomenon weird', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id, 'Downloading page') | ||||
|          | ||||
|         mobj = re.search(r'(?m)fo\.addVariable\("file",\s"(?P<fileid>[\da-z]+)"\);\n' | ||||
|             '\s+fo\.addVariable\("s",\s"(?P<serverid>\d+)"\);', webpage) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError('Video %s does not exist' % video_id, expected=True) | ||||
|  | ||||
|         file_id = mobj.group('fileid') | ||||
|         server_id = mobj.group('serverid') | ||||
|  | ||||
|         KEYWORDS_SUFFIX = ', Video, images, photos, videos, myspace, ebay, video hosting, photo hosting' | ||||
|         keywords = self._html_search_meta('keywords', webpage, 'title') | ||||
|         title = keywords[:-len(KEYWORDS_SUFFIX)] if keywords.endswith(KEYWORDS_SUFFIX) else '' | ||||
|  | ||||
|         video_url = 'http://v%s.tinypic.com/%s.flv' % (server_id, file_id) | ||||
|         thumbnail = 'http://v%s.tinypic.com/%s_th.jpg' % (server_id, file_id) | ||||
|  | ||||
|         return { | ||||
|             'id': file_id, | ||||
|             'url': video_url, | ||||
|             'thumbnail': thumbnail, | ||||
|             'title': title | ||||
|         } | ||||
| @@ -1,3 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| @@ -6,12 +8,13 @@ from .common import InfoExtractor | ||||
| class TrailerAddictIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.traileraddict.com/trailer/prince-avalanche/trailer', | ||||
|         u'file': u'76184.mp4', | ||||
|         u'md5': u'57e39dbcf4142ceb8e1f242ff423fd71', | ||||
|         u'info_dict': { | ||||
|             u"title": u"Prince Avalanche Trailer", | ||||
|             u"description": u"Trailer for Prince Avalanche.Two highway road workers spend the summer of 1988 away from their city lives. The isolated landscape becomes a place of misadventure as the men find themselves at odds with each other and the women they left behind." | ||||
|         'url': 'http://www.traileraddict.com/trailer/prince-avalanche/trailer', | ||||
|         'md5': '41365557f3c8c397d091da510e73ceb4', | ||||
|         'info_dict': { | ||||
|             'id': '76184', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Prince Avalanche Trailer', | ||||
|             'description': 'Trailer for Prince Avalanche.\n\nTwo highway road workers spend the summer of 1988 away from their city lives. The isolated landscape becomes a place of misadventure as the men find themselves at odds with each other and the women they left behind.', | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -22,9 +25,15 @@ class TrailerAddictIE(InfoExtractor): | ||||
|  | ||||
|         title = self._search_regex(r'<title>(.+?)</title>', | ||||
|                 webpage, 'video title').replace(' - Trailer Addict','') | ||||
|         view_count = self._search_regex(r'Views: (.+?)<br />', | ||||
|                 webpage, 'Views Count') | ||||
|         video_id = self._og_search_property('video', webpage, 'Video id').split('=')[1] | ||||
|         view_count_str = self._search_regex( | ||||
|             r'<span class="views_n">([0-9,.]+)</span>', | ||||
|             webpage, 'view count', fatal=False) | ||||
|         view_count = ( | ||||
|             None if view_count_str is None | ||||
|             else int(view_count_str.replace(',', ''))) | ||||
|         video_id = self._search_regex( | ||||
|             r'<param\s+name="movie"\s+value="/emb/([0-9]+)"\s*/>', | ||||
|             webpage, 'video id') | ||||
|  | ||||
|         # Presence of (no)watchplus function indicates HD quality is available | ||||
|         if re.search(r'function (no)?watchplus()', webpage): | ||||
| @@ -39,14 +48,16 @@ class TrailerAddictIE(InfoExtractor): | ||||
|                 info_webpage, 'Download url').replace('%3F','?') | ||||
|         thumbnail_url = self._search_regex(r'&image=(.+?)&', | ||||
|                 info_webpage, 'thumbnail url') | ||||
|         ext = final_url.split('.')[-1].split('?')[0] | ||||
|  | ||||
|         return [{ | ||||
|             'id'          : video_id, | ||||
|             'url'         : final_url, | ||||
|             'ext'         : ext, | ||||
|             'title'       : title, | ||||
|             'thumbnail'   : thumbnail_url, | ||||
|             'description' : self._og_search_description(webpage), | ||||
|             'view_count'  : view_count, | ||||
|         }] | ||||
|         description = self._html_search_regex( | ||||
|             r'(?s)<div class="synopsis">.*?<div class="movie_label_info"[^>]*>(.*?)</div>', | ||||
|             webpage, 'description', fatal=False) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': final_url, | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail_url, | ||||
|             'description': description, | ||||
|             'view_count': view_count, | ||||
|         } | ||||
|   | ||||
| @@ -1,3 +1,4 @@ | ||||
| from __future__ import unicode_literals | ||||
| import base64 | ||||
| import re | ||||
|  | ||||
| @@ -6,15 +7,16 @@ from ..utils import ( | ||||
|     compat_parse_qs, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TutvIE(InfoExtractor): | ||||
|     _VALID_URL=r'https?://(?:www\.)?tu\.tv/videos/(?P<id>[^/?]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?tu\.tv/videos/(?P<id>[^/?]+)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://tu.tv/videos/noah-en-pabellon-cuahutemoc', | ||||
|         u'file': u'2742556.flv', | ||||
|         u'md5': u'5eb766671f69b82e528dc1e7769c5cb2', | ||||
|         u'info_dict': { | ||||
|             u"title": u"Noah en pabellon cuahutemoc" | ||||
|         } | ||||
|         'url': 'http://tu.tv/videos/noah-en-pabellon-cuahutemoc', | ||||
|         'file': '2742556.flv', | ||||
|         'md5': '5eb766671f69b82e528dc1e7769c5cb2', | ||||
|         'info_dict': { | ||||
|             'title': 'Noah en pabellon cuahutemoc', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -22,18 +24,15 @@ class TutvIE(InfoExtractor): | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, u'internal video ID') | ||||
|         internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, 'internal video ID') | ||||
|  | ||||
|         data_url = u'http://tu.tv/flvurl.php?codVideo=' + str(internal_id) | ||||
|         data_content = self._download_webpage(data_url, video_id, note=u'Downloading video info') | ||||
|         data_url = 'http://tu.tv/flvurl.php?codVideo=' + str(internal_id) | ||||
|         data_content = self._download_webpage(data_url, video_id, note='Downloading video info') | ||||
|         data = compat_parse_qs(data_content) | ||||
|         video_url = base64.b64decode(data['kpt'][0]).decode('utf-8') | ||||
|         ext = video_url.partition(u'?')[0].rpartition(u'.')[2] | ||||
|  | ||||
|         info = { | ||||
|         return { | ||||
|             'id': internal_id, | ||||
|             'url': video_url, | ||||
|             'ext': ext, | ||||
|             'title': self._og_search_title(webpage), | ||||
|         } | ||||
|         return [info] | ||||
|   | ||||
| @@ -1,3 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| @@ -10,48 +12,48 @@ from ..utils import ( | ||||
|  | ||||
| class UstreamIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)' | ||||
|     IE_NAME = u'ustream' | ||||
|     IE_NAME = 'ustream' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.ustream.tv/recorded/20274954', | ||||
|         u'file': u'20274954.flv', | ||||
|         u'md5': u'088f151799e8f572f84eb62f17d73e5c', | ||||
|         u'info_dict': { | ||||
|             u"uploader": u"Young Americans for Liberty",  | ||||
|             u"title": u"Young Americans for Liberty February 7, 2012 2:28 AM" | ||||
|         } | ||||
|         'url': 'http://www.ustream.tv/recorded/20274954', | ||||
|         'file': '20274954.flv', | ||||
|         'md5': '088f151799e8f572f84eb62f17d73e5c', | ||||
|         'info_dict': { | ||||
|             "uploader": "Young Americans for Liberty", | ||||
|             "title": "Young Americans for Liberty February 7, 2012 2:28 AM", | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         m = re.match(self._VALID_URL, url) | ||||
|         video_id = m.group('videoID') | ||||
|  | ||||
|         video_url = u'http://tcdn.ustream.tv/video/%s' % video_id | ||||
|         video_url = 'http://tcdn.ustream.tv/video/%s' % video_id | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|  | ||||
|         video_title = self._html_search_regex(r'data-title="(?P<title>.+)"', | ||||
|             webpage, u'title') | ||||
|             webpage, 'title') | ||||
|  | ||||
|         uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>', | ||||
|             webpage, u'uploader', fatal=False, flags=re.DOTALL) | ||||
|             webpage, 'uploader', fatal=False, flags=re.DOTALL) | ||||
|  | ||||
|         thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"', | ||||
|             webpage, u'thumbnail', fatal=False) | ||||
|             webpage, 'thumbnail', fatal=False) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'ext': 'flv', | ||||
|             'title': video_title, | ||||
|             'uploader': uploader, | ||||
|             'thumbnail': thumbnail, | ||||
|         } | ||||
|  | ||||
|         info = { | ||||
|                 'id': video_id, | ||||
|                 'url': video_url, | ||||
|                 'ext': 'flv', | ||||
|                 'title': video_title, | ||||
|                 'uploader': uploader, | ||||
|                 'thumbnail': thumbnail, | ||||
|                } | ||||
|         return info | ||||
|  | ||||
| class UstreamChannelIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)' | ||||
|     IE_NAME = u'ustream:channel' | ||||
|     IE_NAME = 'ustream:channel' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         m = re.match(self._VALID_URL, url) | ||||
|   | ||||
| @@ -1,5 +1,6 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
| import xml.etree.ElementTree | ||||
| import datetime | ||||
|  | ||||
| @@ -22,16 +23,16 @@ class VevoIE(InfoExtractor): | ||||
|            vevo:) | ||||
|         (?P<id>[^&?#]+)''' | ||||
|     _TESTS = [{ | ||||
|         u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', | ||||
|         u'file': u'GB1101300280.mp4', | ||||
|         u"md5": u"06bea460acb744eab74a9d7dcb4bfd61", | ||||
|         u'info_dict': { | ||||
|             u"upload_date": u"20130624", | ||||
|             u"uploader": u"Hurts", | ||||
|             u"title": u"Somebody to Die For", | ||||
|             u"duration": 230.12, | ||||
|             u"width": 1920, | ||||
|             u"height": 1080, | ||||
|         'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', | ||||
|         'file': 'GB1101300280.mp4', | ||||
|         "md5": "06bea460acb744eab74a9d7dcb4bfd61", | ||||
|         'info_dict': { | ||||
|             "upload_date": "20130624", | ||||
|             "uploader": "Hurts", | ||||
|             "title": "Somebody to Die For", | ||||
|             "duration": 230.12, | ||||
|             "width": 1920, | ||||
|             "height": 1080, | ||||
|         } | ||||
|     }] | ||||
|     _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/' | ||||
| @@ -44,7 +45,7 @@ class VevoIE(InfoExtractor): | ||||
|                 if version['version'] > last_version['version']: | ||||
|                     last_version = version | ||||
|         if last_version['version'] == -1: | ||||
|             raise ExtractorError(u'Unable to extract last version of the video') | ||||
|             raise ExtractorError('Unable to extract last version of the video') | ||||
|  | ||||
|         renditions = xml.etree.ElementTree.fromstring(last_version['data']) | ||||
|         formats = [] | ||||
| @@ -85,7 +86,7 @@ class VevoIE(InfoExtractor): | ||||
|             format_url = self._SMIL_BASE_URL + m.group('path') | ||||
|             formats.append({ | ||||
|                 'url': format_url, | ||||
|                 'format_id': u'SMIL_' + m.group('cbr'), | ||||
|                 'format_id': 'SMIL_' + m.group('cbr'), | ||||
|                 'vcodec': m.group('vcodec'), | ||||
|                 'acodec': m.group('acodec'), | ||||
|                 'vbr': int(m.group('vbr')), | ||||
| @@ -101,26 +102,25 @@ class VevoIE(InfoExtractor): | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id | ||||
|         info_json = self._download_webpage(json_url, video_id, u'Downloading json info') | ||||
|         video_info = json.loads(info_json)['video'] | ||||
|         video_info = self._download_json(json_url, video_id)['video'] | ||||
|  | ||||
|         formats = self._formats_from_json(video_info) | ||||
|         try: | ||||
|             smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % ( | ||||
|                 self._SMIL_BASE_URL, video_id, video_id.lower()) | ||||
|             smil_xml = self._download_webpage(smil_url, video_id, | ||||
|                                               u'Downloading SMIL info') | ||||
|                                               'Downloading SMIL info') | ||||
|             formats.extend(self._formats_from_smil(smil_xml)) | ||||
|         except ExtractorError as ee: | ||||
|             if not isinstance(ee.cause, compat_HTTPError): | ||||
|                 raise | ||||
|             self._downloader.report_warning( | ||||
|                 u'Cannot download SMIL information, falling back to JSON ..') | ||||
|                 'Cannot download SMIL information, falling back to JSON ..') | ||||
|  | ||||
|         timestamp_ms = int(self._search_regex( | ||||
|             r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date')) | ||||
|             r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date')) | ||||
|         upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000) | ||||
|         info = { | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': video_info['title'], | ||||
|             'formats': formats, | ||||
| @@ -129,5 +129,3 @@ class VevoIE(InfoExtractor): | ||||
|             'uploader': video_info['mainArtists'][0]['artistName'], | ||||
|             'duration': video_info['duration'], | ||||
|         } | ||||
|  | ||||
|         return info | ||||
|   | ||||
| @@ -6,10 +6,10 @@ import re | ||||
| import itertools | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|  | ||||
|     clean_html, | ||||
|     get_element_by_attribute, | ||||
|     ExtractorError, | ||||
| @@ -19,7 +19,7 @@ from ..utils import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
| class VimeoIE(InfoExtractor): | ||||
| class VimeoIE(SubtitlesInfoExtractor): | ||||
|     """Information extractor for vimeo.com.""" | ||||
|  | ||||
|     # _VALID_URL matches Vimeo URLs | ||||
| @@ -84,6 +84,20 @@ class VimeoIE(InfoExtractor): | ||||
|                 'videopassword': 'youtube-dl', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://vimeo.com/76979871', | ||||
|             'md5': '3363dd6ffebe3784d56f4132317fd446', | ||||
|             'note': 'Video with subtitles', | ||||
|             'info_dict': { | ||||
|                 'id': '76979871', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'The New Vimeo Player (You Know, For Videos)', | ||||
|                 'description': 'md5:2ec900bf97c3f389378a96aee11260ea', | ||||
|                 'upload_date': '20131015', | ||||
|                 'uploader_id': 'staff', | ||||
|                 'uploader': 'Vimeo Staff', | ||||
|             } | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _login(self): | ||||
| @@ -273,19 +287,31 @@ class VimeoIE(InfoExtractor): | ||||
|         if len(formats) == 0: | ||||
|             raise ExtractorError('No known codec found') | ||||
|  | ||||
|         subtitles = {} | ||||
|         text_tracks = config['request'].get('text_tracks') | ||||
|         if text_tracks: | ||||
|             for tt in text_tracks: | ||||
|                 subtitles[tt['lang']] = 'http://vimeo.com' + tt['url'] | ||||
|  | ||||
|         video_subtitles = self.extract_subtitles(video_id, subtitles) | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id, subtitles) | ||||
|             return | ||||
|  | ||||
|         return { | ||||
|             'id':       video_id, | ||||
|             'id': video_id, | ||||
|             'uploader': video_uploader, | ||||
|             'uploader_id': video_uploader_id, | ||||
|             'upload_date':  video_upload_date, | ||||
|             'title':    video_title, | ||||
|             'thumbnail':    video_thumbnail, | ||||
|             'description':  video_description, | ||||
|             'upload_date': video_upload_date, | ||||
|             'title': video_title, | ||||
|             'thumbnail': video_thumbnail, | ||||
|             'description': video_description, | ||||
|             'formats': formats, | ||||
|             'webpage_url': url, | ||||
|             'view_count': view_count, | ||||
|             'like_count': like_count, | ||||
|             'comment_count': comment_count, | ||||
|             'subtitles': video_subtitles, | ||||
|         } | ||||
|  | ||||
|  | ||||
|   | ||||
							
								
								
									
										80
									
								
								youtube_dl/extractor/vube.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										80
									
								
								youtube_dl/extractor/vube.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,80 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import datetime | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class VubeIE(InfoExtractor): | ||||
|     IE_NAME = 'vube' | ||||
|     IE_DESC = 'Vube.com' | ||||
|     _VALID_URL = r'http://vube\.com/[^/]+/(?P<id>[\da-zA-Z]{10})' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon', | ||||
|         'md5': 'f81dcf6d0448e3291f54380181695821', | ||||
|         'info_dict': { | ||||
|             'id': 'YL2qNPkqon', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Chiara Grispo - Price Tag by Jessie J', | ||||
|             'description': 'md5:8ea652a1f36818352428cb5134933313', | ||||
|             'thumbnail': 'http://frame.thestaticvube.com/snap/228x128/102e7e63057-5ebc-4f5c-4065-6ce4ebde131f.jpg', | ||||
|             'uploader': 'Chiara.Grispo', | ||||
|             'uploader_id': '1u3hX0znhP', | ||||
|             'upload_date': '20140103', | ||||
|             'duration': 170.56 | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         video = self._download_json('http://vube.com/api/v2/video/%s' % video_id, | ||||
|             video_id, 'Downloading video JSON') | ||||
|  | ||||
|         public_id = video['public_id'] | ||||
|  | ||||
|         formats = [{'url': 'http://video.thestaticvube.com/video/%s/%s.mp4' % (fmt['media_resolution_id'], public_id), | ||||
|                    'height': int(fmt['height']), | ||||
|                    'abr': int(fmt['audio_bitrate']), | ||||
|                    'vbr': int(fmt['video_bitrate']), | ||||
|                    'format_id': fmt['media_resolution_id'] | ||||
|                    } for fmt in video['mtm'] if fmt['transcoding_status'] == 'processed'] | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         title = video['title'] | ||||
|         description = video.get('description') | ||||
|         thumbnail = video['thumbnail_src'] | ||||
|         if thumbnail.startswith('//'): | ||||
|             thumbnail = 'http:' + thumbnail | ||||
|         uploader = video['user_alias'] | ||||
|         uploader_id = video['user_url_id'] | ||||
|         upload_date = datetime.datetime.fromtimestamp(int(video['upload_time'])).strftime('%Y%m%d') | ||||
|         duration = video['duration'] | ||||
|         view_count = video['raw_view_count'] | ||||
|         like_count = video['total_likes'] | ||||
|         dislike_count= video['total_hates'] | ||||
|  | ||||
|         comment = self._download_json('http://vube.com/api/video/%s/comment' % video_id, | ||||
|             video_id, 'Downloading video comment JSON') | ||||
|  | ||||
|         comment_count = comment['total'] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'formats': formats, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'uploader': uploader, | ||||
|             'uploader_id': uploader_id, | ||||
|             'upload_date': upload_date, | ||||
|             'duration': duration, | ||||
|             'view_count': view_count, | ||||
|             'like_count': like_count, | ||||
|             'dislike_count': dislike_count, | ||||
|             'comment_count': comment_count, | ||||
|         } | ||||
| @@ -5,7 +5,6 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     unescapeHTML, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|   | ||||
| @@ -1,3 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| @@ -9,12 +11,12 @@ from ..utils import ( | ||||
| class YouJizzIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+)\.html$' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.youjizz.com/videos/zeichentrick-1-2189178.html', | ||||
|         u'file': u'2189178.flv', | ||||
|         u'md5': u'07e15fa469ba384c7693fd246905547c', | ||||
|         u'info_dict': { | ||||
|             u"title": u"Zeichentrick 1", | ||||
|             u"age_limit": 18, | ||||
|         'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html', | ||||
|         'file': '2189178.flv', | ||||
|         'md5': '07e15fa469ba384c7693fd246905547c', | ||||
|         'info_dict': { | ||||
|             "title": "Zeichentrick 1", | ||||
|             "age_limit": 18, | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -30,12 +32,12 @@ class YouJizzIE(InfoExtractor): | ||||
|  | ||||
|         # Get the video title | ||||
|         video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>', | ||||
|             webpage, u'title').strip() | ||||
|             webpage, 'title').strip() | ||||
|  | ||||
|         # Get the embed page | ||||
|         result = re.search(r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', webpage) | ||||
|         if result is None: | ||||
|             raise ExtractorError(u'ERROR: unable to extract embed page') | ||||
|             raise ExtractorError('ERROR: unable to extract embed page') | ||||
|  | ||||
|         embed_page_url = result.group(0).strip() | ||||
|         video_id = result.group('videoid') | ||||
| @@ -47,23 +49,23 @@ class YouJizzIE(InfoExtractor): | ||||
|         if m_playlist is not None: | ||||
|             playlist_url = m_playlist.group('playlist') | ||||
|             playlist_page = self._download_webpage(playlist_url, video_id, | ||||
|                                                    u'Downloading playlist page') | ||||
|                                                    'Downloading playlist page') | ||||
|             m_levels = list(re.finditer(r'<level bitrate="(\d+?)" file="(.*?)"', playlist_page)) | ||||
|             if len(m_levels) == 0: | ||||
|                 raise ExtractorError(u'Unable to extract video url') | ||||
|                 raise ExtractorError('Unable to extract video url') | ||||
|             videos = [(int(m.group(1)), m.group(2)) for m in m_levels] | ||||
|             (_, video_url) = sorted(videos)[0] | ||||
|             video_url = video_url.replace('%252F', '%2F') | ||||
|         else: | ||||
|             video_url = self._search_regex(r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);', | ||||
|                                            webpage, u'video URL') | ||||
|                                            webpage, 'video URL') | ||||
|  | ||||
|         info = {'id': video_id, | ||||
|                 'url': video_url, | ||||
|                 'title': video_title, | ||||
|                 'ext': 'flv', | ||||
|                 'format': 'flv', | ||||
|                 'player_url': embed_page_url, | ||||
|                 'age_limit': age_limit} | ||||
|  | ||||
|         return [info] | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': video_title, | ||||
|             'ext': 'flv', | ||||
|             'format': 'flv', | ||||
|             'player_url': embed_page_url, | ||||
|             'age_limit': age_limit, | ||||
|         } | ||||
|   | ||||
| @@ -1662,7 +1662,7 @@ class YoutubeUserIE(InfoExtractor): | ||||
|                     '_type': 'url', | ||||
|                     'url': video_id, | ||||
|                     'ie_key': 'Youtube', | ||||
|                     'id': 'video_id', | ||||
|                     'id': video_id, | ||||
|                     'title': title, | ||||
|                 } | ||||
|         url_results = PagedList(download_page, self._GDATA_PAGE_SIZE) | ||||
|   | ||||
| @@ -1,2 +1,2 @@ | ||||
|  | ||||
| __version__ = '2014.01.28.1' | ||||
| __version__ = '2014.02.03.1' | ||||
|   | ||||
		Reference in New Issue
	
	Block a user