Compare commits
	
		
			18 Commits
		
	
	
		
			2013.12.02
			...
			2013.12.04
		
	
| Author | SHA1 | Date | |
|---|---|---|---|
|   | 671c0f151d | ||
|   | 27dcce1904 | ||
|   | 84db81815a | ||
|   | fb7abb31af | ||
|   | ce93879a9b | ||
|   | 938384c587 | ||
|   | e9d8e302aa | ||
|   | cb7fb54600 | ||
|   | cf6758d204 | ||
|   | 731e3dde29 | ||
|   | a0eaa341e1 | ||
|   | fb27c2295e | ||
|   | 1b753cb334 | ||
|   | 36a826a50d | ||
|   | 8796857429 | ||
|   | aaebed13a8 | ||
|   | 25939ffe56 | ||
|   | 5270d8cb13 | ||
| @@ -30,7 +30,8 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|     --list-extractors          List all supported extractors and the URLs they | ||||
|                                would handle | ||||
|     --extractor-descriptions   Output descriptions of all supported extractors | ||||
|     --proxy URL                Use the specified HTTP/HTTPS proxy | ||||
|     --proxy URL                Use the specified HTTP/HTTPS proxy. Pass in an | ||||
|                                empty string (--proxy "") for direct connection | ||||
|     --no-check-certificate     Suppress HTTPS certificate validation. | ||||
|     --cache-dir DIR            Location in the filesystem where youtube-dl can | ||||
|                                store downloaded information permanently. By | ||||
| @@ -55,7 +56,7 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|     --dateafter DATE           download only videos uploaded after this date | ||||
|     --no-playlist              download only the currently playing video | ||||
|     --age-limit YEARS          download only videos suitable for the given age | ||||
|     --download-archive FILE    Download only videos not present in the archive | ||||
|     --download-archive FILE    Download only videos not listed in the archive | ||||
|                                file. Record the IDs of all downloaded videos in | ||||
|                                it. | ||||
|  | ||||
| @@ -183,7 +184,7 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|  | ||||
| # CONFIGURATION | ||||
|  | ||||
| You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`. | ||||
| You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<Yourname>\youtube-dl.conf`. | ||||
|  | ||||
| # OUTPUT TEMPLATE | ||||
|  | ||||
|   | ||||
| @@ -22,7 +22,9 @@ from youtube_dl.extractor import ( | ||||
|     LivestreamIE, | ||||
|     NHLVideocenterIE, | ||||
|     BambuserChannelIE, | ||||
|     BandcampAlbumIE | ||||
|     BandcampAlbumIE, | ||||
|     SmotriCommunityIE, | ||||
|     SmotriUserIE | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -119,6 +121,24 @@ class TestPlaylists(unittest.TestCase): | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['title'], u'Nightmare Night EP') | ||||
|         self.assertTrue(len(result['entries']) >= 4) | ||||
|          | ||||
|     def test_smotri_community(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = SmotriCommunityIE(dl) | ||||
|         result = ie.extract('http://smotri.com/community/video/kommuna') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['id'], u'kommuna') | ||||
|         self.assertEqual(result['title'], u'КПРФ') | ||||
|         self.assertTrue(len(result['entries']) >= 4) | ||||
|          | ||||
|     def test_smotri_user(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = SmotriUserIE(dl) | ||||
|         result = ie.extract('http://smotri.com/user/inspector') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['id'], u'inspector') | ||||
|         self.assertEqual(result['title'], u'Inspector') | ||||
|         self.assertTrue(len(result['entries']) >= 9) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -36,6 +36,7 @@ __authors__  = ( | ||||
|     'Marcin Cieślak', | ||||
|     'Anton Larionov', | ||||
|     'Takuya Tsuchida', | ||||
|     'Sergey M.', | ||||
| ) | ||||
|  | ||||
| __license__ = 'Public Domain' | ||||
| @@ -80,11 +81,11 @@ from .PostProcessor import ( | ||||
|  | ||||
|  | ||||
| def parseOpts(overrideArguments=None): | ||||
|     def _readOptions(filename_bytes): | ||||
|     def _readOptions(filename_bytes, default=[]): | ||||
|         try: | ||||
|             optionf = open(filename_bytes) | ||||
|         except IOError: | ||||
|             return [] # silently skip if file is not present | ||||
|             return default  # silently skip if file is not present | ||||
|         try: | ||||
|             res = [] | ||||
|             for l in optionf: | ||||
| @@ -190,7 +191,9 @@ def parseOpts(overrideArguments=None): | ||||
|     general.add_option('--extractor-descriptions', | ||||
|             action='store_true', dest='list_extractor_descriptions', | ||||
|             help='Output descriptions of all supported extractors', default=False) | ||||
|     general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL') | ||||
|     general.add_option( | ||||
|         '--proxy', dest='proxy', default=None, metavar='URL', | ||||
|         help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection') | ||||
|     general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.') | ||||
|     general.add_option( | ||||
|         '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR', | ||||
| @@ -223,7 +226,7 @@ def parseOpts(overrideArguments=None): | ||||
|                          default=None, type=int) | ||||
|     selection.add_option('--download-archive', metavar='FILE', | ||||
|                          dest='download_archive', | ||||
|                          help='Download only videos not present in the archive file. Record the IDs of all downloaded videos in it.') | ||||
|                          help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.') | ||||
|  | ||||
|  | ||||
|     authentication.add_option('-u', '--username', | ||||
| @@ -418,6 +421,8 @@ def parseOpts(overrideArguments=None): | ||||
|         if opts.verbose: | ||||
|             write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n') | ||||
|     else: | ||||
|         systemConf = _readOptions('/etc/youtube-dl.conf') | ||||
|  | ||||
|         xdg_config_home = os.environ.get('XDG_CONFIG_HOME') | ||||
|         if xdg_config_home: | ||||
|             userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config') | ||||
| @@ -427,8 +432,31 @@ def parseOpts(overrideArguments=None): | ||||
|             userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config') | ||||
|             if not os.path.isfile(userConfFile): | ||||
|                 userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf') | ||||
|         systemConf = _readOptions('/etc/youtube-dl.conf') | ||||
|         userConf = _readOptions(userConfFile) | ||||
|         userConf = _readOptions(userConfFile, None) | ||||
|  | ||||
|         if userConf is None: | ||||
|             appdata_dir = os.environ.get('appdata') | ||||
|             if appdata_dir: | ||||
|                 userConf = _readOptions( | ||||
|                     os.path.join(appdata_dir, 'youtube-dl', 'config'), | ||||
|                     default=None) | ||||
|                 if userConf is None: | ||||
|                     userConf = _readOptions( | ||||
|                         os.path.join(appdata_dir, 'youtube-dl', 'config.txt'), | ||||
|                         default=None) | ||||
|  | ||||
|         if userConf is None: | ||||
|             userConf = _readOptions( | ||||
|                 os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'), | ||||
|                 default=None) | ||||
|         if userConf is None: | ||||
|             userConf = _readOptions( | ||||
|                 os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'), | ||||
|                 default=None) | ||||
|  | ||||
|         if userConf is None: | ||||
|             userConf = [] | ||||
|  | ||||
|         commandLineConf = sys.argv[1:] | ||||
|         argv = systemConf + userConf + commandLineConf | ||||
|         opts, args = parser.parse_args(argv) | ||||
|   | ||||
| @@ -121,6 +121,11 @@ from .rutube import RutubeIE | ||||
| from .sina import SinaIE | ||||
| from .slashdot import SlashdotIE | ||||
| from .slideshare import SlideshareIE | ||||
| from .smotri import ( | ||||
|     SmotriIE, | ||||
|     SmotriCommunityIE, | ||||
|     SmotriUserIE, | ||||
| ) | ||||
| from .sohu import SohuIE | ||||
| from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE | ||||
| from .southparkstudios import ( | ||||
|   | ||||
| @@ -113,7 +113,7 @@ class AppleTrailersIE(InfoExtractor): | ||||
|                 }) | ||||
|             formats = sorted(formats, key=lambda f: (f['height'], f['width'])) | ||||
|  | ||||
|             info = { | ||||
|             playlist.append({ | ||||
|                 '_type': 'video', | ||||
|                 'id': video_id, | ||||
|                 'title': title, | ||||
| @@ -124,12 +124,7 @@ class AppleTrailersIE(InfoExtractor): | ||||
|                 'upload_date': upload_date, | ||||
|                 'uploader_id': uploader_id, | ||||
|                 'user_agent': 'QuickTime compatible (youtube-dl)', | ||||
|             } | ||||
|             # TODO: Remove when #980 has been merged | ||||
|             info['url'] = formats[-1]['url'] | ||||
|             info['ext'] = formats[-1]['ext'] | ||||
|  | ||||
|             playlist.append(info) | ||||
|             }) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|   | ||||
| @@ -49,7 +49,7 @@ class ArchiveOrgIE(InfoExtractor): | ||||
|         for f in formats: | ||||
|             f['ext'] = determine_ext(f['url']) | ||||
|  | ||||
|         info = { | ||||
|         return { | ||||
|             '_type': 'video', | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
| @@ -57,12 +57,5 @@ class ArchiveOrgIE(InfoExtractor): | ||||
|             'description': description, | ||||
|             'uploader': uploader, | ||||
|             'upload_date': upload_date, | ||||
|             'thumbnail': data.get('misc', {}).get('image'), | ||||
|         } | ||||
|         thumbnail = data.get('misc', {}).get('image') | ||||
|         if thumbnail: | ||||
|             info['thumbnail'] = thumbnail | ||||
|  | ||||
|         # TODO: Remove when #980 has been merged | ||||
|         info.update(formats[-1]) | ||||
|  | ||||
|         return info | ||||
|   | ||||
| @@ -1,7 +1,7 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .mtv import MTVIE, _media_xml_tag | ||||
| from .mtv import MTVServicesInfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_str, | ||||
|     compat_urllib_parse, | ||||
| @@ -11,7 +11,7 @@ from ..utils import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ComedyCentralIE(MTVIE): | ||||
| class ComedyCentralIE(MTVServicesInfoExtractor): | ||||
|     _VALID_URL = r'http://www.comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)' | ||||
|     _FEED_URL = u'http://comedycentral.com/feeds/mrss/' | ||||
|  | ||||
| @@ -25,12 +25,6 @@ class ComedyCentralIE(MTVIE): | ||||
|             u'description': u'After a certain point, breastfeeding becomes c**kblocking.', | ||||
|         }, | ||||
|     } | ||||
|     # Overwrite MTVIE properties we don't want | ||||
|     _TESTS = [] | ||||
|  | ||||
|     def _get_thumbnail_url(self, uri, itemdoc): | ||||
|         search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail')) | ||||
|         return itemdoc.find(search_path).attrib['url'] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
| @@ -197,7 +191,7 @@ class ComedyCentralShowsIE(InfoExtractor): | ||||
|                 }) | ||||
|  | ||||
|             effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1) | ||||
|             info = { | ||||
|             results.append({ | ||||
|                 'id': shortMediaId, | ||||
|                 'formats': formats, | ||||
|                 'uploader': showId, | ||||
| @@ -205,11 +199,6 @@ class ComedyCentralShowsIE(InfoExtractor): | ||||
|                 'title': effTitle, | ||||
|                 'thumbnail': None, | ||||
|                 'description': compat_str(officialTitle), | ||||
|             } | ||||
|  | ||||
|             # TODO: Remove when #980 has been merged | ||||
|             info.update(info['formats'][-1]) | ||||
|  | ||||
|             results.append(info) | ||||
|             }) | ||||
|  | ||||
|         return results | ||||
|   | ||||
| @@ -364,7 +364,8 @@ class InfoExtractor(object): | ||||
|         if display_name is None: | ||||
|             display_name = name | ||||
|         return self._html_search_regex( | ||||
|             r'''(?ix)<meta(?=[^>]+(?:name|property)=["\']%s["\']) | ||||
|             r'''(?ix)<meta | ||||
|                     (?=[^>]+(?:itemprop|name|property)=["\']%s["\']) | ||||
|                     [^>]+content=["\']([^"\']+)["\']''' % re.escape(name), | ||||
|             html, display_name, fatal=False) | ||||
|  | ||||
|   | ||||
| @@ -28,7 +28,8 @@ class DaumIE(InfoExtractor): | ||||
|         video_id = mobj.group(1) | ||||
|         canonical_url = 'http://tvpot.daum.net/v/%s' % video_id | ||||
|         webpage = self._download_webpage(canonical_url, video_id) | ||||
|         full_id = self._search_regex(r'<link rel="video_src" href=".+?vid=(.+?)"', | ||||
|         full_id = self._search_regex( | ||||
|             r'<iframe src="http://videofarm.daum.net/controller/video/viewer/Video.html\?.*?vid=(.+?)[&"]', | ||||
|             webpage, u'full id') | ||||
|         query = compat_urllib_parse.urlencode({'vid': full_id}) | ||||
|         info = self._download_xml( | ||||
| @@ -56,7 +57,7 @@ class DaumIE(InfoExtractor): | ||||
|                 'format_id': profile, | ||||
|             }) | ||||
|  | ||||
|         info = { | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': info.find('TITLE').text, | ||||
|             'formats': formats, | ||||
| @@ -65,6 +66,3 @@ class DaumIE(InfoExtractor): | ||||
|             'duration': int(info.find('DURATION').text), | ||||
|             'upload_date': info.find('REGDTTM').text[:8], | ||||
|         } | ||||
|         # TODO: Remove when #980 has been merged | ||||
|         info.update(formats[-1]) | ||||
|         return info | ||||
|   | ||||
| @@ -65,7 +65,7 @@ class DreiSatIE(InfoExtractor): | ||||
|             return (qidx, prefer_http, format['video_bitrate']) | ||||
|         formats.sort(key=_sortkey) | ||||
|  | ||||
|         info = { | ||||
|         return { | ||||
|             '_type': 'video', | ||||
|             'id': video_id, | ||||
|             'title': video_title, | ||||
| @@ -76,8 +76,3 @@ class DreiSatIE(InfoExtractor): | ||||
|             'uploader': video_uploader, | ||||
|             'upload_date': upload_date, | ||||
|         } | ||||
|  | ||||
|         # TODO: Remove when #980 has been merged | ||||
|         info.update(formats[-1]) | ||||
|  | ||||
|         return info | ||||
|   | ||||
| @@ -44,13 +44,10 @@ class FazIE(InfoExtractor): | ||||
|             }) | ||||
|  | ||||
|         descr = self._html_search_regex(r'<p class="Content Copy">(.*?)</p>', webpage, u'description') | ||||
|         info = { | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'formats': formats, | ||||
|             'description': descr, | ||||
|             'thumbnail': config.find('STILL/STILL_BIG').text, | ||||
|         } | ||||
|         # TODO: Remove when #980 has been merged | ||||
|         info.update(formats[-1]) | ||||
|         return info | ||||
|   | ||||
| @@ -47,13 +47,10 @@ class GameSpotIE(InfoExtractor): | ||||
|                 'format_id': q, | ||||
|             }) | ||||
|  | ||||
|         info = { | ||||
|         return { | ||||
|             'id': data_video['guid'], | ||||
|             'title': compat_urllib_parse.unquote(data_video['title']), | ||||
|             'formats': formats, | ||||
|             'description': get_meta_content('description', webpage), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|         } | ||||
|         # TODO: Remove when #980 has been merged | ||||
|         info.update(formats[-1]) | ||||
|         return info | ||||
|   | ||||
| @@ -1,13 +1,11 @@ | ||||
| import re | ||||
|  | ||||
| from .mtv import MTVIE, _media_xml_tag | ||||
| from .mtv import MTVServicesInfoExtractor | ||||
|  | ||||
| class GametrailersIE(MTVIE): | ||||
|     """ | ||||
|     Gametrailers use the same videos system as MTVIE, it just changes the feed | ||||
|     url, where the uri is and the method to get the thumbnails. | ||||
|     """ | ||||
|  | ||||
| class GametrailersIE(MTVServicesInfoExtractor): | ||||
|     _VALID_URL = r'http://www.gametrailers.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer', | ||||
|         u'file': u'70e9a5d7-cf25-4a10-9104-6f3e7342ae0d.mp4', | ||||
| @@ -17,15 +15,9 @@ class GametrailersIE(MTVIE): | ||||
|             u'description': u'Faith is back!  Check out the World Premiere trailer for Mirror\'s Edge 2 straight from the EA Press Conference at E3 2013!', | ||||
|         }, | ||||
|     } | ||||
|     # Overwrite MTVIE properties we don't want | ||||
|     _TESTS = [] | ||||
|  | ||||
|     _FEED_URL = 'http://www.gametrailers.com/feeds/mrss' | ||||
|  | ||||
|     def _get_thumbnail_url(self, uri, itemdoc): | ||||
|         search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail')) | ||||
|         return itemdoc.find(search_path).attrib['url'] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|   | ||||
| @@ -43,13 +43,10 @@ class MetacriticIE(InfoExtractor): | ||||
|         description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>', | ||||
|             webpage, u'description', flags=re.DOTALL) | ||||
|  | ||||
|         info = { | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': clip.find('title').text, | ||||
|             'formats': formats, | ||||
|             'description': description, | ||||
|             'duration': int(clip.find('duration').text), | ||||
|         } | ||||
|         # TODO: Remove when #980 has been merged | ||||
|         info.update(formats[-1]) | ||||
|         return info | ||||
|   | ||||
| @@ -10,35 +10,8 @@ from ..utils import ( | ||||
| def _media_xml_tag(tag): | ||||
|     return '{http://search.yahoo.com/mrss/}%s' % tag | ||||
|  | ||||
| class MTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$' | ||||
|  | ||||
|     _FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml', | ||||
|             u'file': u'853555.mp4', | ||||
|             u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8', | ||||
|             u'info_dict': { | ||||
|                 u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"', | ||||
|                 u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             u'add_ie': ['Vevo'], | ||||
|             u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml', | ||||
|             u'file': u'USCJY1331283.mp4', | ||||
|             u'md5': u'73b4e7fcadd88929292fe52c3ced8caf', | ||||
|             u'info_dict': { | ||||
|                 u'title': u'Everything Has Changed', | ||||
|                 u'upload_date': u'20130606', | ||||
|                 u'uploader': u'Taylor Swift', | ||||
|             }, | ||||
|             u'skip': u'VEVO is only available in some countries', | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
| class MTVServicesInfoExtractor(InfoExtractor): | ||||
|     @staticmethod | ||||
|     def _id_from_uri(uri): | ||||
|         return uri.split(':')[-1] | ||||
| @@ -53,7 +26,12 @@ class MTVIE(InfoExtractor): | ||||
|         return base + m.group('finalid') | ||||
|  | ||||
|     def _get_thumbnail_url(self, uri, itemdoc): | ||||
|         return 'http://mtv.mtvnimages.com/uri/' + uri | ||||
|         search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail')) | ||||
|         thumb_node = itemdoc.find(search_path) | ||||
|         if thumb_node is None: | ||||
|             return None | ||||
|         else: | ||||
|             return thumb_node.attrib['url'] | ||||
|  | ||||
|     def _extract_video_formats(self, metadataXml): | ||||
|         if '/error_country_block.swf' in metadataXml: | ||||
| @@ -93,7 +71,7 @@ class MTVIE(InfoExtractor): | ||||
|         else: | ||||
|             description = None | ||||
|  | ||||
|         info = { | ||||
|         return { | ||||
|             'title': itemdoc.find('title').text, | ||||
|             'formats': self._extract_video_formats(mediagen_page), | ||||
|             'id': video_id, | ||||
| @@ -101,11 +79,6 @@ class MTVIE(InfoExtractor): | ||||
|             'description': description, | ||||
|         } | ||||
|  | ||||
|         # TODO: Remove when #980 has been merged | ||||
|         info.update(info['formats'][-1]) | ||||
|  | ||||
|         return info | ||||
|  | ||||
|     def _get_videos_info(self, uri): | ||||
|         video_id = self._id_from_uri(uri) | ||||
|         data = compat_urllib_parse.urlencode({'uri': uri}) | ||||
| @@ -113,6 +86,39 @@ class MTVIE(InfoExtractor): | ||||
|                                          u'Downloading info') | ||||
|         return [self._get_video_info(item) for item in idoc.findall('.//item')] | ||||
|  | ||||
|  | ||||
| class MTVIE(MTVServicesInfoExtractor): | ||||
|     _VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$' | ||||
|  | ||||
|     _FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml', | ||||
|             u'file': u'853555.mp4', | ||||
|             u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8', | ||||
|             u'info_dict': { | ||||
|                 u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"', | ||||
|                 u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             u'add_ie': ['Vevo'], | ||||
|             u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml', | ||||
|             u'file': u'USCJY1331283.mp4', | ||||
|             u'md5': u'73b4e7fcadd88929292fe52c3ced8caf', | ||||
|             u'info_dict': { | ||||
|                 u'title': u'Everything Has Changed', | ||||
|                 u'upload_date': u'20130606', | ||||
|                 u'uploader': u'Taylor Swift', | ||||
|             }, | ||||
|             u'skip': u'VEVO is only available in some countries', | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _get_thumbnail_url(self, uri, itemdoc): | ||||
|         return 'http://mtv.mtvnimages.com/uri/' + uri | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('videoid') | ||||
|   | ||||
| @@ -56,7 +56,7 @@ class NaverIE(InfoExtractor): | ||||
|                 'height': int(format_el.find('height').text), | ||||
|             }) | ||||
|  | ||||
|         info = { | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': info.find('Subject').text, | ||||
|             'formats': formats, | ||||
| @@ -65,6 +65,3 @@ class NaverIE(InfoExtractor): | ||||
|             'upload_date': info.find('WriteDate').text.replace('.', ''), | ||||
|             'view_count': int(info.find('PlayCount').text), | ||||
|         } | ||||
|         # TODO: Remove when #980 has been merged | ||||
|         info.update(formats[-1]) | ||||
|         return info | ||||
|   | ||||
| @@ -30,7 +30,7 @@ class RedTubeIE(InfoExtractor): | ||||
|             r'<source src="(.+?)" type="video/mp4">', webpage, u'video URL') | ||||
|  | ||||
|         video_title = self._html_search_regex( | ||||
|             r'<h1 class="videoTitle slidePanelMovable">(.+?)</h1>', | ||||
|             r'<h1 class="videoTitle[^"]*">(.+?)</h1>', | ||||
|             webpage, u'title') | ||||
|  | ||||
|         # No self-labeling, but they describe themselves as | ||||
|   | ||||
							
								
								
									
										252
									
								
								youtube_dl/extractor/smotri.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										252
									
								
								youtube_dl/extractor/smotri.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,252 @@ | ||||
| # encoding: utf-8 | ||||
|  | ||||
| import re | ||||
| import json | ||||
| import hashlib | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     ExtractorError | ||||
| ) | ||||
|  | ||||
|  | ||||
| class SmotriIE(InfoExtractor): | ||||
|     IE_DESC = u'Smotri.com' | ||||
|     IE_NAME = u'smotri' | ||||
|     _VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/video/view/\?id=(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4}))' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         # real video id 2610366 | ||||
|         { | ||||
|             u'url': u'http://smotri.com/video/view/?id=v261036632ab', | ||||
|             u'file': u'v261036632ab.mp4', | ||||
|             u'md5': u'2a7b08249e6f5636557579c368040eb9', | ||||
|             u'info_dict': { | ||||
|                 u'title': u'катастрофа с камер видеонаблюдения', | ||||
|                 u'uploader': u'rbc2008', | ||||
|                 u'uploader_id': u'rbc08', | ||||
|                 u'upload_date': u'20131118', | ||||
|                 u'description': u'катастрофа с камер видеонаблюдения, видео катастрофа с камер видеонаблюдения', | ||||
|                 u'thumbnail': u'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg', | ||||
|             }, | ||||
|         }, | ||||
|         # real video id 57591 | ||||
|         { | ||||
|             u'url': u'http://smotri.com/video/view/?id=v57591cb20', | ||||
|             u'file': u'v57591cb20.flv', | ||||
|             u'md5': u'830266dfc21f077eac5afd1883091bcd', | ||||
|             u'info_dict': { | ||||
|                 u'title': u'test', | ||||
|                 u'uploader': u'Support Photofile@photofile', | ||||
|                 u'uploader_id': u'support-photofile', | ||||
|                 u'upload_date': u'20070704', | ||||
|                 u'description': u'test, видео test', | ||||
|                 u'thumbnail': u'http://frame4.loadup.ru/03/ed/57591.2.3.jpg', | ||||
|             }, | ||||
|         }, | ||||
|         # video-password | ||||
|         { | ||||
|             u'url': u'http://smotri.com/video/view/?id=v1390466a13c', | ||||
|             u'file': u'v1390466a13c.mp4', | ||||
|             u'md5': u'f6331cef33cad65a0815ee482a54440b', | ||||
|             u'info_dict': { | ||||
|                 u'title': u'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1', | ||||
|                 u'uploader': u'timoxa40', | ||||
|                 u'uploader_id': u'timoxa40', | ||||
|                 u'upload_date': u'20100404', | ||||
|                 u'thumbnail': u'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg', | ||||
|                 u'description': u'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1, видео TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1', | ||||
|             }, | ||||
|             u'params': { | ||||
|                 u'videopassword': u'qwerty', | ||||
|             }, | ||||
|         }, | ||||
|         # age limit + video-password | ||||
|         { | ||||
|             u'url': u'http://smotri.com/video/view/?id=v15408898bcf', | ||||
|             u'file': u'v15408898bcf.flv', | ||||
|             u'md5': u'91e909c9f0521adf5ee86fbe073aad70', | ||||
|             u'info_dict': { | ||||
|                 u'title': u'этот ролик не покажут по ТВ', | ||||
|                 u'uploader': u'zzxxx', | ||||
|                 u'uploader_id': u'ueggb', | ||||
|                 u'upload_date': u'20101001', | ||||
|                 u'thumbnail': u'http://frame3.loadup.ru/75/75/1540889.1.3.jpg', | ||||
|                 u'age_limit': 18, | ||||
|                 u'description': u'этот ролик не покажут по ТВ, видео этот ролик не покажут по ТВ', | ||||
|             }, | ||||
|             u'params': { | ||||
|                 u'videopassword': u'333' | ||||
|             } | ||||
|         } | ||||
|     ] | ||||
|      | ||||
|     _SUCCESS = 0 | ||||
|     _PASSWORD_NOT_VERIFIED = 1 | ||||
|     _PASSWORD_DETECTED = 2 | ||||
|     _VIDEO_NOT_FOUND = 3 | ||||
|  | ||||
|     def _search_meta(self, name, html, display_name=None): | ||||
|         if display_name is None: | ||||
|             display_name = name | ||||
|         return self._html_search_regex( | ||||
|             r'<meta itemprop="%s" content="([^"]+)" />' % re.escape(name), | ||||
|             html, display_name, fatal=False) | ||||
|         return self._html_search_meta(name, html, display_name) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('videoid') | ||||
|         real_video_id = mobj.group('realvideoid') | ||||
|  | ||||
|         # Download video JSON data | ||||
|         video_json_url = 'http://smotri.com/vt.php?id=%s' % real_video_id | ||||
|         video_json_page = self._download_webpage(video_json_url, video_id, u'Downloading video JSON') | ||||
|         video_json = json.loads(video_json_page) | ||||
|          | ||||
|         status = video_json['status'] | ||||
|         if status == self._VIDEO_NOT_FOUND: | ||||
|             raise ExtractorError(u'Video %s does not exist' % video_id, expected=True) | ||||
|         elif status == self._PASSWORD_DETECTED:  # The video is protected by a password, retry with | ||||
|                                                 # video-password set | ||||
|             video_password = self._downloader.params.get('videopassword', None) | ||||
|             if not video_password: | ||||
|                 raise ExtractorError(u'This video is protected by a password, use the --video-password option', expected=True) | ||||
|             video_json_url += '&md5pass=%s' % hashlib.md5(video_password.encode('utf-8')).hexdigest() | ||||
|             video_json_page = self._download_webpage(video_json_url, video_id, u'Downloading video JSON (video-password set)') | ||||
|             video_json = json.loads(video_json_page) | ||||
|             status = video_json['status'] | ||||
|             if status == self._PASSWORD_NOT_VERIFIED: | ||||
|                 raise ExtractorError(u'Video password is invalid', expected=True) | ||||
|          | ||||
|         if status != self._SUCCESS: | ||||
|             raise ExtractorError(u'Unexpected status value %s' % status) | ||||
|          | ||||
|         # Extract the URL of the video | ||||
|         video_url = video_json['file_data'] | ||||
|          | ||||
|         # Video JSON does not provide enough meta data | ||||
|         # We will extract some from the video web page instead | ||||
|         video_page_url = 'http://' + mobj.group('url') | ||||
|         video_page = self._download_webpage(video_page_url, video_id, u'Downloading video page') | ||||
|          | ||||
|         # Adult content | ||||
|         if re.search(u'EroConfirmText">', video_page) is not None: | ||||
|             self.report_age_confirmation() | ||||
|             confirm_string = self._html_search_regex( | ||||
|                 r'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id, | ||||
|                 video_page, u'confirm string') | ||||
|             confirm_url = video_page_url + '&confirm=%s' % confirm_string | ||||
|             video_page = self._download_webpage(confirm_url, video_id, u'Downloading video page (age confirmed)') | ||||
|             adult_content = True | ||||
|         else: | ||||
|             adult_content = False | ||||
|          | ||||
|         # Extract the rest of meta data | ||||
|         video_title = self._search_meta(u'name', video_page, u'title') | ||||
|         if not video_title: | ||||
|             video_title = video_url.rsplit('/', 1)[-1] | ||||
|  | ||||
|         video_description = self._search_meta(u'description', video_page) | ||||
|         END_TEXT = u' на сайте Smotri.com' | ||||
|         if video_description.endswith(END_TEXT): | ||||
|             video_description = video_description[:-len(END_TEXT)] | ||||
|         START_TEXT = u'Смотреть онлайн ролик ' | ||||
|         if video_description.startswith(START_TEXT): | ||||
|             video_description = video_description[len(START_TEXT):] | ||||
|         video_thumbnail = self._search_meta(u'thumbnail', video_page) | ||||
|  | ||||
|         upload_date_str = self._search_meta(u'uploadDate', video_page, u'upload date') | ||||
|         upload_date_m = re.search(r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str) | ||||
|         video_upload_date = ( | ||||
|             ( | ||||
|                 upload_date_m.group('year') + | ||||
|                 upload_date_m.group('month') + | ||||
|                 upload_date_m.group('day') | ||||
|             ) | ||||
|             if upload_date_m else None | ||||
|         ) | ||||
|          | ||||
|         duration_str = self._search_meta(u'duration', video_page) | ||||
|         duration_m = re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str) | ||||
|         video_duration = ( | ||||
|             ( | ||||
|                 (int(duration_m.group('hours')) * 60 * 60) + | ||||
|                 (int(duration_m.group('minutes')) * 60) + | ||||
|                 int(duration_m.group('seconds')) | ||||
|             ) | ||||
|             if duration_m else None | ||||
|         ) | ||||
|          | ||||
|         video_uploader = self._html_search_regex( | ||||
|             u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>', | ||||
|             video_page, u'uploader', fatal=False, flags=re.MULTILINE|re.DOTALL) | ||||
|          | ||||
|         video_uploader_id = self._html_search_regex( | ||||
|             u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info\\(.*?\'([^\']+)\'\\);">', | ||||
|             video_page, u'uploader id', fatal=False, flags=re.MULTILINE|re.DOTALL) | ||||
|          | ||||
|         video_view_count = self._html_search_regex( | ||||
|             u'Общее количество просмотров.*?<span class="Number">(\\d+)</span>', | ||||
|             video_page, u'view count', fatal=False, flags=re.MULTILINE|re.DOTALL) | ||||
|                  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': video_title, | ||||
|             'thumbnail': video_thumbnail, | ||||
|             'description': video_description, | ||||
|             'uploader': video_uploader, | ||||
|             'upload_date': video_upload_date, | ||||
|             'uploader_id': video_uploader_id, | ||||
|             'video_duration': video_duration, | ||||
|             'view_count': video_view_count, | ||||
|             'age_limit': 18 if adult_content else 0, | ||||
|             'video_page_url': video_page_url | ||||
|         } | ||||
|  | ||||
|  | ||||
| class SmotriCommunityIE(InfoExtractor): | ||||
|     IE_DESC = u'Smotri.com community videos' | ||||
|     IE_NAME = u'smotri:community' | ||||
|     _VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)' | ||||
|      | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         community_id = mobj.group('communityid') | ||||
|  | ||||
|         url = 'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id | ||||
|         rss = self._download_xml(url, community_id, u'Downloading community RSS') | ||||
|  | ||||
|         entries = [self.url_result(video_url.text, 'Smotri') | ||||
|                    for video_url in rss.findall('./channel/item/link')] | ||||
|  | ||||
|         description_text = rss.find('./channel/description').text | ||||
|         community_title = self._html_search_regex( | ||||
|             u'^Видео сообщества "([^"]+)"$', description_text, u'community title') | ||||
|  | ||||
|         return self.playlist_result(entries, community_id, community_title) | ||||
|  | ||||
|  | ||||
| class SmotriUserIE(InfoExtractor): | ||||
|     IE_DESC = u'Smotri.com user videos' | ||||
|     IE_NAME = u'smotri:user' | ||||
|     _VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         user_id = mobj.group('userid') | ||||
|  | ||||
|         url = 'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id | ||||
|         rss = self._download_xml(url, user_id, u'Downloading user RSS') | ||||
|  | ||||
|         entries = [self.url_result(video_url.text, 'Smotri') | ||||
|                    for video_url in rss.findall('./channel/item/link')] | ||||
|  | ||||
|         description_text = rss.find('./channel/description').text | ||||
|         user_nickname = self._html_search_regex( | ||||
|             u'^Видео режиссера (.*)$', description_text, | ||||
|             u'user nickname') | ||||
|  | ||||
|         return self.playlist_result(entries, user_id, user_nickname) | ||||
| @@ -1,15 +1,14 @@ | ||||
| import re | ||||
|  | ||||
| from .mtv import MTVIE, _media_xml_tag | ||||
| from .mtv import MTVServicesInfoExtractor | ||||
|  | ||||
|  | ||||
| class SouthParkStudiosIE(MTVIE): | ||||
| class SouthParkStudiosIE(MTVServicesInfoExtractor): | ||||
|     IE_NAME = u'southparkstudios.com' | ||||
|     _VALID_URL = r'(https?://)?(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))' | ||||
|  | ||||
|     _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss' | ||||
|  | ||||
|     # Overwrite MTVIE properties we don't want | ||||
|     _TESTS = [{ | ||||
|         u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured', | ||||
|         u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4', | ||||
| @@ -19,14 +18,6 @@ class SouthParkStudiosIE(MTVIE): | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _get_thumbnail_url(self, uri, itemdoc): | ||||
|         search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail')) | ||||
|         thumb_node = itemdoc.find(search_path) | ||||
|         if thumb_node is None: | ||||
|             return None | ||||
|         else: | ||||
|             return thumb_node.attrib['url'] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         url = u'http://www.' + mobj.group(u'url') | ||||
|   | ||||
| @@ -55,7 +55,7 @@ class TriluliluIE(InfoExtractor): | ||||
|             for fnode in format_doc.findall('./formats/format') | ||||
|         ] | ||||
|  | ||||
|         info = { | ||||
|         return { | ||||
|             '_type': 'video', | ||||
|             'id': video_id, | ||||
|             'formats': formats, | ||||
| @@ -64,7 +64,3 @@ class TriluliluIE(InfoExtractor): | ||||
|             'thumbnail': thumbnail, | ||||
|         } | ||||
|  | ||||
|         # TODO: Remove when #980 has been merged | ||||
|         info.update(formats[-1]) | ||||
|  | ||||
|         return info | ||||
|   | ||||
| @@ -47,7 +47,7 @@ class ViddlerIE(InfoExtractor): | ||||
|             r"thumbnail\s*:\s*'([^']*)'", | ||||
|             webpage, u'thumbnail', fatal=False) | ||||
|  | ||||
|         info = { | ||||
|         return { | ||||
|             '_type': 'video', | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
| @@ -56,9 +56,3 @@ class ViddlerIE(InfoExtractor): | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|         } | ||||
|  | ||||
|         # TODO: Remove when #980 has been merged | ||||
|         info['formats'][-1]['ext'] = determine_ext(info['formats'][-1]['url']) | ||||
|         info.update(info['formats'][-1]) | ||||
|  | ||||
|         return info | ||||
|   | ||||
| @@ -26,7 +26,7 @@ class XHamsterIE(InfoExtractor): | ||||
|     { | ||||
|         u'url': u'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd', | ||||
|         u'file': u'2221348.flv', | ||||
|         u'md5': u'e767b9475de189320f691f49c679c4c7', | ||||
|         u'md5': u'970a94178ca4118c5aa3aaea21211b81', | ||||
|         u'info_dict': { | ||||
|             u"upload_date": u"20130914", | ||||
|             u"uploader_id": u"jojo747400", | ||||
|   | ||||
| @@ -336,7 +336,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|                 u"uploader": u"Philipp Hagemeister", | ||||
|                 u"uploader_id": u"phihag", | ||||
|                 u"upload_date": u"20121002", | ||||
|                 u"description": u"test chars:  \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ." | ||||
|                 u"description": u"test chars:  \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ." | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
| @@ -1366,6 +1366,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         # description | ||||
|         video_description = get_element_by_id("eow-description", video_webpage) | ||||
|         if video_description: | ||||
|             video_description = re.sub(r'''(?x) | ||||
|                 <a\s+ | ||||
|                     (?:[a-zA-Z-]+="[^"]+"\s+)*? | ||||
|                     title="([^"]+)"\s+ | ||||
|                     (?:[a-zA-Z-]+="[^"]+"\s+)*? | ||||
|                     class="yt-uix-redirect-link"\s*> | ||||
|                 [^<]+ | ||||
|                 </a> | ||||
|             ''', r'\1', video_description) | ||||
|             video_description = clean_html(video_description) | ||||
|         else: | ||||
|             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage) | ||||
| @@ -1765,6 +1774,7 @@ class YoutubeSearchIE(SearchInfoExtractor): | ||||
|         return self.playlist_result(videos, query) | ||||
|  | ||||
| class YoutubeSearchDateIE(YoutubeSearchIE): | ||||
|     IE_NAME = YoutubeSearchIE.IE_NAME + ':date' | ||||
|     _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published' | ||||
|     _SEARCH_KEY = 'ytsearchdate' | ||||
|     IE_DESC = u'YouTube.com searches, newest videos first' | ||||
|   | ||||
| @@ -1,2 +1,2 @@ | ||||
|  | ||||
| __version__ = '2013.12.02' | ||||
| __version__ = '2013.12.04' | ||||
|   | ||||
		Reference in New Issue
	
	Block a user