[bandcamp:weekly] Improve and extract more metadata (closes #12758)
This commit is contained in:
		| @@ -14,6 +14,7 @@ from ..utils import ( | |||||||
|     ExtractorError, |     ExtractorError, | ||||||
|     float_or_none, |     float_or_none, | ||||||
|     int_or_none, |     int_or_none, | ||||||
|  |     KNOWN_EXTENSIONS, | ||||||
|     parse_filesize, |     parse_filesize, | ||||||
|     unescapeHTML, |     unescapeHTML, | ||||||
|     update_url_query, |     update_url_query, | ||||||
| @@ -22,7 +23,7 @@ from ..utils import ( | |||||||
|  |  | ||||||
|  |  | ||||||
| class BandcampIE(InfoExtractor): | class BandcampIE(InfoExtractor): | ||||||
|     _VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>.*)' |     _VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>[^/?#&]+)' | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song', |         'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song', | ||||||
|         'md5': 'c557841d5e50261777a6585648adf439', |         'md5': 'c557841d5e50261777a6585648adf439', | ||||||
| @@ -156,7 +157,7 @@ class BandcampIE(InfoExtractor): | |||||||
|  |  | ||||||
| class BandcampAlbumIE(InfoExtractor): | class BandcampAlbumIE(InfoExtractor): | ||||||
|     IE_NAME = 'Bandcamp:album' |     IE_NAME = 'Bandcamp:album' | ||||||
|     _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^?#]+)|/?(?:$|[?#]))' |     _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?' | ||||||
|  |  | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', |         'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', | ||||||
| @@ -225,7 +226,9 @@ class BandcampAlbumIE(InfoExtractor): | |||||||
|  |  | ||||||
|     @classmethod |     @classmethod | ||||||
|     def suitable(cls, url): |     def suitable(cls, url): | ||||||
|         return False if BandcampWeeklyIE.suitable(url) else super(BandcampAlbumIE, cls).suitable(url) |         return (False | ||||||
|  |                 if BandcampWeeklyIE.suitable(url) or BandcampIE.suitable(url) | ||||||
|  |                 else super(BandcampAlbumIE, cls).suitable(url)) | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         mobj = re.match(self._VALID_URL, url) |         mobj = re.match(self._VALID_URL, url) | ||||||
| @@ -258,16 +261,22 @@ class BandcampAlbumIE(InfoExtractor): | |||||||
|  |  | ||||||
|  |  | ||||||
| class BandcampWeeklyIE(InfoExtractor): | class BandcampWeeklyIE(InfoExtractor): | ||||||
|     IE_NAME = 'Bandcamp:bandcamp_weekly' |     IE_NAME = 'Bandcamp:weekly' | ||||||
|     _VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*&)?show=(?P<id>\d+)(?:$|[&#])' |     _VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)' | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'https://bandcamp.com/?show=224', |         'url': 'https://bandcamp.com/?show=224', | ||||||
|         'md5': 'b00df799c733cf7e0c567ed187dea0fd', |         'md5': 'b00df799c733cf7e0c567ed187dea0fd', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '224', |             'id': '224', | ||||||
|             'ext': 'opus', |             'ext': 'opus', | ||||||
|             'title': 'BC Weekly April 4th 2017: Magic Moments', |             'title': 'BC Weekly April 4th 2017 - Magic Moments', | ||||||
|             'description': 'Stones Throw\'s Vex Ruffin, plus up and coming singer Salami Rose Joe Louis, in conversation about their fantastic DIY albums.', |             'description': 'md5:5d48150916e8e02d030623a48512c874', | ||||||
|  |             'duration': 5829.77, | ||||||
|  |             'release_date': '20170404', | ||||||
|  |             'series': 'Bandcamp Weekly', | ||||||
|  |             'episode': 'Magic Moments', | ||||||
|  |             'episode_number': 208, | ||||||
|  |             'episode_id': '224', | ||||||
|         } |         } | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://bandcamp.com/?blah/blah@&show=228', |         'url': 'https://bandcamp.com/?blah/blah@&show=228', | ||||||
| @@ -288,32 +297,53 @@ class BandcampWeeklyIE(InfoExtractor): | |||||||
|  |  | ||||||
|         # This is desired because any invalid show id redirects to `bandcamp.com` |         # This is desired because any invalid show id redirects to `bandcamp.com` | ||||||
|         # which happens to expose the latest Bandcamp Weekly episode. |         # which happens to expose the latest Bandcamp Weekly episode. | ||||||
|         video_id = compat_str(show['show_id']) |         show_id = int_or_none(show.get('show_id')) or int_or_none(video_id) | ||||||
|  |  | ||||||
|         def to_format_dictionaries(audio_stream): |         formats = [] | ||||||
|             dictionaries = [{'format_id': kvp[0], 'url': kvp[1]} for kvp in audio_stream.items()] |         for format_id, format_url in show['audio_stream'].items(): | ||||||
|             known_extensions = ['mp3', 'opus'] |             if not isinstance(format_url, compat_str): | ||||||
|  |                 continue | ||||||
|             for dictionary in dictionaries: |             for known_ext in KNOWN_EXTENSIONS: | ||||||
|                 for ext in known_extensions: |                 if known_ext in format_id: | ||||||
|                     if ext in dictionary['format_id']: |                     ext = known_ext | ||||||
|                         dictionary['ext'] = ext |  | ||||||
|                     break |                     break | ||||||
|  |             else: | ||||||
|             return dictionaries |                 ext = None | ||||||
|  |             formats.append({ | ||||||
|         formats = to_format_dictionaries(show['audio_stream']) |                 'format_id': format_id, | ||||||
|  |                 'url': format_url, | ||||||
|  |                 'ext': ext, | ||||||
|  |                 'vcodec': 'none', | ||||||
|  |             }) | ||||||
|         self._sort_formats(formats) |         self._sort_formats(formats) | ||||||
|  |  | ||||||
|  |         title = show.get('audio_title') or 'Bandcamp Weekly' | ||||||
|  |         subtitle = show.get('subtitle') | ||||||
|  |         if subtitle: | ||||||
|  |             title += ' - %s' % subtitle | ||||||
|  |  | ||||||
|  |         episode_number = None | ||||||
|  |         seq = blob.get('bcw_seq') | ||||||
|  |  | ||||||
|  |         if seq and isinstance(seq, list): | ||||||
|  |             try: | ||||||
|  |                 episode_number = next( | ||||||
|  |                     int_or_none(e.get('episode_number')) | ||||||
|  |                     for e in seq | ||||||
|  |                     if isinstance(e, dict) and int_or_none(e.get('id')) == show_id) | ||||||
|  |             except StopIteration: | ||||||
|  |                 pass | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'title': show['audio_title'] + ': ' + show['subtitle'], |             'title': title, | ||||||
|             'description': show.get('desc'), |             'description': show.get('desc') or show.get('short_desc'), | ||||||
|             'duration': float_or_none(show.get('audio_duration')), |             'duration': float_or_none(show.get('audio_duration')), | ||||||
|             'webpage_url': 'https://bandcamp.com/?show=' + video_id, |  | ||||||
|             'is_live': False, |             'is_live': False, | ||||||
|             'release_date': unified_strdate(show.get('published_date')), |             'release_date': unified_strdate(show.get('published_date')), | ||||||
|             'series': 'Bandcamp Weekly', |             'series': 'Bandcamp Weekly', | ||||||
|  |             'episode': show.get('subtitle'), | ||||||
|  |             'episode_number': episode_number, | ||||||
|             'episode_id': compat_str(video_id), |             'episode_id': compat_str(video_id), | ||||||
|             'formats': formats |             'formats': formats | ||||||
|         } |         } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Sergey M․
					Sergey M․