Compare commits

..

2 Commits

Author SHA1 Message Date
Filippo Valsorda
97bc05116e Merge branch 'master' into totalwebcasting 2018-01-07 15:03:28 +01:00
Filippo Valsorda
7608a91ee7 [totalwebcasting] Add new extractor 2017-01-11 18:51:25 -05:00
23 changed files with 304 additions and 749 deletions

View File

@@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.01.14*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.01.14**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.12.31*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.12.31**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2018.01.14
[debug] youtube-dl version 2017.12.31
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@@ -231,5 +231,3 @@ John Dong
Tatsuyuki Ishi
Daniel Weber
Kay Bouché
Yang Hongbo
Lei Wang

View File

@@ -1,42 +1,9 @@
version 2018.01.14
version <unreleased>
Extractors
* [youtube] Fix live streams extraction (#15202)
* [wdr] Bypass geo restriction
* [wdr] Rework extractors (#14598)
+ [wdr] Add support for wdrmaus.de/elefantenseite (#14598)
+ [gamestar] Add support for gamepro.de (#3384)
* [viafree] Skip rtmp formats (#15232)
+ [pandoratv] Add support for mobile URLs (#12441)
+ [pandoratv] Add support for new URL format (#15131)
+ [ximalaya] Add support for ximalaya.com (#14687)
+ [digg] Add support for digg.com (#15214)
* [limelight] Tolerate empty pc formats (#15150, #15151, #15207)
* [ndr:embed:base] Make separate formats extraction non fatal (#15203)
+ [weibo] Add extractor (#15079)
+ [ok] Add support for live streams
* [canalplus] Fix extraction (#15072)
* [bilibili] Fix extraction (#15188)
version 2018.01.07
Core
* [utils] Fix youtube-dl under PyPy3 on Windows
* [YoutubeDL] Output python implementation in debug header
Extractors
+ [jwplatform] Add support for multiple embeds (#15192)
* [mitele] Fix extraction (#15186)
+ [motherless] Add support for groups (#15124)
* [lynda] Relax URL regular expression (#15185)
* [soundcloud] Fallback to avatar picture for thumbnail (#12878)
* [youku] Fix list extraction (#15135)
* [openload] Fix extraction (#15166)
* [lynda] Skip invalid subtitles (#15159)
* [twitch] Pass video id to url_result when extracting playlist (#15139)
* [rtve.es:alacarta] Fix extraction of some new URLs
* [acast] Fix extraction (#15147)
version 2017.12.31

View File

@@ -128,7 +128,7 @@
- **CamdemyFolder**
- **CamWithHer**
- **canalc2.tv**
- **Canalplus**: mycanal.fr and piwiplus.fr
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
- **Canvas**
- **CanvasEen**: canvas.be and een.be
- **CarambaTV**
@@ -210,7 +210,6 @@
- **defense.gouv.fr**
- **democracynow**
- **DHM**: Filmarchiv - Deutsches Historisches Museum
- **Digg**
- **DigitallySpeaking**
- **Digiteka**
- **Discovery**
@@ -479,7 +478,6 @@
- **Moniker**: allmyvideos.net and vidspot.net
- **Morningstar**: morningstar.com
- **Motherless**
- **MotherlessGroup**
- **Motorsport**: motorsport.com
- **MovieClips**
- **MovieFap**
@@ -774,6 +772,7 @@
- **Sport5**
- **SportBoxEmbed**
- **SportDeutschland**
- **Sportschau**
- **Sprout**
- **sr:mediathek**: Saarländischer Rundfunk
- **SRGSSR**
@@ -1002,14 +1001,10 @@
- **WatchIndianPorn**: Watch Indian Porn
- **WDR**
- **wdr:mobile**
- **WDRElefant**
- **WDRPage**
- **Webcaster**
- **WebcasterFeed**
- **WebOfStories**
- **WebOfStoriesPlaylist**
- **Weibo**
- **WeiboMobile**
- **WeiqiTV**: WQTV
- **wholecloud**: WholeCloud
- **Wimp**
@@ -1029,8 +1024,6 @@
- **xiami:artist**: 虾米音乐 - 歌手
- **xiami:collection**: 虾米音乐 - 精选集
- **xiami:song**: 虾米音乐
- **ximalaya**: 喜马拉雅FM
- **ximalaya:album**: 喜马拉雅FM 专辑
- **XMinus**
- **XNXX**
- **Xstream**

View File

@@ -102,7 +102,6 @@ class BiliBiliIE(InfoExtractor):
video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id)))
headers = {
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
'Referer': url
}
headers.update(self.geo_verification_headers())
@@ -117,15 +116,10 @@ class BiliBiliIE(InfoExtractor):
payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid)
sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()
headers = {
'Referer': url
}
headers.update(self.geo_verification_headers())
video_info = self._download_json(
'http://interface.bilibili.com/playurl?%s&sign=%s' % (payload, sign),
video_id, note='Downloading video info page',
headers=headers)
headers=self.geo_verification_headers())
if 'durl' not in video_info:
self._report_error(video_info)

View File

@@ -4,36 +4,59 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlparse
from ..utils import (
dict_get,
# ExtractorError,
# HEADRequest,
int_or_none,
qualities,
remove_end,
unified_strdate,
)
class CanalplusIE(InfoExtractor):
IE_DESC = 'mycanal.fr and piwiplus.fr'
_VALID_URL = r'https?://(?:www\.)?(?P<site>mycanal|piwiplus)\.fr/(?:[^/]+/)*(?P<display_id>[^?/]+)(?:\.html\?.*\bvid=|/p/)(?P<id>\d+)'
IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv'
_VALID_URL = r'''(?x)
https?://
(?:
(?:
(?:(?:www|m)\.)?canalplus\.fr|
(?:www\.)?piwiplus\.fr|
(?:www\.)?d8\.tv|
(?:www\.)?c8\.fr|
(?:www\.)?d17\.tv|
(?:(?:football|www)\.)?cstar\.fr|
(?:www\.)?itele\.fr
)/(?:(?:[^/]+/)*(?P<display_id>[^/?#&]+))?(?:\?.*\bvid=(?P<vid>\d+))?|
player\.canalplus\.fr/#/(?P<id>\d+)
)
'''
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s?format=json'
_SITE_ID_MAP = {
'mycanal': 'cplus',
'canalplus': 'cplus',
'piwiplus': 'teletoon',
'd8': 'd8',
'c8': 'd8',
'd17': 'd17',
'cstar': 'd17',
'itele': 'itele',
}
# Only works for direct mp4 URLs
_GEO_COUNTRIES = ['FR']
_TESTS = [{
'url': 'https://www.mycanal.fr/d17-emissions/lolywood/p/1397061',
'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1192814',
'info_dict': {
'id': '1397061',
'display_id': 'lolywood',
'id': '1405510',
'display_id': 'pid1830-c-zapping',
'ext': 'mp4',
'title': 'Euro 2016 : Je préfère te prévenir - Lolywood - Episode 34',
'description': 'md5:7d97039d455cb29cdba0d652a0efaa5e',
'upload_date': '20160602',
'title': 'Zapping - 02/07/2016',
'description': 'Le meilleur de toutes les chaînes, tous les jours',
'upload_date': '20160702',
},
}, {
# geo restricted, bypassed
@@ -47,12 +70,64 @@ class CanalplusIE(InfoExtractor):
'upload_date': '20140724',
},
'expected_warnings': ['HTTP Error 403: Forbidden'],
}, {
# geo restricted, bypassed
'url': 'http://www.c8.fr/c8-divertissement/ms-touche-pas-a-mon-poste/pid6318-videos-integrales.html?vid=1443684',
'md5': 'bb6f9f343296ab7ebd88c97b660ecf8d',
'info_dict': {
'id': '1443684',
'display_id': 'pid6318-videos-integrales',
'ext': 'mp4',
'title': 'Guess my iep ! - TPMP - 07/04/2017',
'description': 'md5:6f005933f6e06760a9236d9b3b5f17fa',
'upload_date': '20170407',
},
'expected_warnings': ['HTTP Error 403: Forbidden'],
}, {
'url': 'http://www.itele.fr/chroniques/invite-michael-darmon/rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510',
'info_dict': {
'id': '1420176',
'display_id': 'rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510',
'ext': 'mp4',
'title': 'L\'invité de Michaël Darmon du 14/10/2016 - ',
'description': 'Chaque matin du lundi au vendredi, Michaël Darmon reçoit un invité politique à 8h25.',
'upload_date': '20161014',
},
}, {
'url': 'http://football.cstar.fr/cstar-minisite-foot/pid7566-feminines-videos.html?vid=1416769',
'info_dict': {
'id': '1416769',
'display_id': 'pid7566-feminines-videos',
'ext': 'mp4',
'title': 'France - Albanie : les temps forts de la soirée - 20/09/2016',
'description': 'md5:c3f30f2aaac294c1c969b3294de6904e',
'upload_date': '20160921',
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://m.canalplus.fr/?vid=1398231',
'only_matching': True,
}, {
'url': 'http://www.d17.tv/emissions/pid8303-lolywood.html?vid=1397061',
'only_matching': True,
}]
def _real_extract(self, url):
site, display_id, video_id = re.match(self._VALID_URL, url).groups()
mobj = re.match(self._VALID_URL, url)
site_id = self._SITE_ID_MAP[site]
site_id = self._SITE_ID_MAP[compat_urllib_parse_urlparse(url).netloc.rsplit('.', 2)[-2]]
# Beware, some subclasses do not define an id group
display_id = remove_end(dict_get(mobj.groupdict(), ('display_id', 'id', 'vid')), '.html')
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
[r'<canal:player[^>]+?videoId=(["\'])(?P<id>\d+)',
r'id=["\']canal_video_player(?P<id>\d+)',
r'data-video=["\'](?P<id>\d+)'],
webpage, 'video id', default=mobj.group('vid'), group='id')
info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id)
video_data = self._download_json(info_url, video_id, 'Downloading video JSON')
@@ -86,7 +161,7 @@ class CanalplusIE(InfoExtractor):
format_url + '?hdcore=2.11.3', video_id, f4m_id=format_id, fatal=False))
else:
formats.append({
# the secret extracted from ya function in http://player.canalplus.fr/common/js/canalPlayer.js
# the secret extracted ya function in http://player.canalplus.fr/common/js/canalPlayer.js
'url': format_url + '?secret=pqzerjlsmdkjfoiuerhsdlfknaes',
'format_id': format_id,
'preference': preference(format_id),

View File

@@ -1,56 +0,0 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import js_to_json
class DiggIE(InfoExtractor):
    """Resolve digg.com video pages to the extractor of the embedded provider.

    The page is scanned for a ``video_info`` JavaScript object; when it names
    a known provider the request is handed to that provider's extractor,
    otherwise the page URL is passed on to the generic extractor.
    """

    _VALID_URL = r'https?://(?:www\.)?digg\.com/video/(?P<id>[^/?#&]+)'
    _TESTS = [{
        # JWPlatform via provider
        'url': 'http://digg.com/video/sci-fi-short-jonah-daniel-kaluuya-get-out',
        'info_dict': {
            'id': 'LcqvmS0b',
            'ext': 'mp4',
            'title': "'Get Out' Star Daniel Kaluuya Goes On 'Moby Dick'-Like Journey In Sci-Fi Short 'Jonah'",
            'description': 'md5:541bb847648b6ee3d6514bc84b82efda',
            'upload_date': '20180109',
            'timestamp': 1515530551,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # Youtube via provider
        'url': 'http://digg.com/video/dog-boat-seal-play',
        'only_matching': True,
    }, {
        # vimeo as regular embed
        'url': 'http://digg.com/video/dream-girl-short-film',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a ``url_result`` delegating ``url`` to another extractor.

        The 'youtube' and 'jwplayer' providers are routed to the Youtube and
        JWPlatform extractors respectively; everything else (no
        ``video_info``, unparsable ``video_info``, no video id, unknown
        provider) falls back to the Generic extractor.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # default='{}' handles a page without any video_info object.  The
        # trailing `or {}` handles a video_info that is present but cannot be
        # parsed: the non-fatal _parse_json call then yields None, and the
        # .get() calls below must not crash before the Generic fallback.
        info = self._parse_json(
            self._search_regex(
                r'(?s)video_info\s*=\s*({.+?});\n', webpage, 'video info',
                default='{}'),
            display_id, transform_source=js_to_json, fatal=False) or {}

        video_id = info.get('video_id')
        if video_id:
            provider = info.get('provider_name')
            if provider == 'youtube':
                return self.url_result(
                    video_id, ie='Youtube', video_id=video_id)
            elif provider == 'jwplayer':
                return self.url_result(
                    'jwplatform:%s' % video_id, ie='JWPlatform',
                    video_id=video_id)

        return self.url_result(url, 'Generic')

View File

@@ -259,7 +259,6 @@ from .deezer import DeezerPlaylistIE
from .democracynow import DemocracynowIE
from .dfb import DFBIE
from .dhm import DHMIE
from .digg import DiggIE
from .dotsub import DotsubIE
from .douyutv import (
DouyuShowIE,
@@ -991,6 +990,7 @@ from .stitcher import StitcherIE
from .sport5 import Sport5IE
from .sportbox import SportBoxEmbedIE
from .sportdeutschland import SportDeutschlandIE
from .sportschau import SportschauIE
from .sprout import SproutIE
from .srgssr import (
SRGSSRIE,
@@ -1068,6 +1068,7 @@ from .tnaflix import (
from .toggle import ToggleIE
from .tonline import TOnlineIE
from .toongoggles import ToonGogglesIE
from .totalwebcasting import TotalWebCastingIE
from .toutv import TouTvIE
from .toypics import ToypicsUserIE, ToypicsIE
from .traileraddict import TrailerAddictIE
@@ -1288,8 +1289,6 @@ from .watchbox import WatchBoxIE
from .watchindianporn import WatchIndianPornIE
from .wdr import (
WDRIE,
WDRPageIE,
WDRElefantIE,
WDRMobileIE,
)
from .webcaster import (
@@ -1300,10 +1299,6 @@ from .webofstories import (
WebOfStoriesIE,
WebOfStoriesPlaylistIE,
)
from .weibo import (
WeiboIE,
WeiboMobileIE
)
from .weiqitv import WeiqiTVIE
from .wimp import WimpIE
from .wistia import WistiaIE
@@ -1329,10 +1324,6 @@ from .xiami import (
XiamiArtistIE,
XiamiCollectionIE
)
from .ximalaya import (
XimalayaIE,
XimalayaAlbumIE
)
from .xminus import XMinusIE
from .xnxx import XNXXIE
from .xstream import XstreamIE

View File

@@ -1,8 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -11,52 +9,44 @@ from ..utils import (
class GameStarIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?game(?P<site>pro|star)\.de/videos/.*,(?P<id>[0-9]+)\.html'
_TESTS = [{
_VALID_URL = r'https?://(?:www\.)?gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html'
_TEST = {
'url': 'http://www.gamestar.de/videos/trailer,3/hobbit-3-die-schlacht-der-fuenf-heere,76110.html',
'md5': 'ee782f1f8050448c95c5cacd63bc851c',
'md5': '96974ecbb7fd8d0d20fca5a00810cea7',
'info_dict': {
'id': '76110',
'ext': 'mp4',
'title': 'Hobbit 3: Die Schlacht der Fünf Heere - Teaser-Trailer zum dritten Teil',
'description': 'Der Teaser-Trailer zu Hobbit 3: Die Schlacht der Fünf Heere zeigt einige Szenen aus dem dritten Teil der Saga und kündigt den...',
'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1406542380,
'timestamp': 1406542020,
'upload_date': '20140728',
'duration': 17,
'duration': 17
}
}, {
'url': 'http://www.gamepro.de/videos/top-10-indie-spiele-fuer-nintendo-switch-video-tolle-nindies-games-zum-download,95316.html',
'only_matching': True,
}, {
'url': 'http://www.gamestar.de/videos/top-10-indie-spiele-fuer-nintendo-switch-video-tolle-nindies-games-zum-download,95316.html',
'only_matching': True,
}]
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
site = mobj.group('site')
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
url = 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id
# TODO: there are multiple ld+json objects in the webpage,
# while _search_json_ld finds only the first one
json_ld = self._parse_json(self._search_regex(
r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>[^<]+VideoObject[^<]+)</script>',
webpage, 'JSON-LD', group='json_ld'), video_id)
info_dict = self._json_ld(json_ld, video_id)
info_dict['title'] = remove_end(
info_dict['title'], ' - Game%s' % site.title())
info_dict['title'] = remove_end(info_dict['title'], ' - GameStar')
view_count = int_or_none(json_ld.get('interactionCount'))
view_count = json_ld.get('interactionCount')
comment_count = int_or_none(self._html_search_regex(
r'<span>Kommentare</span>\s*<span[^>]+class=["\']count[^>]+>\s*\(\s*([0-9]+)',
webpage, 'comment count', fatal=False))
r'([0-9]+) Kommentare</span>', webpage, 'comment_count',
fatal=False))
info_dict.update({
'id': video_id,
'url': 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id,
'url': url,
'ext': 'mp4',
'view_count': view_count,
'comment_count': comment_count

View File

@@ -2708,9 +2708,9 @@ class GenericIE(InfoExtractor):
return self.url_result(viewlift_url)
# Look for JWPlatform embeds
jwplatform_urls = JWPlatformIE._extract_urls(webpage)
if jwplatform_urls:
return self.playlist_from_matches(jwplatform_urls, video_id, video_title, ie=JWPlatformIE.ie_key())
jwplatform_url = JWPlatformIE._extract_url(webpage)
if jwplatform_url:
return self.url_result(jwplatform_url, 'JWPlatform')
# Look for Digiteka embeds
digiteka_url = DigitekaIE._extract_url(webpage)

View File

@@ -23,14 +23,11 @@ class JWPlatformIE(InfoExtractor):
@staticmethod
def _extract_url(webpage):
urls = JWPlatformIE._extract_urls(webpage)
return urls[0] if urls else None
@staticmethod
def _extract_urls(webpage):
return re.findall(
r'<(?:script|iframe)[^>]+?src=["\']((?:https?:)?//content\.jwplatform\.com/players/[a-zA-Z0-9]{8})',
mobj = re.search(
r'<(?:script|iframe)[^>]+?src=["\'](?P<url>(?:https?:)?//content.jwplatform.com/players/[a-zA-Z0-9]{8})',
webpage)
if mobj:
return mobj.group('url')
def _real_extract(self, url):
video_id = self._match_id(url)

View File

@@ -10,7 +10,6 @@ from ..utils import (
float_or_none,
int_or_none,
smuggle_url,
try_get,
unsmuggle_url,
ExtractorError,
)
@@ -221,12 +220,6 @@ class LimelightBaseIE(InfoExtractor):
'subtitles': subtitles,
}
def _extract_info_helper(self, pc, mobile, i, metadata):
return self._extract_info(
try_get(pc, lambda x: x['playlistItems'][i]['streams'], list) or [],
try_get(mobile, lambda x: x['mediaList'][i]['mobileUrls'], list) or [],
metadata)
class LimelightMediaIE(LimelightBaseIE):
IE_NAME = 'limelight'
@@ -289,7 +282,10 @@ class LimelightMediaIE(LimelightBaseIE):
'getMobilePlaylistByMediaId', 'properties',
smuggled_data.get('source_url'))
return self._extract_info_helper(pc, mobile, 0, metadata)
return self._extract_info(
pc['playlistItems'][0].get('streams', []),
mobile['mediaList'][0].get('mobileUrls', []) if mobile else [],
metadata)
class LimelightChannelIE(LimelightBaseIE):
@@ -330,7 +326,10 @@ class LimelightChannelIE(LimelightBaseIE):
'media', smuggled_data.get('source_url'))
entries = [
self._extract_info_helper(pc, mobile, i, medias['media_list'][i])
self._extract_info(
pc['playlistItems'][i].get('streams', []),
mobile['mediaList'][i].get('mobileUrls', []) if mobile else [],
medias['media_list'][i])
for i in range(len(medias['media_list']))]
return self.playlist_result(entries, channel_id, pc['title'])

View File

@@ -190,12 +190,10 @@ class NDREmbedBaseIE(InfoExtractor):
ext = determine_ext(src, None)
if ext == 'f4m':
formats.extend(self._extract_f4m_formats(
src + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id,
f4m_id='hds', fatal=False))
src + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id, f4m_id='hds'))
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
src, video_id, 'mp4', m3u8_id='hls',
entry_protocol='m3u8_native', fatal=False))
src, video_id, 'mp4', m3u8_id='hls', entry_protocol='m3u8_native'))
else:
quality = f.get('quality')
ff = {

View File

@@ -19,11 +19,11 @@ from ..utils import (
class OdnoklassnikiIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|m|mobile)\.)?(?:odnoklassniki|ok)\.ru/(?:video(?:embed)?|web-api/video/moviePlayer|live)/(?P<id>[\d-]+)'
_VALID_URL = r'https?://(?:(?:www|m|mobile)\.)?(?:odnoklassniki|ok)\.ru/(?:video(?:embed)?|web-api/video/moviePlayer)/(?P<id>[\d-]+)'
_TESTS = [{
# metadata in JSON
'url': 'http://ok.ru/video/20079905452',
'md5': '0b62089b479e06681abaaca9d204f152',
'md5': '6ba728d85d60aa2e6dd37c9e70fdc6bc',
'info_dict': {
'id': '20079905452',
'ext': 'mp4',
@@ -35,6 +35,7 @@ class OdnoklassnikiIE(InfoExtractor):
'like_count': int,
'age_limit': 0,
},
'skip': 'Video has been blocked',
}, {
# metadataUrl
'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
@@ -98,9 +99,6 @@ class OdnoklassnikiIE(InfoExtractor):
}, {
'url': 'http://mobile.ok.ru/video/20079905452',
'only_matching': True,
}, {
'url': 'https://www.ok.ru/live/484531969818',
'only_matching': True,
}]
def _real_extract(self, url):
@@ -186,10 +184,6 @@ class OdnoklassnikiIE(InfoExtractor):
})
return info
assert title
if provider == 'LIVE_TV_APP':
info['title'] = self._live_title(title)
quality = qualities(('4', '0', '1', '2', '3', '5'))
formats = [{
@@ -216,20 +210,6 @@ class OdnoklassnikiIE(InfoExtractor):
if fmt_type:
fmt['quality'] = quality(fmt_type)
# Live formats
m3u8_url = metadata.get('hlsMasterPlaylistUrl')
if m3u8_url:
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', entry_protocol='m3u8',
m3u8_id='hls', fatal=False))
rtmp_url = metadata.get('rtmpUrl')
if rtmp_url:
formats.append({
'url': rtmp_url,
'format_id': 'rtmp',
'ext': 'flv',
})
self._sort_formats(formats)
info['formats'] = formats

View File

@@ -1,8 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_str,
@@ -20,14 +18,7 @@ from ..utils import (
class PandoraTVIE(InfoExtractor):
IE_NAME = 'pandora.tv'
IE_DESC = '판도라TV'
_VALID_URL = r'''(?x)
https?://
(?:
(?:www\.)?pandora\.tv/view/(?P<user_id>[^/]+)/(?P<id>\d+)| # new format
(?:.+?\.)?channel\.pandora\.tv/channel/video\.ptv\?| # old format
m\.pandora\.tv/?\? # mobile
)
'''
_VALID_URL = r'https?://(?:.+?\.)?channel\.pandora\.tv/channel/video\.ptv\?'
_TESTS = [{
'url': 'http://jp.channel.pandora.tv/channel/video.ptv?c1=&prgid=53294230&ch_userid=mikakim&ref=main&lot=cate_01_2',
'info_dict': {
@@ -62,25 +53,14 @@ class PandoraTVIE(InfoExtractor):
# Test metadata only
'skip_download': True,
},
}, {
'url': 'http://www.pandora.tv/view/mikakim/53294230#36797454_new',
'only_matching': True,
}, {
'url': 'http://m.pandora.tv/?c=view&ch_userid=mikakim&prgid=54600346',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
user_id = mobj.group('user_id')
video_id = mobj.group('id')
if not user_id or not video_id:
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
video_id = qs.get('prgid', [None])[0]
user_id = qs.get('ch_userid', [None])[0]
if any(not f for f in (video_id, user_id,)):
raise ExtractorError('Invalid URL', expected=True)
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
video_id = qs.get('prgid', [None])[0]
user_id = qs.get('ch_userid', [None])[0]
if any(not f for f in (video_id, user_id,)):
raise ExtractorError('Invalid URL', expected=True)
data = self._download_json(
'http://m.pandora.tv/?c=view&m=viewJsonApi&ch_userid=%s&prgid=%s'

View File

@@ -0,0 +1,38 @@
# coding: utf-8
from __future__ import unicode_literals
from .wdr import WDRBaseIE
from ..utils import get_element_by_attribute
class SportschauIE(WDRBaseIE):
    """Extractor for sportschau.de video pages, built on the WDR media lookup."""

    IE_NAME = 'Sportschau'
    _VALID_URL = r'https?://(?:www\.)?sportschau\.de/(?:[^/]+/)+video-?(?P<id>[^/#?]+)\.html'
    _TEST = {
        'url': 'http://www.sportschau.de/uefaeuro2016/videos/video-dfb-team-geht-gut-gelaunt-ins-spiel-gegen-polen-100.html',
        'info_dict': {
            'id': 'mdb-1140188',
            'display_id': 'dfb-team-geht-gut-gelaunt-ins-spiel-gegen-polen-100',
            'ext': 'mp4',
            'title': 'DFB-Team geht gut gelaunt ins Spiel gegen Polen',
            'description': 'Vor dem zweiten Gruppenspiel gegen Polen herrscht gute Stimmung im deutschen Team. Insbesondere Bastian Schweinsteiger strotzt vor Optimismus nach seinem Tor gegen die Ukraine.',
            'upload_date': '20160615',
        },
        'skip': 'Geo-restricted to Germany',
    }

    def _real_extract(self, url):
        """Build the info dict for a sportschau.de video page.

        The media itself comes from ``_extract_wdr_video``; the page's
        ``headline`` element and ``description`` meta tag are layered on top.

        Raises:
            ExtractorError: if the page carries no WDR media link.
        """
        # Function-scope import: ExtractorError lives in the same ..utils
        # module this file already imports from.
        from ..utils import ExtractorError

        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = get_element_by_attribute('class', 'headline', webpage)
        description = self._html_search_meta('description', webpage, 'description')

        info = self._extract_wdr_video(webpage, video_id)
        if not info:
            # _extract_wdr_video returns None when it finds no media link;
            # fail with a clear message instead of an AttributeError on
            # None further down.
            raise ExtractorError('Unable to find WDR media on the page')

        # Only override the title supplied by the WDR metadata when the page
        # actually has a headline; a None title would make the result invalid.
        if title:
            info['title'] = title
        info['description'] = description

        return info

View File

@@ -0,0 +1,50 @@
from __future__ import unicode_literals
from .common import InfoExtractor
class TotalWebCastingIE(InfoExtractor):
    """Extractor for multi-part recordings served by totalwebcasting.com."""

    IE_NAME = 'totalwebcasting.com'
    _VALID_URL = r'https?://www\.totalwebcasting\.com/view/\?func=VOFF.*'
    _TEST = {
        'url': 'https://www.totalwebcasting.com/view/?func=VOFF&id=columbia&date=2017-01-04&seq=1',
        'info_dict': {
            'id': '270e1c415d443924485f547403180906731570466a42740764673853041316737548',
            'title': 'Real World Cryptography Conference 2017',
            'description': 'md5:47a31e91ed537a2bb0d3a091659dc80c',
        },
        'playlist_count': 6,
    }

    def _real_extract(self, url):
        """Return a ``multi_video`` result with one entry per mp4 source.

        The page's ``startVideo('...')`` call yields the ``aprm`` token, which
        identifies the event in every following ``func=VLEV`` JSON request and
        doubles as the id of the result.
        """
        # _VALID_URL guarantees a '?', so the query string is always present;
        # it is only used as the display id while downloading the page.
        query_string = url.split('?', 1)[1]
        webpage = self._download_webpage(url, query_string)
        aprm = self._search_regex(r"startVideo\('(\w+)'", webpage, 'aprm')

        overview = self._download_json(
            "https://www.totalwebcasting.com/view/?func=VLEV&aprm=%s&style=G" % aprm,
            aprm)

        # Each "aiTimes" value, minus its last five characters, parses as a
        # part number.  Number 99 is skipped (reason not visible in this file).
        part_numbers = set(
            int(stamp[:-5]) for stamp in overview["aiTimes"].values())
        part_numbers.discard(99)

        title = overview["title"]

        # The description is read from the most recent response: the overview
        # when there are no parts, otherwise the last part fetched.
        latest = overview
        entries = []
        for part in sorted(part_numbers):
            latest = self._download_json(
                "https://www.totalwebcasting.com/view/?func=VLEV&aprm=%s&style=G&refP=1&nf=%d&time=1&cs=1&ns=1" % (aprm, part),
                aprm)
            for source in latest["playerObj"]["clip"]["sources"]:
                if source["type"] == "video/mp4":
                    entries.append({
                        "id": "%s_part%d" % (aprm, part),
                        "url": "https:" + source["src"],
                        "title": title,
                    })

        return {
            '_type': 'multi_video',
            'id': aprm,
            'entries': entries,
            'title': title,
            'description': latest.get("desc"),
        }

View File

@@ -273,8 +273,6 @@ class TVPlayIE(InfoExtractor):
'ext': ext,
}
if video_url.startswith('rtmp'):
if smuggled_data.get('skip_rtmp'):
continue
m = re.search(
r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', video_url)
if not m:
@@ -436,10 +434,6 @@ class ViafreeIE(InfoExtractor):
return self.url_result(
smuggle_url(
'mtg:%s' % video_id,
{
'geo_countries': [
compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1]],
# rtmp host mtgfs.fplive.net for viafree is unresolvable
'skip_rtmp': True,
}),
{'geo_countries': [
compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1]]}),
ie=TVPlayIE.ie_key(), video_id=video_id)

View File

@@ -4,50 +4,49 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urlparse,
)
from ..utils import (
determine_ext,
ExtractorError,
js_to_json,
strip_jsonp,
try_get,
unified_strdate,
update_url_query,
urlhandle_detect_ext,
)
class WDRIE(InfoExtractor):
_VALID_URL = r'https?://deviceids-medp\.wdr\.de/ondemand/\d+/(?P<id>\d+)\.js'
_GEO_COUNTRIES = ['DE']
_TEST = {
'url': 'http://deviceids-medp.wdr.de/ondemand/155/1557833.js',
'info_dict': {
'id': 'mdb-1557833',
'ext': 'mp4',
'title': 'Biathlon-Staffel verpasst Podest bei Olympia-Generalprobe',
'upload_date': '20180112',
},
}
class WDRBaseIE(InfoExtractor):
def _extract_wdr_video(self, webpage, display_id):
# for wdr.de the data-extension is in a tag with the class "mediaLink"
# for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn"
# for wdrmaus, in a tag with the class "videoButton" (previously a link
# to the page in a multiline "videoLink"-tag)
json_metadata = self._html_search_regex(
r'''(?sx)class=
(?:
(["\'])(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b.*?\1[^>]+|
(["\'])videoLink\b.*?\2[\s]*>\n[^\n]*
)data-extension=(["\'])(?P<data>(?:(?!\3).)+)\3
''',
webpage, 'media link', default=None, group='data')
def _real_extract(self, url):
video_id = self._match_id(url)
if not json_metadata:
return
media_link_obj = self._parse_json(json_metadata, display_id,
transform_source=js_to_json)
jsonp_url = media_link_obj['mediaObj']['url']
metadata = self._download_json(
url, video_id, transform_source=strip_jsonp)
jsonp_url, display_id, transform_source=strip_jsonp)
is_live = metadata.get('mediaType') == 'live'
tracker_data = metadata['trackerData']
media_resource = metadata['mediaResource']
metadata_tracker_data = metadata['trackerData']
metadata_media_resource = metadata['mediaResource']
formats = []
# check if the metadata contains a direct URL to a file
for kind, media_resource in media_resource.items():
for kind, media_resource in metadata_media_resource.items():
if kind not in ('dflt', 'alt'):
continue
@@ -58,13 +57,13 @@ class WDRIE(InfoExtractor):
ext = determine_ext(medium_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
medium_url, video_id, 'mp4', 'm3u8_native',
medium_url, display_id, 'mp4', 'm3u8_native',
m3u8_id='hls'))
elif ext == 'f4m':
manifest_url = update_url_query(
medium_url, {'hdcore': '3.2.0', 'plugin': 'aasp-3.2.0.77.18'})
formats.extend(self._extract_f4m_formats(
manifest_url, video_id, f4m_id='hds', fatal=False))
manifest_url, display_id, f4m_id='hds', fatal=False))
elif ext == 'smil':
formats.extend(self._extract_smil_formats(
medium_url, 'stream', fatal=False))
@@ -74,7 +73,7 @@ class WDRIE(InfoExtractor):
}
if ext == 'unknown_video':
urlh = self._request_webpage(
medium_url, video_id, note='Determining extension')
medium_url, display_id, note='Determining extension')
ext = urlhandle_detect_ext(urlh)
a_format['ext'] = ext
formats.append(a_format)
@@ -82,30 +81,30 @@ class WDRIE(InfoExtractor):
self._sort_formats(formats)
subtitles = {}
caption_url = media_resource.get('captionURL')
caption_url = metadata_media_resource.get('captionURL')
if caption_url:
subtitles['de'] = [{
'url': caption_url,
'ext': 'ttml',
}]
title = tracker_data['trackerClipTitle']
title = metadata_tracker_data['trackerClipTitle']
return {
'id': tracker_data.get('trackerClipId', video_id),
'title': self._live_title(title) if is_live else title,
'alt_title': tracker_data.get('trackerClipSubcategory'),
'id': metadata_tracker_data.get('trackerClipId', display_id),
'display_id': display_id,
'title': title,
'alt_title': metadata_tracker_data.get('trackerClipSubcategory'),
'formats': formats,
'subtitles': subtitles,
'upload_date': unified_strdate(tracker_data.get('trackerClipAirTime')),
'is_live': is_live,
'upload_date': unified_strdate(metadata_tracker_data.get('trackerClipAirTime')),
}
class WDRPageIE(InfoExtractor):
class WDRIE(WDRBaseIE):
_CURRENT_MAUS_URL = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/){1,2}[^/?#]+\.php5'
_PAGE_REGEX = r'/(?:mediathek/)?(?:[^/]+/)*(?P<display_id>[^/]+)\.html'
_VALID_URL = r'https?://(?:www\d?\.)?(?:wdr\d?|sportschau)\.de' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL
_PAGE_REGEX = r'/(?:mediathek/)?[^/]+/(?P<type>[^/]+)/(?P<display_id>.+)\.html'
_VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL
_TESTS = [
{
@@ -125,7 +124,6 @@ class WDRPageIE(InfoExtractor):
'ext': 'ttml',
}]},
},
'skip': 'HTTP Error 404: Not Found',
},
{
'url': 'http://www1.wdr.de/mediathek/audio/wdr3/wdr3-gespraech-am-samstag/audio-schriftstellerin-juli-zeh-100.html',
@@ -141,17 +139,19 @@ class WDRPageIE(InfoExtractor):
'is_live': False,
'subtitles': {}
},
'skip': 'HTTP Error 404: Not Found',
},
{
'url': 'http://www1.wdr.de/mediathek/video/live/index.html',
'info_dict': {
'id': 'mdb-1406149',
'id': 'mdb-103364',
'ext': 'mp4',
'title': r're:^WDR Fernsehen im Livestream \(nur in Deutschland erreichbar\) [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'display_id': 'index',
'title': r're:^WDR Fernsehen im Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'alt_title': 'WDR Fernsehen Live',
'upload_date': '20150101',
'upload_date': None,
'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9',
'is_live': True,
'subtitles': {}
},
'params': {
'skip_download': True, # m3u8 download
@@ -159,18 +159,19 @@ class WDRPageIE(InfoExtractor):
},
{
'url': 'http://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html',
'playlist_mincount': 7,
'playlist_mincount': 8,
'info_dict': {
'id': 'aktuelle-stunde-120',
'id': 'aktuelle-stunde/aktuelle-stunde-120',
},
},
{
'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
'info_dict': {
'id': 'mdb-1552552',
'id': 'mdb-1323501',
'ext': 'mp4',
'upload_date': 're:^[0-9]{8}$',
'title': 're:^Die Sendung mit der Maus vom [0-9.]{10}$',
'description': 'Die Seite mit der Maus -',
},
'skip': 'The id changes from week to week because of the new episode'
},
@@ -182,6 +183,7 @@ class WDRPageIE(InfoExtractor):
'ext': 'mp4',
'upload_date': '20130919',
'title': 'Sachgeschichte - Achterbahn ',
'description': 'Die Seite mit der Maus -',
},
},
{
@@ -189,114 +191,52 @@ class WDRPageIE(InfoExtractor):
# Live stream, MD5 unstable
'info_dict': {
'id': 'mdb-869971',
'ext': 'mp4',
'title': r're:^COSMO Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'ext': 'flv',
'title': 'COSMO Livestream',
'description': 'md5:2309992a6716c347891c045be50992e4',
'upload_date': '20160101',
},
'params': {
'skip_download': True, # m3u8 download
}
},
{
'url': 'http://www.sportschau.de/handballem2018/handball-nationalmannschaft-em-stolperstein-vorrunde-100.html',
'info_dict': {
'id': 'mdb-1556012',
'ext': 'mp4',
'title': 'DHB-Vizepräsident Bob Hanning - "Die Weltspitze ist extrem breit"',
'upload_date': '20180111',
},
'params': {
'skip_download': True,
},
},
{
'url': 'http://www.sportschau.de/handballem2018/audio-vorschau---die-handball-em-startet-mit-grossem-favoritenfeld-100.html',
'only_matching': True,
}
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
url_type = mobj.group('type')
page_url = mobj.group('page_url')
display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id)
entries = []
info_dict = self._extract_wdr_video(webpage, display_id)
# Article with several videos
# for wdr.de the data-extension is in a tag with the class "mediaLink"
# for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn"
# for wdrmaus, in a tag with the class "videoButton" (previously a link
# to the page in a multiline "videoLink"-tag)
for mobj in re.finditer(
r'''(?sx)class=
(?:
(["\'])(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b.*?\1[^>]+|
(["\'])videoLink\b.*?\2[\s]*>\n[^\n]*
)data-extension=(["\'])(?P<data>(?:(?!\3).)+)\3
''', webpage):
media_link_obj = self._parse_json(
mobj.group('data'), display_id, transform_source=js_to_json,
fatal=False)
if not media_link_obj:
continue
jsonp_url = try_get(
media_link_obj, lambda x: x['mediaObj']['url'], compat_str)
if jsonp_url:
entries.append(self.url_result(jsonp_url, ie=WDRIE.ie_key()))
# Playlist (e.g. https://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html)
if not entries:
if not info_dict:
entries = [
self.url_result(
compat_urlparse.urljoin(url, mobj.group('href')),
ie=WDRPageIE.ie_key())
for mobj in re.finditer(
r'<a[^>]+\bhref=(["\'])(?P<href>(?:(?!\1).)+)\1[^>]+\bdata-extension=',
webpage) if re.match(self._PAGE_REGEX, mobj.group('href'))
self.url_result(page_url + href[0], 'WDR')
for href in re.findall(
r'<a href="(%s)"[^>]+data-extension=' % self._PAGE_REGEX,
webpage)
]
return self.playlist_result(entries, playlist_id=display_id)
if entries: # Playlist page
return self.playlist_result(entries, playlist_id=display_id)
raise ExtractorError('No downloadable streams found', expected=True)
class WDRElefantIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)wdrmaus\.de/elefantenseite/#(?P<id>.+)'
_TEST = {
'url': 'http://www.wdrmaus.de/elefantenseite/#folge_ostern_2015',
'info_dict': {
'title': 'Folge Oster-Spezial 2015',
'id': 'mdb-1088195',
'ext': 'mp4',
'age_limit': None,
'upload_date': '20150406'
},
'params': {
'skip_download': True,
},
}
is_live = url_type == 'live'
def _real_extract(self, url):
display_id = self._match_id(url)
if is_live:
info_dict.update({
'title': self._live_title(info_dict['title']),
'upload_date': None,
})
elif 'upload_date' not in info_dict:
info_dict['upload_date'] = unified_strdate(self._html_search_meta('DC.Date', webpage, 'upload date'))
# Table of Contents seems to always be at this address, so fetch it directly.
# The website fetches configurationJS.php5, which links to tableOfContentsJS.php5.
table_of_contents = self._download_json(
'https://www.wdrmaus.de/elefantenseite/data/tableOfContentsJS.php5',
display_id)
if display_id not in table_of_contents:
raise ExtractorError(
'No entry in site\'s table of contents for this URL. '
'Is the fragment part of the URL (after the #) correct?',
expected=True)
xml_metadata_path = table_of_contents[display_id]['xmlPath']
xml_metadata = self._download_xml(
'https://www.wdrmaus.de/elefantenseite/' + xml_metadata_path,
display_id)
zmdb_url_element = xml_metadata.find('./movie/zmdb_url')
if zmdb_url_element is None:
raise ExtractorError(
'%s is not a video' % display_id, expected=True)
return self.url_result(zmdb_url_element.text, ie=WDRIE.ie_key())
info_dict.update({
'description': self._html_search_meta('Description', webpage),
'is_live': is_live,
})
return info_dict
class WDRMobileIE(InfoExtractor):

View File

@@ -1,140 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
import json
import random
import re
from ..compat import (
compat_parse_qs,
compat_str,
)
from ..utils import (
js_to_json,
strip_jsonp,
urlencode_postdata,
)
class WeiboIE(InfoExtractor):
    # Extracts the video attached to a weibo.com status page.
    _VALID_URL = r'https?://weibo\.com/[0-9]+/(?P<id>[a-zA-Z0-9]+)'
    _TEST = {
        'url': 'https://weibo.com/6275294458/Fp6RGfbff?type=comment',
        'info_dict': {
            'id': 'Fp6RGfbff',
            'ext': 'mp4',
            'title': 'You should have servants to massage you,... 来自Hosico_猫 - 微博',
        }
    }

    def _register_visitor(self, video_id, referer):
        # Performs the two passport.weibo.com requests needed on a first
        # visit: "genvisitor" (a JSONP response, hence strip_jsonp) followed
        # by the "incarnate" callback that consumes the returned tid and
        # confidence values.
        fingerprint = json.dumps({
            'os': '2',
            'browser': 'Gecko57,0,0,0',
            'fonts': 'undefined',
            'screenInfo': '1440*900*24',
            'plugins': '',
        })
        post_body = urlencode_postdata({
            'cb': 'gen_callback',
            'fp': fingerprint,
        })
        visitor_data = self._download_json(
            'https://passport.weibo.com/visitor/genvisitor', video_id,
            note='Generating first-visit data',
            transform_source=strip_jsonp,
            headers={'Referer': referer},
            data=post_body)

        tid = visitor_data['data']['tid']
        # The confidence value is sent zero-padded to three digits.
        confidence = '%03d' % visitor_data['data']['confidence']

        callback_query = {
            'a': 'incarnate',
            't': tid,
            'w': 2,
            'c': confidence,
            'cb': 'cross_domain',
            'from': 'weibo',
            '_rand': random.random(),
        }
        self._download_webpage(
            'https://passport.weibo.com/visitor/visitor', video_id,
            note='Running first-visit callback',
            query=callback_query)

    def _real_extract(self, url):
        video_id = self._match_id(url)

        # The final URL of this request doubles as the Referer for the
        # genvisitor call when we end up on passport.weibo.com.
        webpage, handle = self._download_webpage_handle(url, video_id)
        landing_url = handle.geturl()

        if 'passport.weibo.com' in landing_url:
            # First visit: run the visitor handshake, then fetch the page again.
            self._register_visitor(video_id, landing_url)
            webpage = self._download_webpage(
                url, video_id, note='Revisiting webpage')

        title = self._html_search_regex(
            r'<title>(.+?)</title>', webpage, 'title')

        # The "video-sources" attribute holds a query string keyed by height.
        raw_sources = self._search_regex(
            r'video-sources=\\\"(.+?)\"', webpage, 'video_sources')
        sources_by_height = compat_parse_qs(raw_sources)

        formats = []
        for height in (480, 720):
            candidates = sources_by_height.get(compat_str(height))
            if candidates and isinstance(candidates, list):
                formats.append({
                    'url': candidates[0],
                    'height': height,
                })
        self._sort_formats(formats)

        uploader = self._og_search_property(
            'nick-name', webpage, 'uploader', default=None)

        return {
            'id': video_id,
            'title': title,
            'uploader': uploader,
            'formats': formats
        }
class WeiboMobileIE(InfoExtractor):
    # Extracts the video attached to an m.weibo.cn status page.
    _VALID_URL = r'https?://m\.weibo\.cn/status/(?P<id>[0-9]+)(\?.+)?'
    _TEST = {
        'url': 'https://m.weibo.cn/status/4189191225395228?wm=3333_2001&sourcetype=weixin&featurecode=newtitle&from=singlemessage&isappinstalled=0',
        'info_dict': {
            'id': '4189191225395228',
            'ext': 'mp4',
            'title': '午睡当然是要甜甜蜜蜜的啦',
            'uploader': '柴犬柴犬'
        }
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id, note='visit the page')

        # The status metadata is embedded in the page as
        # "var $render_data = [{...}][0] || {};" and is parsed as JS, not
        # strict JSON (hence js_to_json).
        render_data_js = self._search_regex(
            r'var\s+\$render_data\s*=\s*\[({.*})\]\[0\]\s*\|\|\s*{};',
            webpage, 'js_code', flags=re.DOTALL)
        weibo_info = self._parse_json(
            render_data_js, video_id, transform_source=js_to_json)

        status = weibo_info.get('status', {})
        # NOTE(review): 'page_info' is fetched with .get() but subscripted
        # unconditionally below; presumably absent for statuses without
        # media - confirm and decide how that case should be reported.
        media_page = status.get('page_info')

        return {
            'id': video_id,
            'title': status['status_title'],
            'uploader': status.get('user', {}).get('screen_name'),
            'url': media_page['media_info']['stream_url']
        }

View File

@@ -1,233 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
import itertools
import re
from .common import InfoExtractor
class XimalayaBaseIE(InfoExtractor):
    # Common base class for the Ximalaya extractors.
    # NOTE(review): presumably consumed by InfoExtractor's geo-restriction
    # handling - confirm against the base class.
    _GEO_COUNTRIES = ['CN']
class XimalayaIE(XimalayaBaseIE):
    # Extracts a single Ximalaya sound (audio track) from its www. or m. page.
    IE_NAME = 'ximalaya'
    IE_DESC = '喜马拉雅FM'
    _VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(?P<uid>[0-9]+)/sound/(?P<id>[0-9]+)'
    # Filled with (scheme, uploader id).
    _USER_URL_FORMAT = '%s://www.ximalaya.com/zhubo/%i/'
    _TESTS = [
        {
            'url': 'http://www.ximalaya.com/61425525/sound/47740352/',
            'info_dict': {
                'id': '47740352',
                'ext': 'm4a',
                'uploader': '小彬彬爱听书',
                'uploader_id': 61425525,
                'uploader_url': 'http://www.ximalaya.com/zhubo/61425525/',
                'title': '261.唐诗三百首.卷八.送孟浩然之广陵.李白',
                'description': "contains:《送孟浩然之广陵》\n作者:李白\n故人西辞黄鹤楼,烟花三月下扬州。\n孤帆远影碧空尽,惟见长江天际流。",
                'thumbnails': [
                    {
                        'name': 'cover_url',
                        'url': r're:^https?://.*\.jpg$',
                    },
                    {
                        'name': 'cover_url_142',
                        'url': r're:^https?://.*\.jpg$',
                        'width': 180,
                        'height': 180
                    }
                ],
                'categories': ['renwen', '人文'],
                'duration': 93,
                'view_count': int,
                'like_count': int,
            }
        },
        {
            'url': 'http://m.ximalaya.com/61425525/sound/47740352/',
            'info_dict': {
                'id': '47740352',
                'ext': 'm4a',
                'uploader': '小彬彬爱听书',
                'uploader_id': 61425525,
                'uploader_url': 'http://www.ximalaya.com/zhubo/61425525/',
                'title': '261.唐诗三百首.卷八.送孟浩然之广陵.李白',
                'description': "contains:《送孟浩然之广陵》\n作者:李白\n故人西辞黄鹤楼,烟花三月下扬州。\n孤帆远影碧空尽,惟见长江天际流。",
                'thumbnails': [
                    {
                        'name': 'cover_url',
                        'url': r're:^https?://.*\.jpg$',
                    },
                    {
                        'name': 'cover_url_142',
                        'url': r're:^https?://.*\.jpg$',
                        'width': 180,
                        'height': 180
                    }
                ],
                'categories': ['renwen', '人文'],
                'duration': 93,
                'view_count': int,
                'like_count': int,
            }
        },
        {
            'url': 'https://www.ximalaya.com/11045267/sound/15705996/',
            'info_dict': {
                'id': '15705996',
                'ext': 'm4a',
                'uploader': '李延隆老师',
                'uploader_id': 11045267,
                'uploader_url': 'https://www.ximalaya.com/zhubo/11045267/',
                'title': 'Lesson 1 Excuse me!',
                'description': "contains:Listen to the tape then answer\xa0this question. Whose handbag is it?\n"
                               "听录音,然后回答问题,这是谁的手袋?",
                'thumbnails': [
                    {
                        'name': 'cover_url',
                        'url': r're:^https?://.*\.jpg$',
                    },
                    {
                        'name': 'cover_url_142',
                        'url': r're:^https?://.*\.jpg$',
                        'width': 180,
                        'height': 180
                    }
                ],
                'categories': ['train', '外语'],
                'duration': 40,
                'view_count': int,
                'like_count': int,
            }
        },
    ]

    def _real_extract(self, url):
        # The mobile and desktop pages use different description markup.
        is_mobile = 'm.ximalaya' in url
        # Follow-up requests reuse the scheme of the URL we were given.
        scheme = 'https' if url.startswith('https') else 'http'
        audio_id = self._match_id(url)

        webpage = self._download_webpage(
            url, audio_id,
            note='Download sound page for %s' % audio_id,
            errnote='Unable to get sound page')

        info_url = '%s://m.ximalaya.com/tracks/%s.json' % (scheme, audio_id)
        audio_info = self._download_json(
            info_url, audio_id,
            'Downloading info json %s' % info_url,
            'Unable to download info file')

        # NOTE(review): the '24k' label is paired with the 'play_path_32'
        # key - confirm this is intended.
        formats = [
            {'format_id': label, 'url': audio_info[path_key]}
            for label, path_key in (('24k', 'play_path_32'), ('64k', 'play_path_64'))
            if audio_info.get(path_key)
        ]

        # Every key starting with 'cover_url' (e.g. 'cover_url',
        # 'cover_url_142') becomes a thumbnail entry.
        thumbnails = []
        for key in audio_info.keys():
            if not key.startswith('cover_url'):
                continue
            cover = {'name': key, 'url': audio_info[key]}
            if key == 'cover_url_142':
                cover['width'] = 180
                cover['height'] = 180
            thumbnails.append(cover)

        uploader_id = audio_info.get('uid')

        description_re = (
            r'(?s)<section\s+class=["\']content[^>]+>(.+?)</section>'
            if is_mobile
            else r'(?s)<div\s+class=["\']rich_intro[^>]*>(.+?</article>)')
        description = self._html_search_regex(
            description_re, webpage, 'audio_description', fatal=False)

        if not description:
            # Nothing usable in the page itself: try the dedicated
            # rich_intro endpoint, still without failing the extraction.
            intro_url = '%s://www.ximalaya.com/sounds/%s/rich_intro' % (scheme, audio_id)
            description = self._download_webpage(
                intro_url, audio_id,
                note='Downloading description file %s' % intro_url,
                errnote='Unable to download descrip file',
                fatal=False)
        description = description.strip() if description else None

        uploader_url = (
            self._USER_URL_FORMAT % (scheme, uploader_id)
            if uploader_id else None)

        return {
            'id': audio_id,
            'uploader': audio_info.get('nickname'),
            'uploader_id': uploader_id,
            'uploader_url': uploader_url,
            'title': audio_info['title'],
            'thumbnails': thumbnails,
            'description': description,
            # Keep only the category fields that are actually present.
            'categories': [
                category for category in (
                    audio_info.get('category_name'),
                    audio_info.get('category_title'))
                if category],
            'duration': audio_info.get('duration'),
            'view_count': audio_info.get('play_count'),
            'like_count': audio_info.get('favorites_count'),
            'formats': formats,
        }
class XimalayaAlbumIE(XimalayaBaseIE):
    # Turns a Ximalaya album page into a playlist of XimalayaIE entries,
    # following the rel="next" pagination links.
    IE_NAME = 'ximalaya:album'
    IE_DESC = '喜马拉雅FM 专辑'
    _VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(?P<uid>[0-9]+)/album/(?P<id>[0-9]+)'
    # Filled with (scheme, uid, album id).
    _TEMPLATE_URL = '%s://www.ximalaya.com/%s/album/%s/'
    # Filled with (scheme, site-relative path).
    _BASE_URL_TEMPL = '%s://www.ximalaya.com%s'
    # Filled with the uploader id before use.
    _LIST_VIDEO_RE = r'<a[^>]+?href="(?P<url>/%s/sound/(?P<id>\d+)/?)"[^>]+?title="(?P<title>[^>]+)">'
    _TESTS = [{
        'url': 'http://www.ximalaya.com/61425525/album/5534601/',
        'info_dict': {
            'title': '唐诗三百首(含赏析)',
            'id': '5534601',
        },
        'playlist_count': 312,
    }, {
        'url': 'http://m.ximalaya.com/61425525/album/5534601',
        'info_dict': {
            'title': '唐诗三百首(含赏析)',
            'id': '5534601',
        },
        'playlist_count': 312,
    },
    ]

    def _real_extract(self, url):
        # The scheme is kept on the instance because _entries() and
        # _process_page() read it when building absolute URLs.
        self.scheme = 'https' if url.startswith('https') else 'http'
        uid, playlist_id = re.match(self._VALID_URL, url).group('uid', 'id')

        # The www. album URL is always the one fetched, whatever host was given.
        album_url = self._TEMPLATE_URL % (self.scheme, uid, playlist_id)
        webpage = self._download_webpage(
            album_url, playlist_id,
            note='Download album page for %s' % playlist_id,
            errnote='Unable to get album info')

        title = self._html_search_regex(
            r'detailContent_title[^>]*><h1(?:[^>]+)?>([^<]+)</h1>',
            webpage, 'title', fatal=False)

        return self.playlist_result(
            self._entries(webpage, playlist_id, uid), playlist_id, title)

    def _entries(self, page, playlist_id, uid):
        # Yields the entries of the given first page, then keeps following
        # the rel="next" link until a page has none.
        current_html = page
        for _page_num in itertools.count(1):
            for entry in self._process_page(current_html, uid):
                yield entry

            next_path = self._search_regex(
                r'<a\s+href=(["\'])(?P<more>[\S]+)\1[^>]+rel=(["\'])next\3',
                current_html, 'list_next_url', default=None, group='more')
            if not next_path:
                return

            current_html = self._download_webpage(
                self._BASE_URL_TEMPL % (self.scheme, next_path), playlist_id)

    def _process_page(self, html, uid):
        # Only the part of the page from the 'album_soundlist' marker onwards
        # is scanned; str.index raises ValueError if the marker is absent.
        track_list_html = html[html.index('album_soundlist'):]
        for match in re.finditer(self._LIST_VIDEO_RE % uid, track_list_html):
            sound_url = self._BASE_URL_TEMPL % (self.scheme, match.group('url'))
            yield self.url_result(
                sound_url,
                XimalayaIE.ie_key(),
                match.group('id'),
                match.group('title'))

View File

@@ -1810,7 +1810,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'url': video_info['conn'][0],
'player_url': player_url,
}]
elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
if 'rtmpe%3Dyes' in encoded_url_map:
raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2018.01.14'
__version__ = '2017.12.31'