Compare commits
24 Commits
2016.08.06
...
2016.08.07
Author | SHA1 | Date | |
---|---|---|---|
![]() |
4a01befb34 | ||
![]() |
845dfcdc40 | ||
![]() |
d92cb46305 | ||
![]() |
a8795327ca | ||
![]() |
d34995a9e3 | ||
![]() |
958849275f | ||
![]() |
998f094452 | ||
![]() |
aaa42cf0cf | ||
![]() |
9fb64c04cd | ||
![]() |
f9622868e7 | ||
![]() |
37768f9242 | ||
![]() |
a1aadd09a4 | ||
![]() |
b47a75017b | ||
![]() |
e37b54b140 | ||
![]() |
c1decda58c | ||
![]() |
d3f8e038fe | ||
![]() |
ad152e2d95 | ||
![]() |
b0af12154e | ||
![]() |
d16b3c6677 | ||
![]() |
c57244cdb1 | ||
![]() |
a7e5f27412 | ||
![]() |
089a40955c | ||
![]() |
d73ebac100 | ||
![]() |
e563c0d73b |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.06*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated versions will be rejected.
|
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.07*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated versions will be rejected.
|
||||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.06**
|
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.07**
|
||||||
|
|
||||||
### Before submitting an *issue* make sure you have:
|
### Before submitting an *issue* make sure you have:
|
||||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2016.08.06
|
[debug] youtube-dl version 2016.08.07
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
26
ChangeLog
26
ChangeLog
@@ -1,3 +1,29 @@
|
|||||||
|
version 2016.08.07
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ Add support for TV Parental Guidelines ratings in parse_age_limit
|
||||||
|
+ Add decode_png (#9706)
|
||||||
|
+ Add support for partOfTVSeries in JSON-LD
|
||||||
|
* Lower master M3U8 manifest preference for better format sorting
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [discoverygo] Add extractor (#10245)
|
||||||
|
* [flipagram] Make JSON-LD extraction non fatal
|
||||||
|
* [generic] Make JSON-LD extraction non fatal
|
||||||
|
+ [bbc] Add support for morph embeds (#10239)
|
||||||
|
* [tnaflixnetworkbase] Improve title extraction
|
||||||
|
* [tnaflix] Fix metadata extraction (#10249)
|
||||||
|
* [fox] Fix theplatform release URL query
|
||||||
|
* [openload] Fix extraction (#9706)
|
||||||
|
* [bbc] Skip duplicate manifest URLs
|
||||||
|
* [bbc] Improve format code
|
||||||
|
+ [bbc] Add support for DASH and F4M
|
||||||
|
* [bbc] Improve format sorting and listing
|
||||||
|
* [bbc] Improve playlist extraction
|
||||||
|
+ [pokemon] Add extractor (#10093)
|
||||||
|
+ [condenast] Add fallback scenario for video info extraction
|
||||||
|
|
||||||
|
|
||||||
version 2016.08.06
|
version 2016.08.06
|
||||||
|
|
||||||
Core
|
Core
|
||||||
|
@@ -182,6 +182,7 @@
|
|||||||
- **DigitallySpeaking**
|
- **DigitallySpeaking**
|
||||||
- **Digiteka**
|
- **Digiteka**
|
||||||
- **Discovery**
|
- **Discovery**
|
||||||
|
- **DiscoveryGo**
|
||||||
- **Dotsub**
|
- **Dotsub**
|
||||||
- **DouyuTV**: 斗鱼
|
- **DouyuTV**: 斗鱼
|
||||||
- **DPlay**
|
- **DPlay**
|
||||||
@@ -518,6 +519,7 @@
|
|||||||
- **plus.google**: Google Plus
|
- **plus.google**: Google Plus
|
||||||
- **pluzz.francetv.fr**
|
- **pluzz.francetv.fr**
|
||||||
- **podomatic**
|
- **podomatic**
|
||||||
|
- **Pokemon**
|
||||||
- **PolskieRadio**
|
- **PolskieRadio**
|
||||||
- **PornHd**
|
- **PornHd**
|
||||||
- **PornHub**: PornHub and Thumbzilla
|
- **PornHub**: PornHub and Thumbzilla
|
||||||
|
@@ -42,6 +42,7 @@ from youtube_dl.utils import (
|
|||||||
ohdave_rsa_encrypt,
|
ohdave_rsa_encrypt,
|
||||||
OnDemandPagedList,
|
OnDemandPagedList,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
|
parse_age_limit,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_filesize,
|
parse_filesize,
|
||||||
parse_count,
|
parse_count,
|
||||||
@@ -432,6 +433,20 @@ class TestUtil(unittest.TestCase):
|
|||||||
url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'),
|
url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'),
|
||||||
'trailer.mp4')
|
'trailer.mp4')
|
||||||
|
|
||||||
|
def test_parse_age_limit(self):
|
||||||
|
self.assertEqual(parse_age_limit(None), None)
|
||||||
|
self.assertEqual(parse_age_limit(False), None)
|
||||||
|
self.assertEqual(parse_age_limit('invalid'), None)
|
||||||
|
self.assertEqual(parse_age_limit(0), 0)
|
||||||
|
self.assertEqual(parse_age_limit(18), 18)
|
||||||
|
self.assertEqual(parse_age_limit(21), 21)
|
||||||
|
self.assertEqual(parse_age_limit(22), None)
|
||||||
|
self.assertEqual(parse_age_limit('18'), 18)
|
||||||
|
self.assertEqual(parse_age_limit('18+'), 18)
|
||||||
|
self.assertEqual(parse_age_limit('PG-13'), 13)
|
||||||
|
self.assertEqual(parse_age_limit('TV-14'), 14)
|
||||||
|
self.assertEqual(parse_age_limit('TV-MA'), 17)
|
||||||
|
|
||||||
def test_parse_duration(self):
|
def test_parse_duration(self):
|
||||||
self.assertEqual(parse_duration(None), None)
|
self.assertEqual(parse_duration(None), None)
|
||||||
self.assertEqual(parse_duration(False), None)
|
self.assertEqual(parse_duration(False), None)
|
||||||
|
@@ -5,11 +5,13 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
dict_get,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
@@ -229,51 +231,6 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
|
asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
|
||||||
return [ref.get('href') for ref in asx.findall('./Entry/ref')]
|
return [ref.get('href') for ref in asx.findall('./Entry/ref')]
|
||||||
|
|
||||||
def _extract_connection(self, connection, programme_id):
|
|
||||||
formats = []
|
|
||||||
kind = connection.get('kind')
|
|
||||||
protocol = connection.get('protocol')
|
|
||||||
supplier = connection.get('supplier')
|
|
||||||
if protocol == 'http':
|
|
||||||
href = connection.get('href')
|
|
||||||
transfer_format = connection.get('transferFormat')
|
|
||||||
# ASX playlist
|
|
||||||
if supplier == 'asx':
|
|
||||||
for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
|
|
||||||
formats.append({
|
|
||||||
'url': ref,
|
|
||||||
'format_id': 'ref%s_%s' % (i, supplier),
|
|
||||||
})
|
|
||||||
# Skip DASH until supported
|
|
||||||
elif transfer_format == 'dash':
|
|
||||||
pass
|
|
||||||
elif transfer_format == 'hls':
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
|
||||||
m3u8_id=supplier, fatal=False))
|
|
||||||
# Direct link
|
|
||||||
else:
|
|
||||||
formats.append({
|
|
||||||
'url': href,
|
|
||||||
'format_id': supplier or kind or protocol,
|
|
||||||
})
|
|
||||||
elif protocol == 'rtmp':
|
|
||||||
application = connection.get('application', 'ondemand')
|
|
||||||
auth_string = connection.get('authString')
|
|
||||||
identifier = connection.get('identifier')
|
|
||||||
server = connection.get('server')
|
|
||||||
formats.append({
|
|
||||||
'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
|
|
||||||
'play_path': identifier,
|
|
||||||
'app': '%s?%s' % (application, auth_string),
|
|
||||||
'page_url': 'http://www.bbc.co.uk',
|
|
||||||
'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
|
|
||||||
'rtmp_live': False,
|
|
||||||
'ext': 'flv',
|
|
||||||
'format_id': supplier,
|
|
||||||
})
|
|
||||||
return formats
|
|
||||||
|
|
||||||
def _extract_items(self, playlist):
|
def _extract_items(self, playlist):
|
||||||
return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
|
return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
|
||||||
|
|
||||||
@@ -294,46 +251,6 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
def _extract_connections(self, media):
|
def _extract_connections(self, media):
|
||||||
return self._findall_ns(media, './{%s}connection')
|
return self._findall_ns(media, './{%s}connection')
|
||||||
|
|
||||||
def _extract_video(self, media, programme_id):
|
|
||||||
formats = []
|
|
||||||
vbr = int_or_none(media.get('bitrate'))
|
|
||||||
vcodec = media.get('encoding')
|
|
||||||
service = media.get('service')
|
|
||||||
width = int_or_none(media.get('width'))
|
|
||||||
height = int_or_none(media.get('height'))
|
|
||||||
file_size = int_or_none(media.get('media_file_size'))
|
|
||||||
for connection in self._extract_connections(media):
|
|
||||||
conn_formats = self._extract_connection(connection, programme_id)
|
|
||||||
for format in conn_formats:
|
|
||||||
format.update({
|
|
||||||
'width': width,
|
|
||||||
'height': height,
|
|
||||||
'vbr': vbr,
|
|
||||||
'vcodec': vcodec,
|
|
||||||
'filesize': file_size,
|
|
||||||
})
|
|
||||||
if service:
|
|
||||||
format['format_id'] = '%s_%s' % (service, format['format_id'])
|
|
||||||
formats.extend(conn_formats)
|
|
||||||
return formats
|
|
||||||
|
|
||||||
def _extract_audio(self, media, programme_id):
|
|
||||||
formats = []
|
|
||||||
abr = int_or_none(media.get('bitrate'))
|
|
||||||
acodec = media.get('encoding')
|
|
||||||
service = media.get('service')
|
|
||||||
for connection in self._extract_connections(media):
|
|
||||||
conn_formats = self._extract_connection(connection, programme_id)
|
|
||||||
for format in conn_formats:
|
|
||||||
format.update({
|
|
||||||
'format_id': '%s_%s' % (service, format['format_id']),
|
|
||||||
'abr': abr,
|
|
||||||
'acodec': acodec,
|
|
||||||
'vcodec': 'none',
|
|
||||||
})
|
|
||||||
formats.extend(conn_formats)
|
|
||||||
return formats
|
|
||||||
|
|
||||||
def _get_subtitles(self, media, programme_id):
|
def _get_subtitles(self, media, programme_id):
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
for connection in self._extract_connections(media):
|
for connection in self._extract_connections(media):
|
||||||
@@ -379,13 +296,87 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
def _process_media_selector(self, media_selection, programme_id):
|
def _process_media_selector(self, media_selection, programme_id):
|
||||||
formats = []
|
formats = []
|
||||||
subtitles = None
|
subtitles = None
|
||||||
|
urls = []
|
||||||
|
|
||||||
for media in self._extract_medias(media_selection):
|
for media in self._extract_medias(media_selection):
|
||||||
kind = media.get('kind')
|
kind = media.get('kind')
|
||||||
if kind == 'audio':
|
if kind in ('video', 'audio'):
|
||||||
formats.extend(self._extract_audio(media, programme_id))
|
bitrate = int_or_none(media.get('bitrate'))
|
||||||
elif kind == 'video':
|
encoding = media.get('encoding')
|
||||||
formats.extend(self._extract_video(media, programme_id))
|
service = media.get('service')
|
||||||
|
width = int_or_none(media.get('width'))
|
||||||
|
height = int_or_none(media.get('height'))
|
||||||
|
file_size = int_or_none(media.get('media_file_size'))
|
||||||
|
for connection in self._extract_connections(media):
|
||||||
|
href = connection.get('href')
|
||||||
|
if href in urls:
|
||||||
|
continue
|
||||||
|
if href:
|
||||||
|
urls.append(href)
|
||||||
|
conn_kind = connection.get('kind')
|
||||||
|
protocol = connection.get('protocol')
|
||||||
|
supplier = connection.get('supplier')
|
||||||
|
transfer_format = connection.get('transferFormat')
|
||||||
|
format_id = supplier or conn_kind or protocol
|
||||||
|
if service:
|
||||||
|
format_id = '%s_%s' % (service, format_id)
|
||||||
|
# ASX playlist
|
||||||
|
if supplier == 'asx':
|
||||||
|
for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
|
||||||
|
formats.append({
|
||||||
|
'url': ref,
|
||||||
|
'format_id': 'ref%s_%s' % (i, format_id),
|
||||||
|
})
|
||||||
|
elif transfer_format == 'dash':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
href, programme_id, mpd_id=format_id, fatal=False))
|
||||||
|
elif transfer_format == 'hls':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id=format_id, fatal=False))
|
||||||
|
elif transfer_format == 'hds':
|
||||||
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
href, programme_id, f4m_id=format_id, fatal=False))
|
||||||
|
else:
|
||||||
|
if not service and not supplier and bitrate:
|
||||||
|
format_id += '-%d' % bitrate
|
||||||
|
fmt = {
|
||||||
|
'format_id': format_id,
|
||||||
|
'filesize': file_size,
|
||||||
|
}
|
||||||
|
if kind == 'video':
|
||||||
|
fmt.update({
|
||||||
|
'width': width,
|
||||||
|
'height': height,
|
||||||
|
'vbr': bitrate,
|
||||||
|
'vcodec': encoding,
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
fmt.update({
|
||||||
|
'abr': bitrate,
|
||||||
|
'acodec': encoding,
|
||||||
|
'vcodec': 'none',
|
||||||
|
})
|
||||||
|
if protocol == 'http':
|
||||||
|
# Direct link
|
||||||
|
fmt.update({
|
||||||
|
'url': href,
|
||||||
|
})
|
||||||
|
elif protocol == 'rtmp':
|
||||||
|
application = connection.get('application', 'ondemand')
|
||||||
|
auth_string = connection.get('authString')
|
||||||
|
identifier = connection.get('identifier')
|
||||||
|
server = connection.get('server')
|
||||||
|
fmt.update({
|
||||||
|
'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
|
||||||
|
'play_path': identifier,
|
||||||
|
'app': '%s?%s' % (application, auth_string),
|
||||||
|
'page_url': 'http://www.bbc.co.uk',
|
||||||
|
'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
|
||||||
|
'rtmp_live': False,
|
||||||
|
'ext': 'flv',
|
||||||
|
})
|
||||||
|
formats.append(fmt)
|
||||||
elif kind == 'captions':
|
elif kind == 'captions':
|
||||||
subtitles = self.extract_subtitles(media, programme_id)
|
subtitles = self.extract_subtitles(media, programme_id)
|
||||||
return formats, subtitles
|
return formats, subtitles
|
||||||
@@ -589,7 +580,7 @@ class BBCIE(BBCCoUkIE):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '150615_telabyad_kentin_cogu',
|
'id': '150615_telabyad_kentin_cogu',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': "Tel Abyad'da IŞİD bayrağı indirildi YPG bayrağı çekildi",
|
'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
|
||||||
'description': 'md5:33a4805a855c9baf7115fcbde57e7025',
|
'description': 'md5:33a4805a855c9baf7115fcbde57e7025',
|
||||||
'timestamp': 1434397334,
|
'timestamp': 1434397334,
|
||||||
'upload_date': '20150615',
|
'upload_date': '20150615',
|
||||||
@@ -654,6 +645,23 @@ class BBCIE(BBCCoUkIE):
|
|||||||
# rtmp download
|
# rtmp download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
# single video embedded with Morph
|
||||||
|
'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'p041vhd0',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': "Nigeria v Japan - Men's First Round",
|
||||||
|
'description': 'Live coverage of the first round from Group B at the Amazonia Arena.',
|
||||||
|
'duration': 7980,
|
||||||
|
'uploader': 'BBC Sport',
|
||||||
|
'uploader_id': 'bbc_sport',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'skip': 'Georestricted to UK',
|
||||||
}, {
|
}, {
|
||||||
# single video with playlist.sxml URL in playlist param
|
# single video with playlist.sxml URL in playlist param
|
||||||
'url': 'http://www.bbc.com/sport/0/football/33653409',
|
'url': 'http://www.bbc.com/sport/0/football/33653409',
|
||||||
@@ -820,13 +828,19 @@ class BBCIE(BBCCoUkIE):
|
|||||||
# http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
|
# http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
|
||||||
playlist = data_playable.get('otherSettings', {}).get('playlist', {})
|
playlist = data_playable.get('otherSettings', {}).get('playlist', {})
|
||||||
if playlist:
|
if playlist:
|
||||||
for key in ('progressiveDownload', 'streaming'):
|
entry = None
|
||||||
|
for key in ('streaming', 'progressiveDownload'):
|
||||||
playlist_url = playlist.get('%sUrl' % key)
|
playlist_url = playlist.get('%sUrl' % key)
|
||||||
if not playlist_url:
|
if not playlist_url:
|
||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
entries.append(self._extract_from_playlist_sxml(
|
info = self._extract_from_playlist_sxml(
|
||||||
playlist_url, playlist_id, timestamp))
|
playlist_url, playlist_id, timestamp)
|
||||||
|
if not entry:
|
||||||
|
entry = info
|
||||||
|
else:
|
||||||
|
entry['title'] = info['title']
|
||||||
|
entry['formats'].extend(info['formats'])
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# One playlist URL may fail with 500 while
|
# One playlist URL may fail with 500 while
|
||||||
# the other one may work fine (e.g.
|
# the other one may work fine (e.g.
|
||||||
@@ -834,6 +848,9 @@ class BBCIE(BBCCoUkIE):
|
|||||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
|
||||||
continue
|
continue
|
||||||
raise
|
raise
|
||||||
|
if entry:
|
||||||
|
self._sort_formats(entry['formats'])
|
||||||
|
entries.append(entry)
|
||||||
|
|
||||||
if entries:
|
if entries:
|
||||||
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
||||||
@@ -866,6 +883,50 @@ class BBCIE(BBCCoUkIE):
|
|||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
|
||||||
|
# Several setPayload calls may be present, but the video
|
||||||
|
# seems to be always related to the first one
|
||||||
|
morph_payload = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'Morph\.setPayload\([^,]+,\s*({.+?})\);',
|
||||||
|
webpage, 'morph payload', default='{}'),
|
||||||
|
playlist_id, fatal=False)
|
||||||
|
if morph_payload:
|
||||||
|
components = try_get(morph_payload, lambda x: x['body']['components'], list) or []
|
||||||
|
for component in components:
|
||||||
|
if not isinstance(component, dict):
|
||||||
|
continue
|
||||||
|
lead_media = try_get(component, lambda x: x['props']['leadMedia'], dict)
|
||||||
|
if not lead_media:
|
||||||
|
continue
|
||||||
|
identifiers = lead_media.get('identifiers')
|
||||||
|
if not identifiers or not isinstance(identifiers, dict):
|
||||||
|
continue
|
||||||
|
programme_id = identifiers.get('vpid') or identifiers.get('playablePid')
|
||||||
|
if not programme_id:
|
||||||
|
continue
|
||||||
|
title = lead_media.get('title') or self._og_search_title(webpage)
|
||||||
|
formats, subtitles = self._download_media_selector(programme_id)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
description = lead_media.get('summary')
|
||||||
|
uploader = lead_media.get('masterBrand')
|
||||||
|
uploader_id = lead_media.get('mid')
|
||||||
|
duration = None
|
||||||
|
duration_d = lead_media.get('duration')
|
||||||
|
if isinstance(duration_d, dict):
|
||||||
|
duration = parse_duration(dict_get(
|
||||||
|
duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration')))
|
||||||
|
return {
|
||||||
|
'id': programme_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'duration': duration,
|
||||||
|
'uploader': uploader,
|
||||||
|
'uploader_id': uploader_id,
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
}
|
||||||
|
|
||||||
def extract_all(pattern):
|
def extract_all(pattern):
|
||||||
return list(filter(None, map(
|
return list(filter(None, map(
|
||||||
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
||||||
@@ -883,7 +944,7 @@ class BBCIE(BBCCoUkIE):
|
|||||||
r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage))
|
r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage))
|
||||||
if entries:
|
if entries:
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
[self.url_result(entry, 'BBCCoUk') for entry in entries],
|
[self.url_result(entry_, 'BBCCoUk') for entry_ in entries],
|
||||||
playlist_id, playlist_title, playlist_description)
|
playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
# Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)
|
# Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)
|
||||||
|
@@ -846,7 +846,7 @@ class InfoExtractor(object):
|
|||||||
part_of_season = e.get('partOfSeason')
|
part_of_season = e.get('partOfSeason')
|
||||||
if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason':
|
if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason':
|
||||||
info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
|
info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
|
||||||
part_of_series = e.get('partOfSeries')
|
part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
|
||||||
if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries':
|
if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries':
|
||||||
info['series'] = unescapeHTML(part_of_series.get('name'))
|
info['series'] = unescapeHTML(part_of_series.get('name'))
|
||||||
elif item_type == 'Article':
|
elif item_type == 'Article':
|
||||||
@@ -1140,7 +1140,7 @@ class InfoExtractor(object):
|
|||||||
'url': m3u8_url,
|
'url': m3u8_url,
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
'protocol': 'm3u8',
|
'protocol': 'm3u8',
|
||||||
'preference': preference - 1 if preference else -1,
|
'preference': preference - 100 if preference else -100,
|
||||||
'resolution': 'multiple',
|
'resolution': 'multiple',
|
||||||
'format_note': 'Quality selection URL',
|
'format_note': 'Quality selection URL',
|
||||||
}
|
}
|
||||||
|
@@ -113,11 +113,19 @@ class CondeNastIE(InfoExtractor):
|
|||||||
'target': params['id'],
|
'target': params['id'],
|
||||||
})
|
})
|
||||||
video_id = query['videoId']
|
video_id = query['videoId']
|
||||||
|
video_info = None
|
||||||
info_page = self._download_webpage(
|
info_page = self._download_webpage(
|
||||||
'http://player.cnevids.com/player/video.js',
|
'http://player.cnevids.com/player/video.js',
|
||||||
video_id, 'Downloading video info', query=query)
|
video_id, 'Downloading video info', query=query, fatal=False)
|
||||||
video_info = self._parse_json(self._search_regex(
|
if info_page:
|
||||||
r'loadCallback\(({.+})\)', info_page, 'video info'), video_id)['video']
|
video_info = self._parse_json(self._search_regex(
|
||||||
|
r'loadCallback\(({.+})\)', info_page, 'video info'), video_id)['video']
|
||||||
|
else:
|
||||||
|
info_page = self._download_webpage(
|
||||||
|
'http://player.cnevids.com/player/loader.js',
|
||||||
|
video_id, 'Downloading loader info', query=query)
|
||||||
|
video_info = self._parse_json(self._search_regex(
|
||||||
|
r'var\s+video\s*=\s*({.+?});', info_page, 'video info'), video_id)
|
||||||
title = video_info['title']
|
title = video_info['title']
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
98
youtube_dl/extractor/discoverygo.py
Normal file
98
youtube_dl/extractor/discoverygo.py
Normal file
@@ -0,0 +1,98 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
extract_attributes,
|
||||||
|
int_or_none,
|
||||||
|
parse_age_limit,
|
||||||
|
unescapeHTML,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class DiscoveryGoIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?discoverygo\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://www.discoverygo.com/love-at-first-kiss/kiss-first-ask-questions-later/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '57a33c536b66d1cd0345eeb1',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Kiss First, Ask Questions Later!',
|
||||||
|
'description': 'md5:fe923ba34050eae468bffae10831cb22',
|
||||||
|
'duration': 2579,
|
||||||
|
'series': 'Love at First Kiss',
|
||||||
|
'season_number': 1,
|
||||||
|
'episode_number': 1,
|
||||||
|
'age_limit': 14,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
container = extract_attributes(
|
||||||
|
self._search_regex(
|
||||||
|
r'(<div[^>]+class=["\']video-player-container[^>]+>)',
|
||||||
|
webpage, 'video container'))
|
||||||
|
|
||||||
|
video = self._parse_json(
|
||||||
|
unescapeHTML(container.get('data-video') or container.get('data-json')),
|
||||||
|
display_id)
|
||||||
|
|
||||||
|
title = video['name']
|
||||||
|
|
||||||
|
stream = video['stream']
|
||||||
|
STREAM_URL_SUFFIX = 'streamUrl'
|
||||||
|
formats = []
|
||||||
|
for stream_kind in ('', 'hds'):
|
||||||
|
suffix = STREAM_URL_SUFFIX.capitalize() if stream_kind else STREAM_URL_SUFFIX
|
||||||
|
stream_url = stream.get('%s%s' % (stream_kind, suffix))
|
||||||
|
if not stream_url:
|
||||||
|
continue
|
||||||
|
if stream_kind == '':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
stream_url, display_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
elif stream_kind == 'hds':
|
||||||
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
stream_url, display_id, f4m_id=stream_kind, fatal=False))
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
video_id = video.get('id') or display_id
|
||||||
|
description = video.get('description', {}).get('detailed')
|
||||||
|
duration = int_or_none(video.get('duration'))
|
||||||
|
|
||||||
|
series = video.get('show', {}).get('name')
|
||||||
|
season_number = int_or_none(video.get('season', {}).get('number'))
|
||||||
|
episode_number = int_or_none(video.get('episodeNumber'))
|
||||||
|
|
||||||
|
tags = video.get('tags')
|
||||||
|
age_limit = parse_age_limit(video.get('parental', {}).get('rating'))
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
captions = stream.get('captions')
|
||||||
|
if isinstance(captions, list):
|
||||||
|
for caption in captions:
|
||||||
|
subtitle_url = caption.get('fileUrl')
|
||||||
|
if (not subtitle_url or not isinstance(subtitle_url, compat_str) or
|
||||||
|
not subtitle_url.startswith('http')):
|
||||||
|
continue
|
||||||
|
lang = caption.get('fileLang', 'en')
|
||||||
|
subtitles.setdefault(lang, []).append({'url': subtitle_url})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'duration': duration,
|
||||||
|
'series': series,
|
||||||
|
'season_number': season_number,
|
||||||
|
'episode_number': episode_number,
|
||||||
|
'tags': tags,
|
||||||
|
'age_limit': age_limit,
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
}
|
@@ -221,6 +221,7 @@ from .dvtv import DVTVIE
|
|||||||
from .dumpert import DumpertIE
|
from .dumpert import DumpertIE
|
||||||
from .defense import DefenseGouvFrIE
|
from .defense import DefenseGouvFrIE
|
||||||
from .discovery import DiscoveryIE
|
from .discovery import DiscoveryIE
|
||||||
|
from .discoverygo import DiscoveryGoIE
|
||||||
from .dispeak import DigitallySpeakingIE
|
from .dispeak import DigitallySpeakingIE
|
||||||
from .dropbox import DropboxIE
|
from .dropbox import DropboxIE
|
||||||
from .dw import (
|
from .dw import (
|
||||||
@@ -636,6 +637,7 @@ from .pluralsight import (
|
|||||||
PluralsightCourseIE,
|
PluralsightCourseIE,
|
||||||
)
|
)
|
||||||
from .podomatic import PodomaticIE
|
from .podomatic import PodomaticIE
|
||||||
|
from .pokemon import PokemonIE
|
||||||
from .polskieradio import PolskieRadioIE
|
from .polskieradio import PolskieRadioIE
|
||||||
from .porn91 import Porn91IE
|
from .porn91 import Porn91IE
|
||||||
from .pornhd import PornHdIE
|
from .pornhd import PornHdIE
|
||||||
|
@@ -48,7 +48,7 @@ class FlipagramIE(InfoExtractor):
|
|||||||
flipagram = video_data['flipagram']
|
flipagram = video_data['flipagram']
|
||||||
video = flipagram['video']
|
video = flipagram['video']
|
||||||
|
|
||||||
json_ld = self._search_json_ld(webpage, video_id, default=False)
|
json_ld = self._search_json_ld(webpage, video_id, fatal=False)
|
||||||
title = json_ld.get('title') or flipagram['captionText']
|
title = json_ld.get('title') or flipagram['captionText']
|
||||||
description = json_ld.get('description') or flipagram.get('captionText')
|
description = json_ld.get('description') or flipagram.get('captionText')
|
||||||
|
|
||||||
|
@@ -2,7 +2,10 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import smuggle_url
|
from ..utils import (
|
||||||
|
smuggle_url,
|
||||||
|
update_url_query,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class FOXIE(InfoExtractor):
|
class FOXIE(InfoExtractor):
|
||||||
@@ -29,11 +32,12 @@ class FOXIE(InfoExtractor):
|
|||||||
|
|
||||||
release_url = self._parse_json(self._search_regex(
|
release_url = self._parse_json(self._search_regex(
|
||||||
r'"fox_pdk_player"\s*:\s*({[^}]+?})', webpage, 'fox_pdk_player'),
|
r'"fox_pdk_player"\s*:\s*({[^}]+?})', webpage, 'fox_pdk_player'),
|
||||||
video_id)['release_url'] + '&switch=http'
|
video_id)['release_url']
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'ie_key': 'ThePlatform',
|
'ie_key': 'ThePlatform',
|
||||||
'url': smuggle_url(release_url, {'force_smil_url': True}),
|
'url': smuggle_url(update_url_query(
|
||||||
|
release_url, {'switch': 'http'}), {'force_smil_url': True}),
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
}
|
}
|
||||||
|
@@ -2241,7 +2241,7 @@ class GenericIE(InfoExtractor):
|
|||||||
|
|
||||||
# Looking for http://schema.org/VideoObject
|
# Looking for http://schema.org/VideoObject
|
||||||
json_ld = self._search_json_ld(
|
json_ld = self._search_json_ld(
|
||||||
webpage, video_id, default=None, expected_type='VideoObject')
|
webpage, video_id, fatal=False, expected_type='VideoObject')
|
||||||
if json_ld and json_ld.get('url'):
|
if json_ld and json_ld.get('url'):
|
||||||
info_dict.update({
|
info_dict.update({
|
||||||
'title': video_title or info_dict['title'],
|
'title': video_title or info_dict['title'],
|
||||||
|
@@ -1,15 +1,14 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals, division
|
||||||
|
|
||||||
import re
|
import math
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_chr
|
from ..compat import compat_chr
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
decode_png,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
encode_base_n,
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
mimetype2ext,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -41,60 +40,6 @@ class OpenloadIE(InfoExtractor):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
def openload_level2_debase(m):
    """Render one matched ``(radix_offset, number)`` pair as a quoted string.

    ``m`` is a regex match whose first group holds the radix offset (27 is
    added to it to obtain the real radix) and whose second group holds the
    decimal number to re-encode in that radix.
    """
    radix = int(m.group(1)) + 27
    num = int(m.group(2))
    # The encoded digits are wrapped in double quotes
    return '"%s"' % encode_base_n(num, radix)
|
|
||||||
|
|
||||||
@classmethod
def openload_level2(cls, txt):
    """Expand every ``ǃ(radix_offset,number)`` call found in ``txt``.

    Each call is replaced by the quoted string produced by
    ``openload_level2_debase``; afterwards the ``"+"`` sequences that join
    adjacent quoted strings are removed.

    Returns the rewritten text.
    """
    # The function name is ǃ \u01c3
    # Using escaped unicode literals does not work in Python 3.2
    #
    # The flag has to be given at compile time: the 4th positional argument
    # of re.sub() is ``count``, so passing re.UNICODE there would cap the
    # number of substitutions at 32 and never enable the flag.
    call_re = re.compile(r'ǃ\((\d+),(\d+)\)', re.UNICODE)
    return call_re.sub(cls.openload_level2_debase, txt).replace('"+"', '')
|
|
||||||
|
|
||||||
# Openload uses a variant of aadecode
|
|
||||||
# openload_decode and related functions are originally written by
|
|
||||||
# vitas@matfyz.cz and released with public domain
|
|
||||||
# See https://github.com/rg3/youtube-dl/issues/8489
|
|
||||||
@classmethod
def openload_decode(cls, txt):
    """Decode an aadecode-style obfuscated script into plain text.

    ``txt`` is split on the character delimiter; in every piece the emoticon
    patterns of ``symbol_table`` are replaced by the plain character they
    stand for.  A piece that then starts with digits is read as an octal
    code point, one that starts with ``u`` followed by hex digits as a
    hexadecimal code point; other pieces contribute nothing.

    Returns the decoded text after it has been passed through
    ``openload_level2``.
    """
    # (replacement, pattern) pairs; they are applied in list order to every
    # piece, so the order of the entries matters
    symbol_table = [
        ('_', '(゚Д゚) [゚Θ゚]'),
        ('a', '(゚Д゚) [゚ω゚ノ]'),
        ('b', '(゚Д゚) [゚Θ゚ノ]'),
        ('c', '(゚Д゚) [\'c\']'),
        ('d', '(゚Д゚) [゚ー゚ノ]'),
        ('e', '(゚Д゚) [゚Д゚ノ]'),
        ('f', '(゚Д゚) [1]'),

        ('o', '(゚Д゚) [\'o\']'),
        ('u', '(o゚ー゚o)'),
        ('c', '(゚Д゚) [\'c\']'),

        ('7', '((゚ー゚) + (o^_^o))'),
        ('6', '((o^_^o) +(o^_^o) +(c^_^o))'),
        ('5', '((゚ー゚) + (゚Θ゚))'),
        ('4', '(-~3)'),
        ('3', '(-~-~1)'),
        ('2', '(-~1)'),
        ('1', '(-~0)'),
        ('0', '((c^_^o)-(c^_^o))'),
    ]
    # Separator between two encoded characters
    delim = '(゚Д゚)[゚ε゚]+'
    ret = ''
    for aachar in txt.split(delim):
        for val, pat in symbol_table:
            aachar = aachar.replace(pat, val)
        # Drop the '+ ' joiners left between the substituted symbols
        aachar = aachar.replace('+ ', '')
        m = re.match(r'^\d+', aachar)
        if m:
            # Leading digits form an octal code point
            ret += compat_chr(int(m.group(0), 8))
        else:
            m = re.match(r'^u([\da-f]+)', aachar)
            if m:
                # 'u' + hex digits form a hexadecimal code point
                ret += compat_chr(int(m.group(1), 16))
    return cls.openload_level2(ret)
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
@@ -102,29 +47,77 @@ class OpenloadIE(InfoExtractor):
|
|||||||
if 'File not found' in webpage:
|
if 'File not found' in webpage:
|
||||||
raise ExtractorError('File not found', expected=True)
|
raise ExtractorError('File not found', expected=True)
|
||||||
|
|
||||||
code = self._search_regex(
|
# The following extraction logic is proposed by @Belderak and @gdkchan
|
||||||
r'</video>\s*</div>\s*<script[^>]+>[^>]+</script>\s*<script[^>]+>([^<]+)</script>',
|
# and declared to be used freely in youtube-dl
|
||||||
webpage, 'JS code')
|
# See https://github.com/rg3/youtube-dl/issues/9706
|
||||||
|
|
||||||
decoded = self.openload_decode(code)
|
numbers_js = self._download_webpage(
|
||||||
|
'https://openload.co/assets/js/obfuscator/n.js', video_id,
|
||||||
|
note='Downloading signature numbers')
|
||||||
|
signums = self._search_regex(
|
||||||
|
r'window\.signatureNumbers\s*=\s*[\'"](?P<data>[a-z]+)[\'"]',
|
||||||
|
numbers_js, 'signature numbers', group='data')
|
||||||
|
|
||||||
video_url = self._search_regex(
|
linkimg_uri = self._search_regex(
|
||||||
r'return\s+"(https?://[^"]+)"', decoded, 'video URL')
|
r'<img[^>]+id="linkimg"[^>]+src="([^"]+)"', webpage, 'link image')
|
||||||
|
linkimg = self._request_webpage(
|
||||||
|
linkimg_uri, video_id, note=False).read()
|
||||||
|
|
||||||
|
width, height, pixels = decode_png(linkimg)
|
||||||
|
|
||||||
|
output = ''
|
||||||
|
for y in range(height):
|
||||||
|
for x in range(width):
|
||||||
|
r, g, b = pixels[y][3 * x:3 * x + 3]
|
||||||
|
if r == 0 and g == 0 and b == 0:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
output += compat_chr(r)
|
||||||
|
output += compat_chr(g)
|
||||||
|
output += compat_chr(b)
|
||||||
|
|
||||||
|
img_str_length = len(output) // 200
|
||||||
|
img_str = [[0 for x in range(img_str_length)] for y in range(10)]
|
||||||
|
|
||||||
|
sig_str_length = len(signums) // 260
|
||||||
|
sig_str = [[0 for x in range(sig_str_length)] for y in range(10)]
|
||||||
|
|
||||||
|
for i in range(10):
|
||||||
|
for j in range(img_str_length):
|
||||||
|
begin = i * img_str_length * 20 + j * 20
|
||||||
|
img_str[i][j] = output[begin:begin + 20]
|
||||||
|
for j in range(sig_str_length):
|
||||||
|
begin = i * sig_str_length * 26 + j * 26
|
||||||
|
sig_str[i][j] = signums[begin:begin + 26]
|
||||||
|
|
||||||
|
parts = []
|
||||||
|
# TODO: find better names for str_, chr_ and sum_
|
||||||
|
str_ = ''
|
||||||
|
for i in [2, 3, 5, 7]:
|
||||||
|
str_ = ''
|
||||||
|
sum_ = float(99)
|
||||||
|
for j in range(len(sig_str[i])):
|
||||||
|
for chr_idx in range(len(img_str[i][j])):
|
||||||
|
if sum_ > float(122):
|
||||||
|
sum_ = float(98)
|
||||||
|
chr_ = compat_chr(int(math.floor(sum_)))
|
||||||
|
if sig_str[i][j][chr_idx] == chr_ and j >= len(str_):
|
||||||
|
sum_ += float(2.5)
|
||||||
|
str_ += img_str[i][j][chr_idx]
|
||||||
|
parts.append(str_.replace(',', ''))
|
||||||
|
|
||||||
|
video_url = 'https://openload.co/stream/%s~%s~%s~%s' % (parts[3], parts[1], parts[2], parts[0])
|
||||||
|
|
||||||
title = self._og_search_title(webpage, default=None) or self._search_regex(
|
title = self._og_search_title(webpage, default=None) or self._search_regex(
|
||||||
r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
|
r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
|
||||||
'title', default=None) or self._html_search_meta(
|
'title', default=None) or self._html_search_meta(
|
||||||
'description', webpage, 'title', fatal=True)
|
'description', webpage, 'title', fatal=True)
|
||||||
|
|
||||||
ext = mimetype2ext(self._search_regex(
|
|
||||||
r'window\.vt\s*=\s*(["\'])(?P<mimetype>.+?)\1', decoded,
|
|
||||||
'mimetype', default=None, group='mimetype')) or determine_ext(
|
|
||||||
video_url, 'mp4')
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'ext': ext,
|
|
||||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
|
# Seems all videos have extensions in their titles
|
||||||
|
'ext': determine_ext(title),
|
||||||
}
|
}
|
||||||
|
58
youtube_dl/extractor/pokemon.py
Normal file
58
youtube_dl/extractor/pokemon.py
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
extract_attributes,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class PokemonIE(InfoExtractor):
    """Extractor for episode pages on pokemon.com.

    The page only supplies metadata; the result is a ``url_transparent``
    entry pointing at ``limelight:media:<id>`` with ``ie_key`` set to
    ``LimelightMedia``.
    """

    # Either a ``play=<32 char id>`` query value or an episode slug of the
    # form ``<season>_<episode>-<display id>`` identifies the video
    _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/[^/]+/\d+_\d+-(?P<display_id>[^/?#]+))'
    _TESTS = [{
        'url': 'http://www.pokemon.com/us/pokemon-episodes/19_01-from-a-to-z/?play=true',
        'md5': '9fb209ae3a569aac25de0f5afc4ee08f',
        'info_dict': {
            'id': 'd0436c00c3ce4071ac6cee8130ac54a1',
            'ext': 'mp4',
            'title': 'From A to Z!',
            'description': 'Bonnie makes a new friend, Ash runs into an old friend, and a terrifying premonition begins to unfold!',
            'timestamp': 1460478136,
            'upload_date': '20160412',
        },
        # NOTE(review): the download test harness is expected to read extra
        # extractor dependencies from 'add_ie' (the key was misspelled
        # 'add_id' and therefore ignored) - confirm against test_download.py
        'add_ie': ['LimelightMedia'],
    }, {
        'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2',
        'only_matching': True,
    }, {
        'url': 'http://www.pokemon.com/fr/episodes-pokemon/18_09-un-hiver-inattendu/',
        'only_matching': True,
    }, {
        'url': 'http://www.pokemon.com/de/pokemon-folgen/01_20-bye-bye-smettbo/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the ``url_transparent`` result for the page at ``url``."""
        # Exactly one of the two groups is set, depending on the URL form
        video_id, display_id = re.match(self._VALID_URL, url).groups()
        webpage = self._download_webpage(url, video_id or display_id)
        # Without an id in the URL, take the first element on the page that
        # carries any 32 character data-video-id
        video_data = extract_attributes(self._search_regex(
            r'(<[^>]+data-video-id="%s"[^>]*>)' % (video_id if video_id else '[a-z0-9]{32}'),
            webpage, 'video data element'))
        video_id = video_data['data-video-id']
        title = video_data['data-video-title']
        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': 'limelight:media:%s' % video_id,
            'title': title,
            'description': video_data.get('data-video-summary'),
            'thumbnail': video_data.get('data-video-poster'),
            'series': 'Pokémon',
            'season_number': int_or_none(video_data.get('data-video-season')),
            'episode': title,
            'episode_number': int_or_none(video_data.get('data-video-episode')),
            'ie_key': 'LimelightMedia',
        }
|
@@ -118,8 +118,12 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
|
|||||||
xpath_text(cfg_xml, './startThumb', 'thumbnail'), 'http:')
|
xpath_text(cfg_xml, './startThumb', 'thumbnail'), 'http:')
|
||||||
thumbnails = self._extract_thumbnails(cfg_xml)
|
thumbnails = self._extract_thumbnails(cfg_xml)
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = None
|
||||||
self._TITLE_REGEX, webpage, 'title') if self._TITLE_REGEX else self._og_search_title(webpage)
|
if self._TITLE_REGEX:
|
||||||
|
title = self._html_search_regex(
|
||||||
|
self._TITLE_REGEX, webpage, 'title', default=None)
|
||||||
|
if not title:
|
||||||
|
title = self._og_search_title(webpage)
|
||||||
|
|
||||||
age_limit = self._rta_search(webpage) or 18
|
age_limit = self._rta_search(webpage) or 18
|
||||||
|
|
||||||
@@ -189,9 +193,9 @@ class TNAFlixNetworkEmbedIE(TNAFlixNetworkBaseIE):
|
|||||||
class TNAFlixIE(TNAFlixNetworkBaseIE):
|
class TNAFlixIE(TNAFlixNetworkBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?tnaflix\.com/[^/]+/(?P<display_id>[^/]+)/video(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?tnaflix\.com/[^/]+/(?P<display_id>[^/]+)/video(?P<id>\d+)'
|
||||||
|
|
||||||
_TITLE_REGEX = r'<title>(.+?) - TNAFlix Porn Videos</title>'
|
_TITLE_REGEX = r'<title>(.+?) - (?:TNAFlix Porn Videos|TNAFlix\.com)</title>'
|
||||||
_DESCRIPTION_REGEX = r'<meta[^>]+name="description"[^>]+content="([^"]+)"'
|
_DESCRIPTION_REGEX = r'(?s)>Description:</[^>]+>(.+?)<'
|
||||||
_UPLOADER_REGEX = r'<i>\s*Verified Member\s*</i>\s*<h1>(.+?)</h1>'
|
_UPLOADER_REGEX = r'<i>\s*Verified Member\s*</i>\s*<h\d+>(.+?)<'
|
||||||
_CATEGORIES_REGEX = r'(?s)<span[^>]*>Categories:</span>(.+?)</div>'
|
_CATEGORIES_REGEX = r'(?s)<span[^>]*>Categories:</span>(.+?)</div>'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
@@ -47,6 +47,7 @@ from .compat import (
|
|||||||
compat_socket_create_connection,
|
compat_socket_create_connection,
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_struct_pack,
|
compat_struct_pack,
|
||||||
|
compat_struct_unpack,
|
||||||
compat_urllib_error,
|
compat_urllib_error,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_parse_urlencode,
|
compat_urllib_parse_urlencode,
|
||||||
@@ -1983,11 +1984,27 @@ US_RATINGS = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
TV_PARENTAL_GUIDELINES = {
|
||||||
|
'TV-Y': 0,
|
||||||
|
'TV-Y7': 7,
|
||||||
|
'TV-G': 0,
|
||||||
|
'TV-PG': 0,
|
||||||
|
'TV-14': 14,
|
||||||
|
'TV-MA': 17,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def parse_age_limit(s):
|
def parse_age_limit(s):
|
||||||
if s is None:
|
if type(s) == int:
|
||||||
|
return s if 0 <= s <= 21 else None
|
||||||
|
if not isinstance(s, compat_basestring):
|
||||||
return None
|
return None
|
||||||
m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
|
m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
|
||||||
return int(m.group('age')) if m else US_RATINGS.get(s)
|
if m:
|
||||||
|
return int(m.group('age'))
|
||||||
|
if s in US_RATINGS:
|
||||||
|
return US_RATINGS[s]
|
||||||
|
return TV_PARENTAL_GUIDELINES.get(s)
|
||||||
|
|
||||||
|
|
||||||
def strip_jsonp(code):
|
def strip_jsonp(code):
|
||||||
@@ -2969,3 +2986,110 @@ def parse_m3u8_attributes(attrib):
|
|||||||
|
|
||||||
def urshift(val, n):
|
def urshift(val, n):
|
||||||
return val >> n if val >= 0 else (val + 0x100000000) >> n
|
return val >> n if val >= 0 else (val + 0x100000000) >> n
|
||||||
|
|
||||||
|
|
||||||
|
# Based on png2str() written by @gdkchan and improved by @yokrysty
|
||||||
|
# Originally posted at https://github.com/rg3/youtube-dl/issues/9706
|
||||||
|
def decode_png(png_data):
    """Decode a non-interlaced 8-bit RGB PNG into its raw colour samples.

    ``png_data`` is the complete PNG file as a byte string.

    Returns a ``(width, height, pixels)`` tuple where ``pixels`` is a list of
    ``height`` rows and every row is a flat list of ``width * 3`` integers
    (the R, G and B samples of each pixel, in that order).

    Raises IOError when the data is not a PNG file, is truncated, contains no
    image data or uses a pixel format other than non-interlaced 8-bit RGB.
    Chunk CRCs are not verified.
    """
    # Reference: https://www.w3.org/TR/PNG/
    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or png_data[12:16] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    def unpack_uint32(data):
        # All integers read here are 4 byte big endian values; a shorter
        # slice means the file ends in the middle of a field
        if len(data) != 4:
            raise IOError('Not a valid PNG file.')
        return compat_struct_unpack('>I', data)[0]

    # Walk the chunk list: length (4) + type (4) + data (length) + CRC (4)
    ihdr = None
    idat_parts = []
    pos = 8
    while pos < len(png_data):
        length = unpack_uint32(png_data[pos:pos + 4])
        chunk_type = png_data[pos + 4:pos + 8]
        data_end = pos + 8 + length
        chunk_data = png_data[pos + 8:data_end]
        if len(chunk_type) != 4 or len(chunk_data) != length:
            raise IOError('Not a valid PNG file.')

        if ihdr is None:
            # The signature check above guarantees the first chunk is IHDR
            ihdr = chunk_data
        elif chunk_type == b'IDAT':
            idat_parts.append(chunk_data)

        pos = data_end + 4  # Skip CRC

    if len(ihdr) < 13:
        raise IOError('Not a valid PNG file.')

    width = unpack_uint32(ihdr[:4])
    height = unpack_uint32(ihdr[4:8])

    # The unfiltering below hard-codes 3 bytes per pixel and sequential
    # scanlines, so reject every other format instead of returning garbage
    bit_depth, color_type, _, _, interlace = bytearray(ihdr[8:13])
    if bit_depth != 8 or color_type != 2 or interlace != 0:
        raise IOError('Unsupported PNG format (only non-interlaced 8-bit RGB is supported).')

    idat = b''.join(idat_parts)
    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    stride = width * 3
    # Every scanline is prefixed with one filter type byte
    if len(decompressed_data) < height * (1 + stride):
        raise IOError('Unable to read PNG data.')

    pixels = []
    previous_row = None

    for y in range(height):
        base_pos = y * (1 + stride)
        filter_type = decompressed_data[base_pos]
        if filter_type > 4:
            raise IOError('Unsupported PNG filter type %d.' % filter_type)

        current_row = []
        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + base_pos + x]

            # Neighbouring samples of the same channel; samples outside the
            # image count as 0
            left = current_row[x - 3] if x > 2 else 0
            up = previous_row[x] if y > 0 else 0

            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = previous_row[x - 3] if x > 2 and y > 0 else 0

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Pick the neighbour closest to the predictor p
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

        previous_row = current_row

    return width, height, pixels
|
||||||
|
@@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2016.08.06'
|
__version__ = '2016.08.07'
|
||||||
|
Reference in New Issue
Block a user