Compare commits
172 Commits
2016.07.06
...
2016.07.26
Author | SHA1 | Date | |
---|---|---|---|
![]() |
dcbb07c35a | ||
![]() |
40090e8d51 | ||
![]() |
3e050d51d4 | ||
![]() |
ced70c8640 | ||
![]() |
9a700deea4 | ||
![]() |
dc35ba0eba | ||
![]() |
88bd486b9a | ||
![]() |
7f8b92e3cf | ||
![]() |
35f6e0ff36 | ||
![]() |
326fa4e6e5 | ||
![]() |
c74299a72c | ||
![]() |
10a1bb3a78 | ||
![]() |
4d3e543c73 | ||
![]() |
05d1e7aaa9 | ||
![]() |
a3aa814b77 | ||
![]() |
5c32a77cad | ||
![]() |
14a28e705b | ||
![]() |
cc99d4f826 | ||
![]() |
712c7530ff | ||
![]() |
0a147785e8 | ||
![]() |
59eaf69e33 | ||
![]() |
e8be2943a7 | ||
![]() |
8fdc538b46 | ||
![]() |
9513c1eb17 | ||
![]() |
ae6fff4e64 | ||
![]() |
5a65668e25 | ||
![]() |
f75e6890db | ||
![]() |
d9cb92c840 | ||
![]() |
94c04a3c79 | ||
![]() |
f094834857 | ||
![]() |
111de00289 | ||
![]() |
b4a131e1a5 | ||
![]() |
f1991ce928 | ||
![]() |
6548030a17 | ||
![]() |
3a8947650b | ||
![]() |
1979969f91 | ||
![]() |
0673741af3 | ||
![]() |
c8e170b209 | ||
![]() |
bbe1f3634a | ||
![]() |
4671dd41b2 | ||
![]() |
f164b97123 | ||
![]() |
5275efe30d | ||
![]() |
b13647cf3c | ||
![]() |
add7d2a0e2 | ||
![]() |
e298d3a08c | ||
![]() |
fd8c8c7dcd | ||
![]() |
9158af16cc | ||
![]() |
c6668e4ad1 | ||
![]() |
84e8cca48b | ||
![]() |
790b06b7d4 | ||
![]() |
740d7c49c2 | ||
![]() |
4e51ec5f57 | ||
![]() |
05087d1b4c | ||
![]() |
a66a73ee90 | ||
![]() |
8188b923db | ||
![]() |
d993a1354d | ||
![]() |
e8882e7043 | ||
![]() |
1056821799 | ||
![]() |
890e6d3309 | ||
![]() |
246080d378 | ||
![]() |
b1ea680270 | ||
![]() |
45550d1039 | ||
![]() |
7cdfc4c90f | ||
![]() |
af21f56f98 | ||
![]() |
1a8f0773b6 | ||
![]() |
59cc5bd8bf | ||
![]() |
49bc16b95e | ||
![]() |
a2f9ca1e67 | ||
![]() |
371ddb14fe | ||
![]() |
998895dffa | ||
![]() |
aadd3ce21f | ||
![]() |
ae7b846203 | ||
![]() |
21ba7d0981 | ||
![]() |
691fbe7f98 | ||
![]() |
2e221ca3a8 | ||
![]() |
317f7ab634 | ||
![]() |
23495d6a39 | ||
![]() |
224db034ab | ||
![]() |
ad27649be3 | ||
![]() |
84571be645 | ||
![]() |
7b0d333a7e | ||
![]() |
342f0c3682 | ||
![]() |
38e0f16a94 | ||
![]() |
e910fe2fe4 | ||
![]() |
233b58dec7 | ||
![]() |
c39b2ed990 | ||
![]() |
35ec86689c | ||
![]() |
c485959034 | ||
![]() |
a0560d8ab8 | ||
![]() |
0385aa6199 | ||
![]() |
00f4764cb7 | ||
![]() |
51c2cd0b83 | ||
![]() |
5f5a9d6158 | ||
![]() |
2d19fb5072 | ||
![]() |
9d865a1af6 | ||
![]() |
41aa44259d | ||
![]() |
381ff44756 | ||
![]() |
7f29cf545a | ||
![]() |
7d1219f3e0 | ||
![]() |
f1b4af7d79 | ||
![]() |
8a8590a617 | ||
![]() |
4a7a5e41f7 | ||
![]() |
2a49d01600 | ||
![]() |
b99af8a51c | ||
![]() |
8e7020daef | ||
![]() |
a26bcc61c1 | ||
![]() |
5c4dcf8172 | ||
![]() |
e9fb6a4bbe | ||
![]() |
e2dbcaa1bf | ||
![]() |
ae01850165 | ||
![]() |
c3baaedfc8 | ||
![]() |
0b68de3cc1 | ||
![]() |
39e9d524e5 | ||
![]() |
865b087224 | ||
![]() |
3121b25639 | ||
![]() |
0286b85c79 | ||
![]() |
ab52bb5137 | ||
![]() |
61a98b8623 | ||
![]() |
6daf34a045 | ||
![]() |
c03adf90bd | ||
![]() |
0ece114b7b | ||
![]() |
5b6a74856b | ||
![]() |
ce43100a01 | ||
![]() |
8cc9b4016d | ||
![]() |
31eeab9f41 | ||
![]() |
9558dcec9c | ||
![]() |
6e6b70d65f | ||
![]() |
d417fd88d0 | ||
![]() |
9e4f5dc1e9 | ||
![]() |
1251565ee0 | ||
![]() |
1f7258a367 | ||
![]() |
0af985069b | ||
![]() |
0de168f7ed | ||
![]() |
95b31e266b | ||
![]() |
6b3a3098b5 | ||
![]() |
2de624fdd5 | ||
![]() |
3fee7f636c | ||
![]() |
89e2fff2b7 | ||
![]() |
cedc70b292 | ||
![]() |
07d7689f2e | ||
![]() |
ae8cb5328d | ||
![]() |
2e32ac0b9a | ||
![]() |
672f01c370 | ||
![]() |
e2d616dd30 | ||
![]() |
0ab7f4fe2b | ||
![]() |
29c4a07776 | ||
![]() |
826e911e41 | ||
![]() |
30d22dae8e | ||
![]() |
ec3518725b | ||
![]() |
5f87d845eb | ||
![]() |
571808a7aa | ||
![]() |
dfe5fa49ae | ||
![]() |
01a0c511eb | ||
![]() |
b3d30315ce | ||
![]() |
882af14d7d | ||
![]() |
47335a0efa | ||
![]() |
34bc2d9dfd | ||
![]() |
08c7af4afa | ||
![]() |
f7291a0b7c | ||
![]() |
c65aa4e9e1 | ||
![]() |
ad213a1d74 | ||
![]() |
43f1e4e41e | ||
![]() |
54b0e909d5 | ||
![]() |
f8752b86ac | ||
![]() |
84c237fb8a | ||
![]() |
ab49d7a9fa | ||
![]() |
b4173f1551 | ||
![]() |
2817b99cf2 | ||
![]() |
001fffd004 | ||
![]() |
59bbe4911a | ||
![]() |
4f3c5e0627 | ||
![]() |
ccff2c404d |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.06*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.06**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.26.2*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.26.2**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2016.07.06
|
||||
[debug] youtube-dl version 2016.07.26.2
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
@@ -7,9 +7,6 @@ python:
|
||||
- "3.4"
|
||||
- "3.5"
|
||||
sudo: false
|
||||
install:
|
||||
- bash ./devscripts/install_srelay.sh
|
||||
- export PATH=$PATH:$(pwd)/tmp/srelay-0.4.8b6
|
||||
script: nosetests test --verbose
|
||||
notifications:
|
||||
email:
|
||||
|
2
AUTHORS
2
AUTHORS
@@ -177,3 +177,5 @@ Roman Tsiupa
|
||||
Artur Krysiak
|
||||
Jakub Adam Wieczorek
|
||||
Aleksandar Topuzović
|
||||
Nehal Patel
|
||||
Rob van Bekkum
|
||||
|
@@ -17,7 +17,7 @@ youtube-dl - download videos from youtube.com or other video platforms
|
||||
|
||||
To install it right away for all UNIX users (Linux, OS X, etc.), type:
|
||||
|
||||
sudo curl -L https://yt-dl.org/latest/youtube-dl -o /usr/local/bin/youtube-dl
|
||||
sudo curl -L https://yt-dl.org/downloads/latest/youtube-dl -o /usr/local/bin/youtube-dl
|
||||
sudo chmod a+rx /usr/local/bin/youtube-dl
|
||||
|
||||
If you do not have curl, you can alternatively use a recent wget:
|
||||
@@ -432,6 +432,7 @@ For example, with the following configuration file youtube-dl will always extrac
|
||||
--no-mtime
|
||||
--proxy 127.0.0.1:3128
|
||||
-o ~/Movies/%(title)s.%(ext)s
|
||||
# Lines starting with # are comments
|
||||
```
|
||||
|
||||
Note that options in configuration file are just the same options aka switches used in regular command line calls thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`.
|
||||
|
@@ -15,13 +15,9 @@ data = urllib.request.urlopen(URL).read()
|
||||
with open('download.html.in', 'r', encoding='utf-8') as tmplf:
|
||||
template = tmplf.read()
|
||||
|
||||
md5sum = hashlib.md5(data).hexdigest()
|
||||
sha1sum = hashlib.sha1(data).hexdigest()
|
||||
sha256sum = hashlib.sha256(data).hexdigest()
|
||||
template = template.replace('@PROGRAM_VERSION@', version)
|
||||
template = template.replace('@PROGRAM_URL@', URL)
|
||||
template = template.replace('@PROGRAM_MD5SUM@', md5sum)
|
||||
template = template.replace('@PROGRAM_SHA1SUM@', sha1sum)
|
||||
template = template.replace('@PROGRAM_SHA256SUM@', sha256sum)
|
||||
template = template.replace('@EXE_URL@', versions_info['versions'][version]['exe'][0])
|
||||
template = template.replace('@EXE_SHA256SUM@', versions_info['versions'][version]['exe'][1])
|
||||
|
@@ -1,8 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
mkdir -p tmp && cd tmp
|
||||
wget -N http://downloads.sourceforge.net/project/socks-relay/socks-relay/srelay-0.4.8/srelay-0.4.8b6.tar.gz
|
||||
tar zxvf srelay-0.4.8b6.tar.gz
|
||||
cd srelay-0.4.8b6
|
||||
./configure
|
||||
make
|
@@ -14,6 +14,7 @@
|
||||
- **8tracks**
|
||||
- **91porn**
|
||||
- **9gag**
|
||||
- **9now.com.au**
|
||||
- **abc.net.au**
|
||||
- **Abc7News**
|
||||
- **abcnews**
|
||||
@@ -45,6 +46,7 @@
|
||||
- **archive.org**: archive.org videos
|
||||
- **ARD**
|
||||
- **ARD:mediathek**
|
||||
- **Arkena**
|
||||
- **arte.tv**
|
||||
- **arte.tv:+7**
|
||||
- **arte.tv:cinema**
|
||||
@@ -140,7 +142,7 @@
|
||||
- **CollegeRama**
|
||||
- **ComCarCoff**
|
||||
- **ComedyCentral**
|
||||
- **ComedyCentralShows**: The Daily Show / The Colbert Report
|
||||
- **ComedyCentralTV**
|
||||
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
||||
- **Coub**
|
||||
- **Cracked**
|
||||
@@ -224,6 +226,7 @@
|
||||
- **Firstpost**
|
||||
- **FiveTV**
|
||||
- **Flickr**
|
||||
- **Flipagram**
|
||||
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
||||
- **FootyRoom**
|
||||
- **Formula1**
|
||||
@@ -312,6 +315,7 @@
|
||||
- **jpopsuki.tv**
|
||||
- **JWPlatform**
|
||||
- **Kaltura**
|
||||
- **Kamcord**
|
||||
- **KanalPlay**: Kanal 5/9/11 Play
|
||||
- **Kankan**
|
||||
- **Karaoketv**
|
||||
@@ -333,6 +337,8 @@
|
||||
- **kuwo:song**: 酷我音乐
|
||||
- **la7.it**
|
||||
- **Laola1Tv**
|
||||
- **Lcp**
|
||||
- **LcpPlay**
|
||||
- **Le**: 乐视网
|
||||
- **Learnr**
|
||||
- **Lecture2Go**
|
||||
@@ -394,7 +400,6 @@
|
||||
- **MSN**
|
||||
- **MTV**
|
||||
- **mtv.de**
|
||||
- **mtviggy.com**
|
||||
- **mtvservices:embedded**
|
||||
- **MuenchenTV**: münchen.tv
|
||||
- **MusicPlayOn**
|
||||
@@ -434,7 +439,6 @@
|
||||
- **Newstube**
|
||||
- **NextMedia**: 蘋果日報
|
||||
- **NextMediaActionNews**: 蘋果日報 - 動新聞
|
||||
- **nextmovie.com**
|
||||
- **nfb**: National Film Board of Canada
|
||||
- **nfl.com**
|
||||
- **nhl.com**
|
||||
@@ -446,6 +450,7 @@
|
||||
- **niconico**: ニコニコ動画
|
||||
- **NiconicoPlaylist**
|
||||
- **NineCNineMedia**
|
||||
- **Nintendo**
|
||||
- **njoy**: N-JOY
|
||||
- **njoy:embed**
|
||||
- **Noco**
|
||||
@@ -473,9 +478,12 @@
|
||||
- **NYTimes**
|
||||
- **NYTimesArticle**
|
||||
- **ocw.mit.edu**
|
||||
- **OdaTV**
|
||||
- **Odnoklassniki**
|
||||
- **OktoberfestTV**
|
||||
- **on.aol.com**
|
||||
- **onet.tv**
|
||||
- **onet.tv:channel**
|
||||
- **OnionStudios**
|
||||
- **Ooyala**
|
||||
- **OoyalaExternal**
|
||||
@@ -550,6 +558,7 @@
|
||||
- **RICE**
|
||||
- **RingTV**
|
||||
- **RockstarGames**
|
||||
- **RoosterTeeth**
|
||||
- **RottenTomatoes**
|
||||
- **Roxwel**
|
||||
- **RTBF**
|
||||
@@ -562,7 +571,9 @@
|
||||
- **rtve.es:alacarta**: RTVE a la carta
|
||||
- **rtve.es:infantil**: RTVE infantil
|
||||
- **rtve.es:live**: RTVE.es live streams
|
||||
- **rtve.es:television**
|
||||
- **RTVNH**
|
||||
- **Rudo**
|
||||
- **RUHD**
|
||||
- **RulePorn**
|
||||
- **rutube**: Rutube videos
|
||||
@@ -637,6 +648,7 @@
|
||||
- **stanfordoc**: Stanford Open ClassRoom
|
||||
- **Steam**
|
||||
- **Stitcher**
|
||||
- **Streamable**
|
||||
- **streamcloud.eu**
|
||||
- **StreamCZ**
|
||||
- **StreetVoice**
|
||||
@@ -684,6 +696,7 @@
|
||||
- **TNAFlix**
|
||||
- **TNAFlixNetworkEmbed**
|
||||
- **toggle**
|
||||
- **Tosh**: Tosh.0
|
||||
- **tou.tv**
|
||||
- **Toypics**: Toypics user profile
|
||||
- **ToypicsUser**: Toypics user profile
|
||||
@@ -789,6 +802,7 @@
|
||||
- **vine:user**
|
||||
- **vk**: VK
|
||||
- **vk:uservideos**: VK - User's Videos
|
||||
- **vk:wallpost**
|
||||
- **vlive**
|
||||
- **Vodlocker**
|
||||
- **VoiceRepublic**
|
||||
|
@@ -335,6 +335,40 @@ class TestFormatSelection(unittest.TestCase):
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], f1['format_id'])
|
||||
|
||||
def test_audio_only_extractor_format_selection(self):
|
||||
# For extractors with incomplete formats (all formats are audio-only or
|
||||
# video-only) best and worst should fallback to corresponding best/worst
|
||||
# video-only or audio-only formats (as per
|
||||
# https://github.com/rg3/youtube-dl/pull/5556)
|
||||
formats = [
|
||||
{'format_id': 'low', 'ext': 'mp3', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
|
||||
{'format_id': 'high', 'ext': 'mp3', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL},
|
||||
]
|
||||
info_dict = _make_result(formats)
|
||||
|
||||
ydl = YDL({'format': 'best'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'high')
|
||||
|
||||
ydl = YDL({'format': 'worst'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'low')
|
||||
|
||||
def test_format_not_available(self):
|
||||
formats = [
|
||||
{'format_id': 'regular', 'ext': 'mp4', 'height': 360, 'url': TEST_URL},
|
||||
{'format_id': 'video', 'ext': 'mp4', 'height': 720, 'acodec': 'none', 'url': TEST_URL},
|
||||
]
|
||||
info_dict = _make_result(formats)
|
||||
|
||||
# This must fail since complete video-audio format does not match filter
|
||||
# and extractor does not provide incomplete only formats (i.e. only
|
||||
# video-only or audio-only).
|
||||
ydl = YDL({'format': 'best[height>360]'})
|
||||
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
|
||||
|
||||
def test_invalid_format_specs(self):
|
||||
def assert_syntax_error(format_spec):
|
||||
ydl = YDL({'format': format_spec})
|
||||
|
@@ -101,8 +101,6 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
self.assertMatch(':ytsubs', ['youtube:subscriptions'])
|
||||
self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
|
||||
self.assertMatch(':ythistory', ['youtube:history'])
|
||||
self.assertMatch(':thedailyshow', ['ComedyCentralShows'])
|
||||
self.assertMatch(':tds', ['ComedyCentralShows'])
|
||||
|
||||
def test_vimeo_matching(self):
|
||||
self.assertMatch('https://vimeo.com/channels/tributes', ['vimeo:channel'])
|
||||
|
@@ -88,6 +88,7 @@ class TestCompat(unittest.TestCase):
|
||||
def test_compat_shlex_split(self):
|
||||
self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two'])
|
||||
self.assertEqual(compat_shlex_split('-option "one\ntwo" \n -flag'), ['-option', 'one\ntwo', '-flag'])
|
||||
self.assertEqual(compat_shlex_split('-val 中文'), ['-val', '中文'])
|
||||
|
||||
def test_compat_etree_fromstring(self):
|
||||
xml = '''
|
||||
|
@@ -33,6 +33,7 @@ from youtube_dl.utils import (
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
fix_xml_ampersands,
|
||||
get_element_by_class,
|
||||
InAdvancePagedList,
|
||||
intlist_to_bytes,
|
||||
is_html,
|
||||
@@ -80,6 +81,7 @@ from youtube_dl.utils import (
|
||||
cli_option,
|
||||
cli_valueless_option,
|
||||
cli_bool_option,
|
||||
parse_codecs,
|
||||
)
|
||||
from youtube_dl.compat import (
|
||||
compat_chr,
|
||||
@@ -607,6 +609,29 @@ class TestUtil(unittest.TestCase):
|
||||
limit_length('foo bar baz asd', 12).startswith('foo bar'))
|
||||
self.assertTrue('...' in limit_length('foo bar baz asd', 12))
|
||||
|
||||
def test_parse_codecs(self):
|
||||
self.assertEqual(parse_codecs(''), {})
|
||||
self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), {
|
||||
'vcodec': 'avc1.77.30',
|
||||
'acodec': 'mp4a.40.2',
|
||||
})
|
||||
self.assertEqual(parse_codecs('mp4a.40.2'), {
|
||||
'vcodec': 'none',
|
||||
'acodec': 'mp4a.40.2',
|
||||
})
|
||||
self.assertEqual(parse_codecs('mp4a.40.5,avc1.42001e'), {
|
||||
'vcodec': 'avc1.42001e',
|
||||
'acodec': 'mp4a.40.5',
|
||||
})
|
||||
self.assertEqual(parse_codecs('avc3.640028'), {
|
||||
'vcodec': 'avc3.640028',
|
||||
'acodec': 'none',
|
||||
})
|
||||
self.assertEqual(parse_codecs(', h264,,newcodec,aac'), {
|
||||
'vcodec': 'h264',
|
||||
'acodec': 'aac',
|
||||
})
|
||||
|
||||
def test_escape_rfc3986(self):
|
||||
reserved = "!*'();:@&=+$,/?#[]"
|
||||
unreserved = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~'
|
||||
@@ -991,5 +1016,13 @@ The first line
|
||||
self.assertEqual(urshift(3, 1), 1)
|
||||
self.assertEqual(urshift(-3, 1), 2147483646)
|
||||
|
||||
def test_get_element_by_class(self):
|
||||
html = '''
|
||||
<span class="foo bar">nice</span>
|
||||
'''
|
||||
|
||||
self.assertEqual(get_element_by_class('foo', html), 'nice')
|
||||
self.assertEqual(get_element_by_class('no-such-class', html), None)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -5,6 +5,7 @@ from __future__ import absolute_import, unicode_literals
|
||||
|
||||
import collections
|
||||
import contextlib
|
||||
import copy
|
||||
import datetime
|
||||
import errno
|
||||
import fileinput
|
||||
@@ -1051,9 +1052,9 @@ class YoutubeDL(object):
|
||||
if isinstance(selector, list):
|
||||
fs = [_build_selector_function(s) for s in selector]
|
||||
|
||||
def selector_function(formats):
|
||||
def selector_function(ctx):
|
||||
for f in fs:
|
||||
for format in f(formats):
|
||||
for format in f(ctx):
|
||||
yield format
|
||||
return selector_function
|
||||
elif selector.type == GROUP:
|
||||
@@ -1061,17 +1062,17 @@ class YoutubeDL(object):
|
||||
elif selector.type == PICKFIRST:
|
||||
fs = [_build_selector_function(s) for s in selector.selector]
|
||||
|
||||
def selector_function(formats):
|
||||
def selector_function(ctx):
|
||||
for f in fs:
|
||||
picked_formats = list(f(formats))
|
||||
picked_formats = list(f(ctx))
|
||||
if picked_formats:
|
||||
return picked_formats
|
||||
return []
|
||||
elif selector.type == SINGLE:
|
||||
format_spec = selector.selector
|
||||
|
||||
def selector_function(formats):
|
||||
formats = list(formats)
|
||||
def selector_function(ctx):
|
||||
formats = list(ctx['formats'])
|
||||
if not formats:
|
||||
return
|
||||
if format_spec == 'all':
|
||||
@@ -1084,9 +1085,10 @@ class YoutubeDL(object):
|
||||
if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
|
||||
if audiovideo_formats:
|
||||
yield audiovideo_formats[format_idx]
|
||||
# for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
|
||||
elif (all(f.get('acodec') != 'none' for f in formats) or
|
||||
all(f.get('vcodec') != 'none' for f in formats)):
|
||||
# for extractors with incomplete formats (audio only (soundcloud)
|
||||
# or video only (imgur)) we will fallback to best/worst
|
||||
# {video,audio}-only format
|
||||
elif ctx['incomplete_formats']:
|
||||
yield formats[format_idx]
|
||||
elif format_spec == 'bestaudio':
|
||||
audio_formats = [
|
||||
@@ -1160,17 +1162,18 @@ class YoutubeDL(object):
|
||||
}
|
||||
video_selector, audio_selector = map(_build_selector_function, selector.selector)
|
||||
|
||||
def selector_function(formats):
|
||||
formats = list(formats)
|
||||
for pair in itertools.product(video_selector(formats), audio_selector(formats)):
|
||||
def selector_function(ctx):
|
||||
for pair in itertools.product(
|
||||
video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
|
||||
yield _merge(pair)
|
||||
|
||||
filters = [self._build_format_filter(f) for f in selector.filters]
|
||||
|
||||
def final_selector(formats):
|
||||
def final_selector(ctx):
|
||||
ctx_copy = copy.deepcopy(ctx)
|
||||
for _filter in filters:
|
||||
formats = list(filter(_filter, formats))
|
||||
return selector_function(formats)
|
||||
ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
|
||||
return selector_function(ctx_copy)
|
||||
return final_selector
|
||||
|
||||
stream = io.BytesIO(format_spec.encode('utf-8'))
|
||||
@@ -1377,7 +1380,34 @@ class YoutubeDL(object):
|
||||
req_format_list.append('best')
|
||||
req_format = '/'.join(req_format_list)
|
||||
format_selector = self.build_format_selector(req_format)
|
||||
formats_to_download = list(format_selector(formats))
|
||||
|
||||
# While in format selection we may need to have an access to the original
|
||||
# format set in order to calculate some metrics or do some processing.
|
||||
# For now we need to be able to guess whether original formats provided
|
||||
# by extractor are incomplete or not (i.e. whether extractor provides only
|
||||
# video-only or audio-only formats) for proper formats selection for
|
||||
# extractors with such incomplete formats (see
|
||||
# https://github.com/rg3/youtube-dl/pull/5556).
|
||||
# Since formats may be filtered during format selection and may not match
|
||||
# the original formats the results may be incorrect. Thus original formats
|
||||
# or pre-calculated metrics should be passed to format selection routines
|
||||
# as well.
|
||||
# We will pass a context object containing all necessary additional data
|
||||
# instead of just formats.
|
||||
# This fixes incorrect format selection issue (see
|
||||
# https://github.com/rg3/youtube-dl/issues/10083).
|
||||
incomplete_formats = (
|
||||
# All formats are video-only or
|
||||
all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or
|
||||
# all formats are audio-only
|
||||
all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
|
||||
|
||||
ctx = {
|
||||
'formats': formats,
|
||||
'incomplete_formats': incomplete_formats,
|
||||
}
|
||||
|
||||
formats_to_download = list(format_selector(ctx))
|
||||
if not formats_to_download:
|
||||
raise ExtractorError('requested format not available',
|
||||
expected=True)
|
||||
|
@@ -1,3 +1,4 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import binascii
|
||||
@@ -2594,15 +2595,19 @@ except ImportError: # Python < 3.3
|
||||
return "'" + s.replace("'", "'\"'\"'") + "'"
|
||||
|
||||
|
||||
if sys.version_info >= (2, 7, 3):
|
||||
try:
|
||||
args = shlex.split('中文')
|
||||
assert (isinstance(args, list) and
|
||||
isinstance(args[0], compat_str) and
|
||||
args[0] == '中文')
|
||||
compat_shlex_split = shlex.split
|
||||
else:
|
||||
except (AssertionError, UnicodeEncodeError):
|
||||
# Working around shlex issue with unicode strings on some python 2
|
||||
# versions (see http://bugs.python.org/issue1548891)
|
||||
def compat_shlex_split(s, comments=False, posix=True):
|
||||
if isinstance(s, compat_str):
|
||||
s = s.encode('utf-8')
|
||||
return shlex.split(s, comments, posix)
|
||||
return list(map(lambda s: s.decode('utf-8'), shlex.split(s, comments, posix)))
|
||||
|
||||
|
||||
def compat_ord(c):
|
||||
|
@@ -5,6 +5,8 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
mimetype2ext,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
@@ -50,21 +52,25 @@ class AMPIE(InfoExtractor):
|
||||
if isinstance(media_content, dict):
|
||||
media_content = [media_content]
|
||||
for media_data in media_content:
|
||||
media = media_data['@attributes']
|
||||
media_type = media['type']
|
||||
if media_type in ('video/f4m', 'application/f4m+xml'):
|
||||
media = media_data.get('@attributes', {})
|
||||
media_url = media.get('url')
|
||||
if not media_url:
|
||||
continue
|
||||
ext = mimetype2ext(media.get('type')) or determine_ext(media_url)
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
media['url'] + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
|
||||
media_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
elif media_type == 'application/x-mpegURL':
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media['url'], video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
media_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),
|
||||
'url': media['url'],
|
||||
'tbr': int_or_none(media.get('bitrate')),
|
||||
'filesize': int_or_none(media.get('fileSize')),
|
||||
'ext': ext,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
@@ -22,6 +22,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
_APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
|
||||
_NETRC_MACHINE = 'animeondemand'
|
||||
_TESTS = [{
|
||||
# jap, OmU
|
||||
'url': 'https://www.anime-on-demand.de/anime/161',
|
||||
'info_dict': {
|
||||
'id': '161',
|
||||
@@ -30,17 +31,21 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
},
|
||||
'playlist_mincount': 4,
|
||||
}, {
|
||||
# Film wording is used instead of Episode
|
||||
# Film wording is used instead of Episode, ger/jap, Dub/OmU
|
||||
'url': 'https://www.anime-on-demand.de/anime/39',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Episodes without titles
|
||||
# Episodes without titles, jap, OmU
|
||||
'url': 'https://www.anime-on-demand.de/anime/162',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# ger/jap, Dub/OmU, account required
|
||||
'url': 'https://www.anime-on-demand.de/anime/169',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Full length film, non-series, ger/jap, Dub/OmU, account required
|
||||
'url': 'https://www.anime-on-demand.de/anime/185',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
@@ -110,35 +115,12 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
|
||||
entries = []
|
||||
|
||||
for num, episode_html in enumerate(re.findall(
|
||||
r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', webpage), 1):
|
||||
episodebox_title = self._search_regex(
|
||||
(r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1',
|
||||
r'class="episodebox-title"[^>]+>(?P<title>.+?)<'),
|
||||
episode_html, 'episodebox title', default=None, group='title')
|
||||
if not episodebox_title:
|
||||
continue
|
||||
|
||||
episode_number = int(self._search_regex(
|
||||
r'(?:Episode|Film)\s*(\d+)',
|
||||
episodebox_title, 'episode number', default=num))
|
||||
episode_title = self._search_regex(
|
||||
r'(?:Episode|Film)\s*\d+\s*-\s*(.+)',
|
||||
episodebox_title, 'episode title', default=None)
|
||||
|
||||
video_id = 'episode-%d' % episode_number
|
||||
|
||||
common_info = {
|
||||
'id': video_id,
|
||||
'series': anime_title,
|
||||
'episode': episode_title,
|
||||
'episode_number': episode_number,
|
||||
}
|
||||
|
||||
def extract_info(html, video_id, num=None):
|
||||
title, description = [None] * 2
|
||||
formats = []
|
||||
|
||||
for input_ in re.findall(
|
||||
r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', episode_html):
|
||||
r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', html):
|
||||
attributes = extract_attributes(input_)
|
||||
playlist_urls = []
|
||||
for playlist_key in ('data-playlist', 'data-otherplaylist'):
|
||||
@@ -161,7 +143,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
format_id_list.append(lang)
|
||||
if kind:
|
||||
format_id_list.append(kind)
|
||||
if not format_id_list:
|
||||
if not format_id_list and num is not None:
|
||||
format_id_list.append(compat_str(num))
|
||||
format_id = '-'.join(format_id_list)
|
||||
format_note = ', '.join(filter(None, (kind, lang_note)))
|
||||
@@ -215,28 +197,74 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
})
|
||||
formats.extend(file_formats)
|
||||
|
||||
if formats:
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
'title': title,
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
def extract_entries(html, video_id, common_info, num=None):
|
||||
info = extract_info(html, video_id, num)
|
||||
|
||||
if info['formats']:
|
||||
self._sort_formats(info['formats'])
|
||||
f = common_info.copy()
|
||||
f.update({
|
||||
'title': title,
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
})
|
||||
f.update(info)
|
||||
entries.append(f)
|
||||
|
||||
# Extract teaser only when full episode is not available
|
||||
if not formats:
|
||||
# Extract teaser/trailer only when full episode is not available
|
||||
if not info['formats']:
|
||||
m = re.search(
|
||||
r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>Teaser<',
|
||||
episode_html)
|
||||
r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>(?P<kind>Teaser|Trailer)<',
|
||||
html)
|
||||
if m:
|
||||
f = common_info.copy()
|
||||
f.update({
|
||||
'id': '%s-teaser' % f['id'],
|
||||
'id': '%s-%s' % (f['id'], m.group('kind').lower()),
|
||||
'title': m.group('title'),
|
||||
'url': compat_urlparse.urljoin(url, m.group('href')),
|
||||
})
|
||||
entries.append(f)
|
||||
|
||||
def extract_episodes(html):
|
||||
for num, episode_html in enumerate(re.findall(
|
||||
r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', html), 1):
|
||||
episodebox_title = self._search_regex(
|
||||
(r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1',
|
||||
r'class="episodebox-title"[^>]+>(?P<title>.+?)<'),
|
||||
episode_html, 'episodebox title', default=None, group='title')
|
||||
if not episodebox_title:
|
||||
continue
|
||||
|
||||
episode_number = int(self._search_regex(
|
||||
r'(?:Episode|Film)\s*(\d+)',
|
||||
episodebox_title, 'episode number', default=num))
|
||||
episode_title = self._search_regex(
|
||||
r'(?:Episode|Film)\s*\d+\s*-\s*(.+)',
|
||||
episodebox_title, 'episode title', default=None)
|
||||
|
||||
video_id = 'episode-%d' % episode_number
|
||||
|
||||
common_info = {
|
||||
'id': video_id,
|
||||
'series': anime_title,
|
||||
'episode': episode_title,
|
||||
'episode_number': episode_number,
|
||||
}
|
||||
|
||||
extract_entries(episode_html, video_id, common_info)
|
||||
|
||||
def extract_film(html, video_id):
|
||||
common_info = {
|
||||
'id': anime_id,
|
||||
'title': anime_title,
|
||||
'description': anime_description,
|
||||
}
|
||||
extract_entries(html, video_id, common_info)
|
||||
|
||||
extract_episodes(webpage)
|
||||
|
||||
if not entries:
|
||||
extract_film(webpage, anime_id)
|
||||
|
||||
return self.playlist_result(entries, anime_id, anime_title, anime_description)
|
||||
|
@@ -13,13 +13,14 @@ from ..utils import (
|
||||
parse_duration,
|
||||
unified_strdate,
|
||||
xpath_text,
|
||||
update_url_query,
|
||||
)
|
||||
from ..compat import compat_etree_fromstring
|
||||
|
||||
|
||||
class ARDMediathekIE(InfoExtractor):
|
||||
IE_NAME = 'ARD:mediathek'
|
||||
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ardmediathek.de/tv/Dokumentation-und-Reportage/Ich-liebe-das-Leben-trotzdem/rbb-Fernsehen/Video?documentId=29582122&bcastId=3822114',
|
||||
@@ -34,6 +35,7 @@ class ARDMediathekIE(InfoExtractor):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}, {
|
||||
'url': 'http://www.ardmediathek.de/tv/Tatort/Tatort-Scheinwelten-H%C3%B6rfassung-Video/Das-Erste/Video?documentId=29522730&bcastId=602916',
|
||||
'md5': 'f4d98b10759ac06c0072bbcd1f0b9e3e',
|
||||
@@ -44,6 +46,7 @@ class ARDMediathekIE(InfoExtractor):
|
||||
'description': 'md5:196392e79876d0ac94c94e8cdb2875f1',
|
||||
'duration': 5252,
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}, {
|
||||
# audio
|
||||
'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
|
||||
@@ -55,9 +58,21 @@ class ARDMediathekIE(InfoExtractor):
|
||||
'description': 'md5:f6e39f3461f0e1f54bfa48c8875c86ef',
|
||||
'duration': 3240,
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}, {
|
||||
'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# audio
|
||||
'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
|
||||
'md5': '4e8f00631aac0395fee17368ac0e9867',
|
||||
'info_dict': {
|
||||
'id': '30796318',
|
||||
'ext': 'mp3',
|
||||
'title': 'Vor dem Fest',
|
||||
'description': 'md5:c0c1c8048514deaed2a73b3a60eecacb',
|
||||
'duration': 3287,
|
||||
},
|
||||
}]
|
||||
|
||||
def _extract_media_info(self, media_info_url, webpage, video_id):
|
||||
@@ -113,11 +128,14 @@ class ARDMediathekIE(InfoExtractor):
|
||||
continue
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
stream_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124',
|
||||
video_id, preference=-1, f4m_id='hds', fatal=False))
|
||||
update_url_query(stream_url, {
|
||||
'hdcore': '3.1.1',
|
||||
'plugin': 'aasp-3.1.1.69.124'
|
||||
}),
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stream_url, video_id, 'mp4', preference=1, m3u8_id='hls', fatal=False))
|
||||
stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
if server and server.startswith('rtmp'):
|
||||
f = {
|
||||
@@ -231,7 +249,8 @@ class ARDIE(InfoExtractor):
|
||||
'title': 'Die Story im Ersten: Mission unter falscher Flagge',
|
||||
'upload_date': '20140804',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
115
youtube_dl/extractor/arkena.py
Normal file
115
youtube_dl/extractor/arkena.py
Normal file
@@ -0,0 +1,115 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
strip_jsonp,
|
||||
)
|
||||
|
||||
|
||||
class ArkenaIE(InfoExtractor):
    # Handles play.arkena.com player URLs in both their "embed" and "config"
    # flavours; the media id and the trailing numeric account id are captured.
    _VALID_URL = r'https?://play\.arkena\.com/(?:config|embed)/avp/v\d/player/media/(?P<id>[^/]+)/[^/]+/(?P<account_id>\d+)'
    _TESTS = [{
        'url': 'https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411',
        'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
        'info_dict': {
            'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
            'ext': 'mp4',
            'title': 'Big Buck Bunny',
            'description': 'Royalty free test video',
            'timestamp': 1432816365,
            'upload_date': '20150528',
            'is_live': False,
        },
    }, {
        'url': 'https://play.arkena.com/config/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411/?callbackMethod=jQuery1111023664739129262213_1469227693893',
        'only_matching': True,
    }, {
        'url': 'http://play.arkena.com/config/avp/v1/player/media/327336/darkmatter/131064/?callbackMethod=jQuery1111002221189684892677_1469227595972',
        'only_matching': True,
    }, {
        'url': 'http://play.arkena.com/embed/avp/v1/player/media/327336/darkmatter/131064/',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_url(webpage):
        """Return the URL of the first Arkena embed iframe in webpage.

        Matches an <iframe> whose src points at play.arkena.com/embed/avp/
        (scheme optional). Returns None when no such iframe is present.
        """
        # See https://support.arkena.com/display/PLAY/Ways+to+embed+your+video
        mobj = re.search(
            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//play\.arkena\.com/embed/avp/.+?)\1',
            webpage)
        if mobj:
            return mobj.group('url')

    def _real_extract(self, url):
        """Build the info dict for an Arkena media URL.

        Downloads the player config as JSONP, takes the first 'Playlist'
        entry and turns every entry of its 'MediaFiles' mapping into formats.
        'Title', 'MediaInfo' and 'MediaFiles' are required keys; a missing
        one raises KeyError. Everything else is optional.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        account_id = mobj.group('account_id')

        # The config endpoint is always queried as v2 with a dummy JSONP
        # callback ("_"), which strip_jsonp removes before JSON parsing.
        playlist = self._download_json(
            'https://play.arkena.com/config/avp/v2/player/media/%s/0/%s/?callbackMethod=_'
            % (video_id, account_id),
            video_id, transform_source=strip_jsonp)['Playlist'][0]

        media_info = playlist['MediaInfo']
        title = media_info['Title']
        media_files = playlist['MediaFiles']

        is_live = False
        formats = []
        for kind_case, kind_formats in media_files.items():
            kind = kind_case.lower()
            # A kind may carry no list at all; treat that as "no formats"
            # instead of failing on iteration.
            for f in kind_formats or []:
                f_url = f.get('Url')
                if not f_url:
                    continue
                # Keep the per-stream flag separate from the video-level one:
                # the video is reported live as soon as ANY stream is live,
                # so the result no longer depends on which entry happens to
                # be iterated last.
                f_is_live = f.get('Live') == 'true'
                is_live = is_live or f_is_live
                # Either the declared MIME type or the URL extension may
                # identify the manifest type.
                exts = (mimetype2ext(f.get('Type')), determine_ext(f_url, None))
                if kind == 'm3u8' or 'm3u8' in exts:
                    formats.extend(self._extract_m3u8_formats(
                        f_url, video_id, 'mp4',
                        entry_protocol='m3u8' if f_is_live else 'm3u8_native',
                        m3u8_id=kind, fatal=False, live=f_is_live))
                elif kind == 'flash' or 'f4m' in exts:
                    formats.extend(self._extract_f4m_formats(
                        f_url, video_id, f4m_id=kind, fatal=False))
                elif kind == 'dash' or 'mpd' in exts:
                    formats.extend(self._extract_mpd_formats(
                        f_url, video_id, mpd_id=kind, fatal=False))
                elif kind == 'silverlight':
                    # TODO: process when ism is supported (see
                    # https://github.com/rg3/youtube-dl/issues/8118)
                    continue
                else:
                    # Plain direct link.
                    # NOTE(review): the 1000 presumably scales bit/s down to
                    # kbit/s - confirm against float_or_none.
                    tbr = float_or_none(f.get('Bitrate'), 1000)
                    formats.append({
                        'url': f_url,
                        'format_id': '%s-%d' % (kind, tbr) if tbr else kind,
                        'tbr': tbr,
                    })
        self._sort_formats(formats)

        description = media_info.get('Description')
        # Prefer the id reported by the service over the one parsed from the URL.
        video_id = media_info.get('VideoId') or video_id
        timestamp = parse_iso8601(media_info.get('PublishDate'))
        thumbnails = [{
            'url': thumbnail['Url'],
            'width': int_or_none(thumbnail.get('Size')),
        } for thumbnail in (media_info.get('Poster') or []) if thumbnail.get('Url')]

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'is_live': is_live,
            'thumbnails': thumbnails,
            'formats': formats,
        }
|
@@ -589,7 +589,8 @@ class BBCIE(BBCCoUkIE):
|
||||
'info_dict': {
|
||||
'id': '150615_telabyad_kentin_cogu',
|
||||
'ext': 'mp4',
|
||||
'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
|
||||
'title': "Tel Abyad'da IŞİD bayrağı indirildi YPG bayrağı çekildi",
|
||||
'description': 'md5:33a4805a855c9baf7115fcbde57e7025',
|
||||
'timestamp': 1434397334,
|
||||
'upload_date': '20150615',
|
||||
},
|
||||
@@ -603,6 +604,7 @@ class BBCIE(BBCCoUkIE):
|
||||
'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
|
||||
'ext': 'mp4',
|
||||
'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
|
||||
'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
|
||||
'timestamp': 1434713142,
|
||||
'upload_date': '20150619',
|
||||
},
|
||||
@@ -818,8 +820,20 @@ class BBCIE(BBCCoUkIE):
|
||||
# http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
|
||||
playlist = data_playable.get('otherSettings', {}).get('playlist', {})
|
||||
if playlist:
|
||||
entries.append(self._extract_from_playlist_sxml(
|
||||
playlist.get('progressiveDownloadUrl'), playlist_id, timestamp))
|
||||
for key in ('progressiveDownload', 'streaming'):
|
||||
playlist_url = playlist.get('%sUrl' % key)
|
||||
if not playlist_url:
|
||||
continue
|
||||
try:
|
||||
entries.append(self._extract_from_playlist_sxml(
|
||||
playlist_url, playlist_id, timestamp))
|
||||
except Exception as e:
|
||||
# Some playlist URLs may fail with 500, at the same time
|
||||
# the other one may work fine (e.g.
|
||||
# http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
|
||||
continue
|
||||
raise
|
||||
|
||||
if entries:
|
||||
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
||||
@@ -998,10 +1012,10 @@ class BBCCoUkPlaylistBaseIE(InfoExtractor):
|
||||
|
||||
class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
|
||||
IE_NAME = 'bbc.co.uk:iplayer:playlist'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/episodes/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/(?:episodes|group)/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
|
||||
_URL_TEMPLATE = 'http://www.bbc.co.uk/iplayer/episode/%s'
|
||||
_VIDEO_ID_TEMPLATE = r'data-ip-id=["\'](%s)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
|
||||
'info_dict': {
|
||||
'id': 'b05rcz9v',
|
||||
@@ -1009,7 +1023,17 @@ class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
|
||||
'description': 'French thriller serial about a missing teenager.',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
}
|
||||
'skip': 'This programme is not currently available on BBC iPlayer',
|
||||
}, {
|
||||
# Available for over a year unlike 30 days for most other programmes
|
||||
'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32',
|
||||
'info_dict': {
|
||||
'id': 'p02tcc32',
|
||||
'title': 'Bohemian Icons',
|
||||
'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7',
|
||||
},
|
||||
'playlist_mincount': 10,
|
||||
}]
|
||||
|
||||
def _extract_title_and_description(self, webpage):
|
||||
title = self._search_regex(r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
|
||||
|
@@ -12,7 +12,7 @@ class BigflixIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bigflix.com/Hindi-movies/Action-movies/Singham-Returns/16537',
|
||||
'md5': 'ec76aa9b1129e2e5b301a474e54fab74',
|
||||
'md5': 'dc1b4aebb46e3a7077ecc0d9f43f61e3',
|
||||
'info_dict': {
|
||||
'id': '16537',
|
||||
'ext': 'mp4',
|
||||
@@ -26,7 +26,7 @@ class BigflixIE(InfoExtractor):
|
||||
'id': '16070',
|
||||
'ext': 'mp4',
|
||||
'title': 'Madarasapatinam',
|
||||
'description': 'md5:63b9b8ed79189c6f0418c26d9a3452ca',
|
||||
'description': 'md5:9f0470b26a4ba8e824c823b5d95c2f6b',
|
||||
'formats': 'mincount:2',
|
||||
},
|
||||
'params': {
|
||||
|
@@ -2,11 +2,15 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import remove_end
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
remove_end,
|
||||
)
|
||||
from .rudo import RudoIE
|
||||
|
||||
|
||||
class BioBioChileTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tv\.biobiochile\.cl/notas/(?:[^/]+/)+(?P<id>[^/]+)\.shtml'
|
||||
_VALID_URL = r'https?://(?:tv|www)\.biobiochile\.cl/(?:notas|noticias)/(?:[^/]+/)+(?P<id>[^/]+)\.shtml'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://tv.biobiochile.cl/notas/2015/10/21/sobre-camaras-y-camarillas-parlamentarias.shtml',
|
||||
@@ -18,6 +22,7 @@ class BioBioChileTVIE(InfoExtractor):
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'Fernando Atria',
|
||||
},
|
||||
'skip': 'URL expired and redirected to http://www.biobiochile.cl/portada/bbtv/index.html',
|
||||
}, {
|
||||
# different uploader layout
|
||||
'url': 'http://tv.biobiochile.cl/notas/2016/03/18/natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades.shtml',
|
||||
@@ -32,6 +37,16 @@ class BioBioChileTVIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'URL expired and redirected to http://www.biobiochile.cl/portada/bbtv/index.html',
|
||||
}, {
|
||||
'url': 'http://www.biobiochile.cl/noticias/bbtv/comentarios-bio-bio/2016/07/08/edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos.shtml',
|
||||
'info_dict': {
|
||||
'id': 'edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos',
|
||||
'ext': 'mp4',
|
||||
'uploader': '(none)',
|
||||
'upload_date': '20160708',
|
||||
'title': 'Edecanes del Congreso: Figuras decorativas que le cuestan muy caro a los chilenos',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml',
|
||||
'only_matching': True,
|
||||
@@ -45,42 +60,22 @@ class BioBioChileTVIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
rudo_url = RudoIE._extract_url(webpage)
|
||||
if not rudo_url:
|
||||
raise ExtractorError('No videos found')
|
||||
|
||||
title = remove_end(self._og_search_title(webpage), ' - BioBioChile TV')
|
||||
|
||||
file_url = self._search_regex(
|
||||
r'loadFWPlayerVideo\([^,]+,\s*(["\'])(?P<url>.+?)\1',
|
||||
webpage, 'file url', group='url')
|
||||
|
||||
base_url = self._search_regex(
|
||||
r'file\s*:\s*(["\'])(?P<url>.+?)\1\s*\+\s*fileURL', webpage,
|
||||
'base url', default='http://unlimited2-cl.digitalproserver.com/bbtv/',
|
||||
group='url')
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
'%s%s/playlist.m3u8' % (base_url, file_url), video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
|
||||
f = {
|
||||
'url': '%s%s' % (base_url, file_url),
|
||||
'format_id': 'http',
|
||||
'protocol': 'http',
|
||||
'preference': 1,
|
||||
}
|
||||
if formats:
|
||||
f_copy = formats[-1].copy()
|
||||
f_copy.update(f)
|
||||
f = f_copy
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
uploader = self._html_search_regex(
|
||||
r'<a[^>]+href=["\']https?://busca\.biobiochile\.cl/author[^>]+>(.+?)</a>',
|
||||
r'<a[^>]+href=["\']https?://(?:busca|www)\.biobiochile\.cl/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>',
|
||||
webpage, 'uploader', fatal=False)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': rudo_url,
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -26,6 +26,8 @@ from ..utils import (
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
update_url_query,
|
||||
clean_html,
|
||||
mimetype2ext,
|
||||
)
|
||||
|
||||
|
||||
@@ -90,6 +92,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
'description': 'md5:363109c02998fee92ec02211bd8000df',
|
||||
'uploader': 'National Ballet of Canada',
|
||||
},
|
||||
'skip': 'Video gone',
|
||||
},
|
||||
{
|
||||
# test flv videos served by akamaihd.net
|
||||
@@ -108,7 +111,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
},
|
||||
},
|
||||
{
|
||||
# playlist test
|
||||
# playlist with 'videoList'
|
||||
# from http://support.brightcove.com/en/video-cloud/docs/playlist-support-single-video-players
|
||||
'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL',
|
||||
'info_dict': {
|
||||
@@ -117,6 +120,15 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
},
|
||||
{
|
||||
# playlist with 'playlistTab' (https://github.com/rg3/youtube-dl/issues/9965)
|
||||
'url': 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=AQ%7E%7E,AAABXlLMdok%7E,NJ4EoMlZ4rZdx9eU1rkMVd8EaYPBBUlg',
|
||||
'info_dict': {
|
||||
'id': '1522758701001',
|
||||
'title': 'Lesson 08',
|
||||
},
|
||||
'playlist_mincount': 10,
|
||||
},
|
||||
]
|
||||
FLV_VCODECS = {
|
||||
1: 'SORENSON',
|
||||
@@ -298,13 +310,19 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
info_url, player_key, 'Downloading playlist information')
|
||||
|
||||
json_data = json.loads(playlist_info)
|
||||
if 'videoList' not in json_data:
|
||||
if 'videoList' in json_data:
|
||||
playlist_info = json_data['videoList']
|
||||
playlist_dto = playlist_info['mediaCollectionDTO']
|
||||
elif 'playlistTabs' in json_data:
|
||||
playlist_info = json_data['playlistTabs']
|
||||
playlist_dto = playlist_info['lineupListDTO']['playlistDTOs'][0]
|
||||
else:
|
||||
raise ExtractorError('Empty playlist')
|
||||
playlist_info = json_data['videoList']
|
||||
videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']]
|
||||
|
||||
videos = [self._extract_video_info(video_info) for video_info in playlist_dto['videoDTOs']]
|
||||
|
||||
return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'],
|
||||
playlist_title=playlist_info['mediaCollectionDTO']['displayName'])
|
||||
playlist_title=playlist_dto['displayName'])
|
||||
|
||||
def _extract_video_info(self, video_info):
|
||||
video_id = compat_str(video_info['id'])
|
||||
@@ -528,14 +546,16 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
formats = []
|
||||
for source in json_data.get('sources', []):
|
||||
container = source.get('container')
|
||||
source_type = source.get('type')
|
||||
ext = mimetype2ext(source.get('type'))
|
||||
src = source.get('src')
|
||||
if source_type == 'application/x-mpegURL' or container == 'M2TS':
|
||||
if ext == 'ism':
|
||||
continue
|
||||
elif ext == 'm3u8' or container == 'M2TS':
|
||||
if not src:
|
||||
continue
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
elif source_type == 'application/dash+xml':
|
||||
elif ext == 'mpd':
|
||||
if not src:
|
||||
continue
|
||||
formats.extend(self._extract_mpd_formats(src, video_id, 'dash', fatal=False))
|
||||
@@ -551,7 +571,7 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
'tbr': tbr,
|
||||
'filesize': int_or_none(source.get('size')),
|
||||
'container': container,
|
||||
'ext': container.lower(),
|
||||
'ext': ext or container.lower(),
|
||||
}
|
||||
if width == 0 and height == 0:
|
||||
f.update({
|
||||
@@ -604,7 +624,7 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': json_data.get('description'),
|
||||
'description': clean_html(json_data.get('description')),
|
||||
'thumbnail': json_data.get('thumbnail') or json_data.get('poster'),
|
||||
'duration': float_or_none(json_data.get('duration'), 1000),
|
||||
'timestamp': parse_iso8601(json_data.get('published_at')),
|
||||
|
@@ -1,7 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -10,8 +9,10 @@ from ..compat import (
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
clean_html,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
@@ -26,14 +27,14 @@ class CamdemyIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Ch1-1 Introduction, Signals (02-23-2012)',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': '',
|
||||
'creator': 'ss11spring',
|
||||
'duration': 1591,
|
||||
'upload_date': '20130114',
|
||||
'timestamp': 1358154556,
|
||||
'view_count': int,
|
||||
}
|
||||
}, {
|
||||
# With non-empty description
|
||||
# webpage returns "No permission or not login"
|
||||
'url': 'http://www.camdemy.com/media/13885',
|
||||
'md5': '4576a3bb2581f86c61044822adbd1249',
|
||||
'info_dict': {
|
||||
@@ -41,64 +42,71 @@ class CamdemyIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'EverCam + Camdemy QuickStart',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': 'md5:050b62f71ed62928f8a35f1a41e186c9',
|
||||
'description': 'md5:2a9f989c2b153a2342acee579c6e7db6',
|
||||
'creator': 'evercam',
|
||||
'upload_date': '20140620',
|
||||
'timestamp': 1403271569,
|
||||
'duration': 318,
|
||||
}
|
||||
}, {
|
||||
# External source
|
||||
# External source (YouTube)
|
||||
'url': 'http://www.camdemy.com/media/14842',
|
||||
'md5': '50e1c3c3aa233d3d7b7daa2fa10b1cf7',
|
||||
'info_dict': {
|
||||
'id': '2vsYQzNIsJo',
|
||||
'ext': 'mp4',
|
||||
'title': 'Excel 2013 Tutorial - How to add Password Protection',
|
||||
'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
|
||||
'upload_date': '20130211',
|
||||
'uploader': 'Hun Kim',
|
||||
'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
|
||||
'uploader_id': 'hunkimtutorials',
|
||||
'title': 'Excel 2013 Tutorial - How to add Password Protection',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
page = self._download_webpage(url, video_id)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
src_from = self._html_search_regex(
|
||||
r"<div class='srcFrom'>Source: <a title='([^']+)'", page,
|
||||
'external source', default=None)
|
||||
r"class=['\"]srcFrom['\"][^>]*>Sources?(?:\s+from)?\s*:\s*<a[^>]+(?:href|title)=(['\"])(?P<url>(?:(?!\1).)+)\1",
|
||||
webpage, 'external source', default=None, group='url')
|
||||
if src_from:
|
||||
return self.url_result(src_from)
|
||||
|
||||
oembed_obj = self._download_json(
|
||||
'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id)
|
||||
|
||||
title = oembed_obj['title']
|
||||
thumb_url = oembed_obj['thumbnail_url']
|
||||
video_folder = compat_urlparse.urljoin(thumb_url, 'video/')
|
||||
file_list_doc = self._download_xml(
|
||||
compat_urlparse.urljoin(video_folder, 'fileList.xml'),
|
||||
video_id, 'Filelist XML')
|
||||
video_id, 'Downloading filelist XML')
|
||||
file_name = file_list_doc.find('./video/item/fileName').text
|
||||
video_url = compat_urlparse.urljoin(video_folder, file_name)
|
||||
|
||||
timestamp = parse_iso8601(self._html_search_regex(
|
||||
r"<div class='title'>Posted\s*:</div>\s*<div class='value'>([^<>]+)<",
|
||||
page, 'creation time', fatal=False),
|
||||
delimiter=' ', timezone=datetime.timedelta(hours=8))
|
||||
view_count = str_to_int(self._html_search_regex(
|
||||
r"<div class='title'>Views\s*:</div>\s*<div class='value'>([^<>]+)<",
|
||||
page, 'view count', fatal=False))
|
||||
# Some URLs return "No permission or not login" in a webpage despite being
|
||||
# freely available via oembed JSON URL (e.g. http://www.camdemy.com/media/13885)
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'>published on ([^<]+)<', webpage,
|
||||
'upload date', default=None))
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'role=["\']viewCnt["\'][^>]*>([\d,.]+) views',
|
||||
webpage, 'view count', default=None))
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, default=None) or clean_html(
|
||||
oembed_obj.get('description'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': oembed_obj['title'],
|
||||
'title': title,
|
||||
'thumbnail': thumb_url,
|
||||
'description': self._html_search_meta('description', page),
|
||||
'creator': oembed_obj['author_name'],
|
||||
'duration': oembed_obj['duration'],
|
||||
'timestamp': timestamp,
|
||||
'description': description,
|
||||
'creator': oembed_obj.get('author_name'),
|
||||
'duration': parse_duration(oembed_obj.get('duration')),
|
||||
'upload_date': upload_date,
|
||||
'view_count': view_count,
|
||||
}
|
||||
|
||||
|
@@ -4,9 +4,11 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
smuggle_url,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
@@ -25,8 +27,22 @@ class CBCIE(InfoExtractor):
|
||||
'upload_date': '20160203',
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
'skip': 'Geo-restricted to Canada',
|
||||
}, {
|
||||
# with clipId
|
||||
# with clipId, feed available via tpfeed.cbc.ca and feed.theplatform.com
|
||||
'url': 'http://www.cbc.ca/22minutes/videos/22-minutes-update/22-minutes-update-episode-4',
|
||||
'md5': '162adfa070274b144f4fdc3c3b8207db',
|
||||
'info_dict': {
|
||||
'id': '2414435309',
|
||||
'ext': 'mp4',
|
||||
'title': '22 Minutes Update: What Not To Wear Quebec',
|
||||
'description': "This week's latest Canadian top political story is What Not To Wear Quebec.",
|
||||
'upload_date': '20131025',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'timestamp': 1382717907,
|
||||
},
|
||||
}, {
|
||||
# with clipId, feed only available via tpfeed.cbc.ca
|
||||
'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
|
||||
'md5': '0274a90b51a9b4971fe005c63f592f12',
|
||||
'info_dict': {
|
||||
@@ -64,6 +80,7 @@ class CBCIE(InfoExtractor):
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
}],
|
||||
'skip': 'Geo-restricted to Canada',
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
@@ -81,9 +98,15 @@ class CBCIE(InfoExtractor):
|
||||
media_id = player_info.get('mediaId')
|
||||
if not media_id:
|
||||
clip_id = player_info['clipId']
|
||||
media_id = self._download_json(
|
||||
'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
|
||||
clip_id)['entries'][0]['id'].split('/')[-1]
|
||||
feed = self._download_json(
|
||||
'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id,
|
||||
clip_id, fatal=False)
|
||||
if feed:
|
||||
media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str)
|
||||
if not media_id:
|
||||
media_id = self._download_json(
|
||||
'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
|
||||
clip_id)['entries'][0]['id'].split('/')[-1]
|
||||
return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
|
||||
else:
|
||||
entries = [self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)]
|
||||
@@ -104,6 +127,7 @@ class CBCPlayerIE(InfoExtractor):
|
||||
'upload_date': '20160210',
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
'skip': 'Geo-restricted to Canada',
|
||||
}, {
|
||||
# Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
|
||||
'url': 'http://www.cbc.ca/player/play/2657631896',
|
||||
|
@@ -26,6 +26,7 @@ class CBSNewsIE(CBSBaseIE):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Subscribers only',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
|
||||
@@ -69,7 +70,7 @@ class CBSNewsLiveVideoIE(InfoExtractor):
|
||||
IE_DESC = 'CBS News Live Videos'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',
|
||||
'info_dict': {
|
||||
'id': 'clinton-sanders-prepare-to-face-off-in-nh',
|
||||
@@ -77,7 +78,15 @@ class CBSNewsLiveVideoIE(InfoExtractor):
|
||||
'title': 'Clinton, Sanders Prepare To Face Off In NH',
|
||||
'duration': 334,
|
||||
},
|
||||
}
|
||||
'skip': 'Video gone, redirected to http://www.cbsnews.com/live/',
|
||||
}, {
|
||||
'url': 'http://www.cbsnews.com/live/video/video-shows-intense-paragliding-accident/',
|
||||
'info_dict': {
|
||||
'id': 'video-shows-intense-paragliding-accident',
|
||||
'ext': 'flv',
|
||||
'title': 'Video Shows Intense Paragliding Accident',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@@ -23,7 +23,7 @@ class CliphunterIE(InfoExtractor):
|
||||
(?P<id>[0-9]+)/
|
||||
(?P<seo>.+?)(?:$|[#\?])
|
||||
'''
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo',
|
||||
'md5': 'b7c9bbd4eb3a226ab91093714dcaa480',
|
||||
'info_dict': {
|
||||
@@ -32,8 +32,19 @@ class CliphunterIE(InfoExtractor):
|
||||
'title': 'Fun Jynx Maze solo',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
},
|
||||
'skip': 'Video gone',
|
||||
}, {
|
||||
'url': 'http://www.cliphunter.com/w/2019449/ShesNew__My_booty_girlfriend_Victoria_Paradices_pussy_filled_with_jizz',
|
||||
'md5': '55a723c67bfc6da6b0cfa00d55da8a27',
|
||||
'info_dict': {
|
||||
'id': '2019449',
|
||||
'ext': 'mp4',
|
||||
'title': 'ShesNew - My booty girlfriend, Victoria Paradice\'s pussy filled with jizz',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'age_limit': 18,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@@ -1,16 +1,10 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
from .onet import OnetBaseIE
|
||||
|
||||
|
||||
class ClipRsIE(InfoExtractor):
|
||||
class ClipRsIE(OnetBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?clip\.rs/(?P<id>[^/]+)/\d+'
|
||||
_TEST = {
|
||||
'url': 'http://www.clip.rs/premijera-frajle-predstavljaju-novi-spot-za-pesmu-moli-me-moli/3732',
|
||||
@@ -27,64 +21,13 @@ class ClipRsIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id')
|
||||
mvp_id = self._search_mvp_id(webpage)
|
||||
|
||||
response = self._download_json(
|
||||
'http://qi.ckm.onetapi.pl/', video_id,
|
||||
query={
|
||||
'body[id]': video_id,
|
||||
'body[jsonrpc]': '2.0',
|
||||
'body[method]': 'get_asset_detail',
|
||||
'body[params][ID_Publikacji]': video_id,
|
||||
'body[params][Service]': 'www.onet.pl',
|
||||
'content-type': 'application/jsonp',
|
||||
'x-onet-app': 'player.front.onetapi.pl',
|
||||
})
|
||||
info_dict = self._extract_from_id(mvp_id, webpage)
|
||||
info_dict['display_id'] = display_id
|
||||
|
||||
error = response.get('error')
|
||||
if error:
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, error['message']), expected=True)
|
||||
|
||||
video = response['result'].get('0')
|
||||
|
||||
formats = []
|
||||
for _, formats_dict in video['formats'].items():
|
||||
if not isinstance(formats_dict, dict):
|
||||
continue
|
||||
for format_id, format_list in formats_dict.items():
|
||||
if not isinstance(format_list, list):
|
||||
continue
|
||||
for f in format_list:
|
||||
if not f.get('url'):
|
||||
continue
|
||||
formats.append({
|
||||
'url': f['url'],
|
||||
'format_id': format_id,
|
||||
'height': int_or_none(f.get('vertical_resolution')),
|
||||
'width': int_or_none(f.get('horizontal_resolution')),
|
||||
'abr': float_or_none(f.get('audio_bitrate')),
|
||||
'vbr': float_or_none(f.get('video_bitrate')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
meta = video.get('meta', {})
|
||||
|
||||
title = self._og_search_title(webpage, default=None) or meta['title']
|
||||
description = self._og_search_description(webpage, default=None) or meta.get('description')
|
||||
duration = meta.get('length') or meta.get('lenght')
|
||||
timestamp = parse_iso8601(meta.get('addDate'), ' ')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'formats': formats,
|
||||
}
|
||||
return info_dict
|
||||
|
@@ -6,7 +6,6 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_HTTPError,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -17,37 +16,26 @@ from ..utils import (
|
||||
|
||||
|
||||
class CloudyIE(InfoExtractor):
|
||||
_IE_DESC = 'cloudy.ec and videoraj.ch'
|
||||
_IE_DESC = 'cloudy.ec'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?(?P<host>cloudy\.ec|videoraj\.(?:ch|to))/
|
||||
https?://(?:www\.)?cloudy\.ec/
|
||||
(?:v/|embed\.php\?id=)
|
||||
(?P<id>[A-Za-z0-9]+)
|
||||
'''
|
||||
_EMBED_URL = 'http://www.%s/embed.php?id=%s'
|
||||
_API_URL = 'http://www.%s/api/player.api.php?%s'
|
||||
_EMBED_URL = 'http://www.cloudy.ec/embed.php?id=%s'
|
||||
_API_URL = 'http://www.cloudy.ec/api/player.api.php'
|
||||
_MAX_TRIES = 2
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://www.cloudy.ec/v/af511e2527aac',
|
||||
'md5': '5cb253ace826a42f35b4740539bedf07',
|
||||
'info_dict': {
|
||||
'id': 'af511e2527aac',
|
||||
'ext': 'flv',
|
||||
'title': 'Funny Cats and Animals Compilation june 2013',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.videoraj.to/v/47f399fd8bb60',
|
||||
'md5': '7d0f8799d91efd4eda26587421c3c3b0',
|
||||
'info_dict': {
|
||||
'id': '47f399fd8bb60',
|
||||
'ext': 'flv',
|
||||
'title': 'Burning a New iPhone 5 with Gasoline - Will it Survive?',
|
||||
}
|
||||
_TEST = {
|
||||
'url': 'https://www.cloudy.ec/v/af511e2527aac',
|
||||
'md5': '5cb253ace826a42f35b4740539bedf07',
|
||||
'info_dict': {
|
||||
'id': 'af511e2527aac',
|
||||
'ext': 'flv',
|
||||
'title': 'Funny Cats and Animals Compilation june 2013',
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
def _extract_video(self, video_host, video_id, file_key, error_url=None, try_num=0):
|
||||
def _extract_video(self, video_id, file_key, error_url=None, try_num=0):
|
||||
|
||||
if try_num > self._MAX_TRIES - 1:
|
||||
raise ExtractorError('Unable to extract video URL', expected=True)
|
||||
@@ -64,9 +52,8 @@ class CloudyIE(InfoExtractor):
|
||||
'errorUrl': error_url,
|
||||
})
|
||||
|
||||
data_url = self._API_URL % (video_host, compat_urllib_parse_urlencode(form))
|
||||
player_data = self._download_webpage(
|
||||
data_url, video_id, 'Downloading player data')
|
||||
self._API_URL, video_id, 'Downloading player data', query=form)
|
||||
data = compat_parse_qs(player_data)
|
||||
|
||||
try_num += 1
|
||||
@@ -88,7 +75,7 @@ class CloudyIE(InfoExtractor):
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in [404, 410]:
|
||||
self.report_warning('Invalid video URL, requesting another', video_id)
|
||||
return self._extract_video(video_host, video_id, file_key, video_url, try_num)
|
||||
return self._extract_video(video_id, file_key, video_url, try_num)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -98,14 +85,13 @@ class CloudyIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_host = mobj.group('host')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
url = self._EMBED_URL % (video_host, video_id)
|
||||
url = self._EMBED_URL % video_id
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
file_key = self._search_regex(
|
||||
[r'key\s*:\s*"([^"]+)"', r'filekey\s*=\s*"([^"]+)"'],
|
||||
webpage, 'file_key')
|
||||
|
||||
return self._extract_video(video_host, video_id, file_key)
|
||||
return self._extract_video(video_id, file_key)
|
||||
|
@@ -1,5 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .mtv import MTVIE
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class CMTIE(MTVIE):
|
||||
@@ -16,7 +18,27 @@ class CMTIE(MTVIE):
|
||||
'title': 'Garth Brooks - "The Call (featuring Trisha Yearwood)"',
|
||||
'description': 'Blame It All On My Roots',
|
||||
},
|
||||
'skip': 'Video not available',
|
||||
}, {
|
||||
'url': 'http://www.cmt.com/videos/misc/1504699/still-the-king-ep-109-in-3-minutes.jhtml#id=1739908',
|
||||
'md5': 'e61a801ca4a183a466c08bd98dccbb1c',
|
||||
'info_dict': {
|
||||
'id': '1504699',
|
||||
'ext': 'mp4',
|
||||
'title': 'Still The King Ep. 109 in 3 Minutes',
|
||||
'description': 'Relive or catch up with Still The King by watching this recap of season 1, episode 9. New episodes Sundays 9/8c.',
|
||||
'timestamp': 1469421000.0,
|
||||
'upload_date': '20160725',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.cmt.com/shows/party-down-south/party-down-south-ep-407-gone-girl/1738172/playlist/#id=1738172',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def _transform_rtmp_url(cls, rtmp_video_url):
|
||||
if 'error_not_available.swf' in rtmp_video_url:
|
||||
raise ExtractorError(
|
||||
'%s said: video is not available' % cls.IE_NAME, expected=True)
|
||||
|
||||
return super(CMTIE, cls)._transform_rtmp_url(rtmp_video_url)
|
||||
|
@@ -1,17 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .mtv import MTVServicesInfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_urlencode,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class ComedyCentralIE(MTVServicesInfoExtractor):
|
||||
@@ -26,8 +15,10 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
|
||||
'ext': 'mp4',
|
||||
'title': 'CC:Stand-Up|Greg Fitzsimmons: Life on Stage|Uncensored - Too Good of a Mother',
|
||||
'title': 'CC:Stand-Up|August 18, 2013|1|0101|Uncensored - Too Good of a Mother',
|
||||
'description': 'After a certain point, breastfeeding becomes c**kblocking.',
|
||||
'timestamp': 1376798400,
|
||||
'upload_date': '20130818',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/interviews/6yx39d/exclusive-rand-paul-extended-interview',
|
||||
@@ -35,241 +26,73 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
|
||||
}]
|
||||
|
||||
|
||||
class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|
||||
IE_DESC = 'The Daily Show / The Colbert Report'
|
||||
# urls can be abbreviations like :thedailyshow
|
||||
# urls for episodes like:
|
||||
# or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
|
||||
# or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news
|
||||
# or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
|
||||
_VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow)
|
||||
|https?://(:www\.)?
|
||||
(?P<showname>thedailyshow|thecolbertreport|tosh)\.(?:cc\.)?com/
|
||||
((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
|
||||
(?P<clip>
|
||||
(?:(?:guests/[^/]+|videos|video-(?:clips|playlists)|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+))
|
||||
|(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|
||||
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
|
||||
)|
|
||||
(?P<interview>
|
||||
extended-interviews/(?P<interID>[0-9a-z]+)/
|
||||
(?:playlist_tds_extended_)?(?P<interview_title>[^/?#]*?)
|
||||
(?:/[^/?#]?|[?#]|$))))
|
||||
'''
|
||||
class ToshIE(MTVServicesInfoExtractor):
|
||||
IE_DESC = 'Tosh.0'
|
||||
_VALID_URL = r'^https?://tosh\.cc\.com/video-(?:clips|collections)/[^/]+/(?P<videotitle>[^/?#]+)'
|
||||
_FEED_URL = 'http://tosh.cc.com/feeds/mrss'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
|
||||
'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
|
||||
'info_dict': {
|
||||
'id': 'ab9ab3e7-5a98-4dbe-8b21-551dc0523d55',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20121213',
|
||||
'description': 'Kristen Stewart learns to let loose in "On the Road."',
|
||||
'uploader': 'thedailyshow',
|
||||
'title': 'thedailyshow kristen-stewart part 1',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/extended-interviews/b6364d/sarah-chayes-extended-interview',
|
||||
'info_dict': {
|
||||
'id': 'sarah-chayes-extended-interview',
|
||||
'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
|
||||
'title': 'thedailyshow Sarah Chayes Extended Interview',
|
||||
},
|
||||
'playlist': [
|
||||
{
|
||||
'info_dict': {
|
||||
'id': '0baad492-cbec-4ec1-9e50-ad91c291127f',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20150129',
|
||||
'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
|
||||
'uploader': 'thedailyshow',
|
||||
'title': 'thedailyshow sarah-chayes-extended-interview part 1',
|
||||
},
|
||||
},
|
||||
{
|
||||
'info_dict': {
|
||||
'id': '1e4fb91b-8ce7-4277-bd7c-98c9f1bbd283',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20150129',
|
||||
'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
|
||||
'uploader': 'thedailyshow',
|
||||
'title': 'thedailyshow sarah-chayes-extended-interview part 2',
|
||||
},
|
||||
},
|
||||
],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/video-playlists/t6d9sg/the-daily-show-20038-highlights/be3cwo',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/news-team/michael-che/7wnfel/we-need-to-talk-about-israel',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans',
|
||||
'info_dict': {
|
||||
'description': 'Tosh asked fans to share their summer plans.',
|
||||
'title': 'Twitter Users Share Summer Plans',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'f269e88114c1805bb6d7653fecea9e06',
|
||||
'info_dict': {
|
||||
'id': '90498ec2-ed00-11e0-aca6-0026b9414f30',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tosh.0|June 9, 2077|2|211|Twitter Users Share Summer Plans',
|
||||
'description': 'Tosh asked fans to share their summer plans.',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
# It's really reported to be published on year 2077
|
||||
'upload_date': '20770610',
|
||||
'timestamp': 3390510600,
|
||||
'subtitles': {
|
||||
'en': 'mincount:3',
|
||||
},
|
||||
},
|
||||
}]
|
||||
}, {
|
||||
'url': 'http://tosh.cc.com/video-collections/x2iz7k/just-plain-foul/m5q4fp',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_available_formats = ['3500', '2200', '1700', '1200', '750', '400']
|
||||
@classmethod
|
||||
def _transform_rtmp_url(cls, rtmp_video_url):
|
||||
new_urls = super(ToshIE, cls)._transform_rtmp_url(rtmp_video_url)
|
||||
new_urls['rtmp'] = rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm')
|
||||
return new_urls
|
||||
|
||||
_video_extensions = {
|
||||
'3500': 'mp4',
|
||||
'2200': 'mp4',
|
||||
'1700': 'mp4',
|
||||
'1200': 'mp4',
|
||||
'750': 'mp4',
|
||||
'400': 'mp4',
|
||||
}
|
||||
_video_dimensions = {
|
||||
'3500': (1280, 720),
|
||||
'2200': (960, 540),
|
||||
'1700': (768, 432),
|
||||
'1200': (640, 360),
|
||||
'750': (512, 288),
|
||||
'400': (384, 216),
|
||||
}
|
||||
|
||||
class ComedyCentralTVIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/(?:staffeln|shows)/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.comedycentral.tv/staffeln/7436-the-mindy-project-staffel-4',
|
||||
'info_dict': {
|
||||
'id': 'local_playlist-f99b626bdfe13568579a',
|
||||
'ext': 'flv',
|
||||
'title': 'Episode_the-mindy-project_shows_season-4_episode-3_full-episode_part1',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.comedycentral.tv/shows/1074-workaholics',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.comedycentral.tv/shows/1727-the-mindy-project/bonus',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = self._match_id(url)
|
||||
|
||||
if mobj.group('shortname'):
|
||||
return self.url_result('http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if mobj.group('clip'):
|
||||
if mobj.group('videotitle'):
|
||||
epTitle = mobj.group('videotitle')
|
||||
elif mobj.group('showname') == 'thedailyshow':
|
||||
epTitle = mobj.group('tdstitle')
|
||||
else:
|
||||
epTitle = mobj.group('cntitle')
|
||||
dlNewest = False
|
||||
elif mobj.group('interview'):
|
||||
epTitle = mobj.group('interview_title')
|
||||
dlNewest = False
|
||||
else:
|
||||
dlNewest = not mobj.group('episode')
|
||||
if dlNewest:
|
||||
epTitle = mobj.group('showname')
|
||||
else:
|
||||
epTitle = mobj.group('episode')
|
||||
show_name = mobj.group('showname')
|
||||
mrss_url = self._search_regex(
|
||||
r'data-mrss=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage, 'mrss url', group='url')
|
||||
|
||||
webpage, htmlHandle = self._download_webpage_handle(url, epTitle)
|
||||
if dlNewest:
|
||||
url = htmlHandle.geturl()
|
||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
||||
if mobj is None:
|
||||
raise ExtractorError('Invalid redirected URL: ' + url)
|
||||
if mobj.group('episode') == '':
|
||||
raise ExtractorError('Redirected URL is still not specific: ' + url)
|
||||
epTitle = (mobj.group('episode') or mobj.group('videotitle')).rpartition('/')[-1]
|
||||
|
||||
mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)
|
||||
if len(mMovieParams) == 0:
|
||||
# The Colbert Report embeds the information in a without
|
||||
# a URL prefix; so extract the alternate reference
|
||||
# and then add the URL prefix manually.
|
||||
|
||||
altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video|playlist).*?:.*?)"', webpage)
|
||||
if len(altMovieParams) == 0:
|
||||
raise ExtractorError('unable to find Flash URL in webpage ' + url)
|
||||
else:
|
||||
mMovieParams = [('http://media.mtvnservices.com/' + altMovieParams[0], altMovieParams[0])]
|
||||
|
||||
uri = mMovieParams[0][1]
|
||||
# Correct cc.com in uri
|
||||
uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.com', uri)
|
||||
|
||||
index_url = 'http://%s.cc.com/feeds/mrss?%s' % (show_name, compat_urllib_parse_urlencode({'uri': uri}))
|
||||
idoc = self._download_xml(
|
||||
index_url, epTitle,
|
||||
'Downloading show index', 'Unable to download episode index')
|
||||
|
||||
title = idoc.find('./channel/title').text
|
||||
description = idoc.find('./channel/description').text
|
||||
|
||||
entries = []
|
||||
item_els = idoc.findall('.//item')
|
||||
for part_num, itemEl in enumerate(item_els):
|
||||
upload_date = unified_strdate(itemEl.findall('./pubDate')[0].text)
|
||||
thumbnail = itemEl.find('.//{http://search.yahoo.com/mrss/}thumbnail').attrib.get('url')
|
||||
|
||||
content = itemEl.find('.//{http://search.yahoo.com/mrss/}content')
|
||||
duration = float_or_none(content.attrib.get('duration'))
|
||||
mediagen_url = content.attrib['url']
|
||||
guid = itemEl.find('./guid').text.rpartition(':')[-1]
|
||||
|
||||
cdoc = self._download_xml(
|
||||
mediagen_url, epTitle,
|
||||
'Downloading configuration for segment %d / %d' % (part_num + 1, len(item_els)))
|
||||
|
||||
turls = []
|
||||
for rendition in cdoc.findall('.//rendition'):
|
||||
finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
|
||||
turls.append(finfo)
|
||||
|
||||
formats = []
|
||||
for format, rtmp_video_url in turls:
|
||||
w, h = self._video_dimensions.get(format, (None, None))
|
||||
formats.append({
|
||||
'format_id': 'vhttp-%s' % format,
|
||||
'url': self._transform_rtmp_url(rtmp_video_url),
|
||||
'ext': self._video_extensions.get(format, 'mp4'),
|
||||
'height': h,
|
||||
'width': w,
|
||||
})
|
||||
formats.append({
|
||||
'format_id': 'rtmp-%s' % format,
|
||||
'url': rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm'),
|
||||
'ext': self._video_extensions.get(format, 'mp4'),
|
||||
'height': h,
|
||||
'width': w,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = self._extract_subtitles(cdoc, guid)
|
||||
|
||||
virtual_id = show_name + ' ' + epTitle + ' part ' + compat_str(part_num + 1)
|
||||
entries.append({
|
||||
'id': guid,
|
||||
'title': virtual_id,
|
||||
'formats': formats,
|
||||
'uploader': show_name,
|
||||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': epTitle,
|
||||
'entries': entries,
|
||||
'title': show_name + ' ' + title,
|
||||
'description': description,
|
||||
}
|
||||
return self._get_videos_info_from_url(mrss_url, video_id)
|
||||
|
@@ -44,6 +44,7 @@ from ..utils import (
|
||||
sanitized_Request,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
url_basename,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
@@ -54,6 +55,8 @@ from ..utils import (
|
||||
update_Request,
|
||||
update_url_query,
|
||||
parse_m3u8_attributes,
|
||||
extract_attributes,
|
||||
parse_codecs,
|
||||
)
|
||||
|
||||
|
||||
@@ -161,6 +164,7 @@ class InfoExtractor(object):
|
||||
* "height" (optional, int)
|
||||
* "resolution" (optional, string "{width}x{height"},
|
||||
deprecated)
|
||||
* "filesize" (optional, int)
|
||||
thumbnail: Full URL to a video thumbnail image.
|
||||
description: Full video description.
|
||||
uploader: Full name of the video uploader.
|
||||
@@ -803,15 +807,17 @@ class InfoExtractor(object):
|
||||
return self._html_search_meta('twitter:player', html,
|
||||
'twitter card player')
|
||||
|
||||
def _search_json_ld(self, html, video_id, **kwargs):
|
||||
def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
|
||||
json_ld = self._search_regex(
|
||||
r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
|
||||
html, 'JSON-LD', group='json_ld', **kwargs)
|
||||
if not json_ld:
|
||||
return {}
|
||||
return self._json_ld(json_ld, video_id, fatal=kwargs.get('fatal', True))
|
||||
return self._json_ld(
|
||||
json_ld, video_id, fatal=kwargs.get('fatal', True),
|
||||
expected_type=expected_type)
|
||||
|
||||
def _json_ld(self, json_ld, video_id, fatal=True):
|
||||
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
|
||||
if isinstance(json_ld, compat_str):
|
||||
json_ld = self._parse_json(json_ld, video_id, fatal=fatal)
|
||||
if not json_ld:
|
||||
@@ -819,6 +825,8 @@ class InfoExtractor(object):
|
||||
info = {}
|
||||
if json_ld.get('@context') == 'http://schema.org':
|
||||
item_type = json_ld.get('@type')
|
||||
if expected_type is not None and expected_type != item_type:
|
||||
return info
|
||||
if item_type == 'TVEpisode':
|
||||
info.update({
|
||||
'episode': unescapeHTML(json_ld.get('name')),
|
||||
@@ -837,6 +845,19 @@ class InfoExtractor(object):
|
||||
'title': unescapeHTML(json_ld.get('headline')),
|
||||
'description': unescapeHTML(json_ld.get('articleBody')),
|
||||
})
|
||||
elif item_type == 'VideoObject':
|
||||
info.update({
|
||||
'url': json_ld.get('contentUrl'),
|
||||
'title': unescapeHTML(json_ld.get('name')),
|
||||
'description': unescapeHTML(json_ld.get('description')),
|
||||
'thumbnail': json_ld.get('thumbnailUrl'),
|
||||
'duration': parse_duration(json_ld.get('duration')),
|
||||
'timestamp': unified_timestamp(json_ld.get('uploadDate')),
|
||||
'filesize': float_or_none(json_ld.get('contentSize')),
|
||||
'tbr': int_or_none(json_ld.get('bitrate')),
|
||||
'width': int_or_none(json_ld.get('width')),
|
||||
'height': int_or_none(json_ld.get('height')),
|
||||
})
|
||||
return dict((k, v) for k, v in info.items() if v is not None)
|
||||
|
||||
@staticmethod
|
||||
@@ -1186,6 +1207,7 @@ class InfoExtractor(object):
|
||||
'url': format_url(line.strip()),
|
||||
'tbr': tbr,
|
||||
'ext': ext,
|
||||
'fps': float_or_none(last_info.get('FRAME-RATE')),
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
}
|
||||
@@ -1194,24 +1216,17 @@ class InfoExtractor(object):
|
||||
width_str, height_str = resolution.split('x')
|
||||
f['width'] = int(width_str)
|
||||
f['height'] = int(height_str)
|
||||
codecs = last_info.get('CODECS')
|
||||
if codecs:
|
||||
vcodec, acodec = [None] * 2
|
||||
va_codecs = codecs.split(',')
|
||||
if len(va_codecs) == 1:
|
||||
# Audio only entries usually come with single codec and
|
||||
# no resolution. For more robustness we also check it to
|
||||
# be mp4 audio.
|
||||
if not resolution and va_codecs[0].startswith('mp4a'):
|
||||
vcodec, acodec = 'none', va_codecs[0]
|
||||
else:
|
||||
vcodec = va_codecs[0]
|
||||
else:
|
||||
vcodec, acodec = va_codecs[:2]
|
||||
# Unified Streaming Platform
|
||||
mobj = re.search(
|
||||
r'audio.*?(?:%3D|=)(\d+)(?:-video.*?(?:%3D|=)(\d+))?', f['url'])
|
||||
if mobj:
|
||||
abr, vbr = mobj.groups()
|
||||
abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000)
|
||||
f.update({
|
||||
'acodec': acodec,
|
||||
'vcodec': vcodec,
|
||||
'vbr': vbr,
|
||||
'abr': abr,
|
||||
})
|
||||
f.update(parse_codecs(last_info.get('CODECS')))
|
||||
if last_media is not None:
|
||||
f['m3u8_media'] = last_media
|
||||
last_media = None
|
||||
@@ -1466,6 +1481,13 @@ class InfoExtractor(object):
|
||||
compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, formats_dict=formats_dict)
|
||||
|
||||
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}):
|
||||
"""
|
||||
Parse formats from MPD manifest.
|
||||
References:
|
||||
1. MPEG-DASH Standard, ISO/IEC 23009-1:2014(E),
|
||||
http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip
|
||||
2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
|
||||
"""
|
||||
if mpd_doc.get('type') == 'dynamic':
|
||||
return []
|
||||
|
||||
@@ -1498,8 +1520,16 @@ class InfoExtractor(object):
|
||||
s_e = segment_timeline.findall(_add_ns('S'))
|
||||
if s_e:
|
||||
ms_info['total_number'] = 0
|
||||
ms_info['s'] = []
|
||||
for s in s_e:
|
||||
ms_info['total_number'] += 1 + int(s.get('r', '0'))
|
||||
r = int(s.get('r', 0))
|
||||
ms_info['total_number'] += 1 + r
|
||||
ms_info['s'].append({
|
||||
't': int(s.get('t', 0)),
|
||||
# @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60])
|
||||
'd': int(s.attrib['d']),
|
||||
'r': r,
|
||||
})
|
||||
else:
|
||||
timescale = segment_template.get('timescale')
|
||||
if timescale:
|
||||
@@ -1536,7 +1566,7 @@ class InfoExtractor(object):
|
||||
continue
|
||||
representation_attrib = adaptation_set.attrib.copy()
|
||||
representation_attrib.update(representation.attrib)
|
||||
# According to page 41 of ISO/IEC 29001-1:2014, @mimeType is mandatory
|
||||
# According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory
|
||||
mime_type = representation_attrib['mimeType']
|
||||
content_type = mime_type.split('/')[0]
|
||||
if content_type == 'text':
|
||||
@@ -1580,16 +1610,40 @@ class InfoExtractor(object):
|
||||
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
|
||||
media_template = representation_ms_info['media_template']
|
||||
media_template = media_template.replace('$RepresentationID$', representation_id)
|
||||
media_template = re.sub(r'\$(Number|Bandwidth)\$', r'%(\1)d', media_template)
|
||||
media_template = re.sub(r'\$(Number|Bandwidth)%([^$]+)\$', r'%(\1)\2', media_template)
|
||||
media_template = re.sub(r'\$(Number|Bandwidth|Time)\$', r'%(\1)d', media_template)
|
||||
media_template = re.sub(r'\$(Number|Bandwidth|Time)%([^$]+)\$', r'%(\1)\2', media_template)
|
||||
media_template.replace('$$', '$')
|
||||
representation_ms_info['segment_urls'] = [
|
||||
media_template % {
|
||||
'Number': segment_number,
|
||||
'Bandwidth': representation_attrib.get('bandwidth')}
|
||||
for segment_number in range(
|
||||
representation_ms_info['start_number'],
|
||||
representation_ms_info['total_number'] + representation_ms_info['start_number'])]
|
||||
|
||||
# As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
|
||||
# can't be used at the same time
|
||||
if '%(Number' in media_template:
|
||||
representation_ms_info['segment_urls'] = [
|
||||
media_template % {
|
||||
'Number': segment_number,
|
||||
'Bandwidth': representation_attrib.get('bandwidth'),
|
||||
}
|
||||
for segment_number in range(
|
||||
representation_ms_info['start_number'],
|
||||
representation_ms_info['total_number'] + representation_ms_info['start_number'])]
|
||||
else:
|
||||
representation_ms_info['segment_urls'] = []
|
||||
segment_time = 0
|
||||
|
||||
def add_segment_url():
|
||||
representation_ms_info['segment_urls'].append(
|
||||
media_template % {
|
||||
'Time': segment_time,
|
||||
'Bandwidth': representation_attrib.get('bandwidth'),
|
||||
}
|
||||
)
|
||||
|
||||
for num, s in enumerate(representation_ms_info['s']):
|
||||
segment_time = s.get('t') or segment_time
|
||||
add_segment_url()
|
||||
for r in range(s.get('r', 0)):
|
||||
segment_time += s['d']
|
||||
add_segment_url()
|
||||
segment_time += s['d']
|
||||
if 'segment_urls' in representation_ms_info:
|
||||
f.update({
|
||||
'segment_urls': representation_ms_info['segment_urls'],
|
||||
@@ -1616,6 +1670,62 @@ class InfoExtractor(object):
|
||||
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
||||
return formats
|
||||
|
||||
def _parse_html5_media_entries(self, base_url, webpage):
|
||||
def absolute_url(video_url):
|
||||
return compat_urlparse.urljoin(base_url, video_url)
|
||||
|
||||
def parse_content_type(content_type):
|
||||
if not content_type:
|
||||
return {}
|
||||
ctr = re.search(r'(?P<mimetype>[^/]+/[^;]+)(?:;\s*codecs="?(?P<codecs>[^"]+))?', content_type)
|
||||
if ctr:
|
||||
mimetype, codecs = ctr.groups()
|
||||
f = parse_codecs(codecs)
|
||||
f['ext'] = mimetype2ext(mimetype)
|
||||
return f
|
||||
return {}
|
||||
|
||||
entries = []
|
||||
for media_tag, media_type, media_content in re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage):
|
||||
media_info = {
|
||||
'formats': [],
|
||||
'subtitles': {},
|
||||
}
|
||||
media_attributes = extract_attributes(media_tag)
|
||||
src = media_attributes.get('src')
|
||||
if src:
|
||||
media_info['formats'].append({
|
||||
'url': absolute_url(src),
|
||||
'vcodec': 'none' if media_type == 'audio' else None,
|
||||
})
|
||||
media_info['thumbnail'] = media_attributes.get('poster')
|
||||
if media_content:
|
||||
for source_tag in re.findall(r'<source[^>]+>', media_content):
|
||||
source_attributes = extract_attributes(source_tag)
|
||||
src = source_attributes.get('src')
|
||||
if not src:
|
||||
continue
|
||||
f = parse_content_type(source_attributes.get('type'))
|
||||
f.update({
|
||||
'url': absolute_url(src),
|
||||
'vcodec': 'none' if media_type == 'audio' else None,
|
||||
})
|
||||
media_info['formats'].append(f)
|
||||
for track_tag in re.findall(r'<track[^>]+>', media_content):
|
||||
track_attributes = extract_attributes(track_tag)
|
||||
kind = track_attributes.get('kind')
|
||||
if not kind or kind == 'subtitles':
|
||||
src = track_attributes.get('src')
|
||||
if not src:
|
||||
continue
|
||||
lang = track_attributes.get('srclang') or track_attributes.get('lang') or track_attributes.get('label')
|
||||
media_info['subtitles'].setdefault(lang, []).append({
|
||||
'url': absolute_url(src),
|
||||
})
|
||||
if media_info['formats']:
|
||||
entries.append(media_info)
|
||||
return entries
|
||||
|
||||
def _live_title(self, name):
|
||||
""" Generate the title for a live video """
|
||||
now = datetime.datetime.now()
|
||||
@@ -1676,7 +1786,7 @@ class InfoExtractor(object):
|
||||
|
||||
any_restricted = False
|
||||
for tc in self.get_testcases(include_onlymatching=False):
|
||||
if 'playlist' in tc:
|
||||
if tc.get('playlist', []):
|
||||
tc = tc['playlist'][0]
|
||||
is_restricted = age_restricted(
|
||||
tc.get('info_dict', {}).get('age_limit'), age_limit)
|
||||
|
@@ -51,8 +51,11 @@ class CSpanIE(InfoExtractor):
|
||||
'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers',
|
||||
'info_dict': {
|
||||
'id': 'judiciary031715',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Immigration Reforms Needed to Protect Skilled American Workers',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 downloads
|
||||
}
|
||||
}]
|
||||
|
||||
|
@@ -5,19 +5,20 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
determine_protocol,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class DailyMailIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/video/[^/]+/video-(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.dailymail.co.uk/video/sciencetech/video-1288527/Turn-video-impressionist-masterpiece.html',
|
||||
'md5': '2f639d446394f53f3a33658b518b6615',
|
||||
'url': 'http://www.dailymail.co.uk/video/tvshowbiz/video-1295863/The-Mountain-appears-sparkling-water-ad-Heavy-Bubbles.html',
|
||||
'md5': 'f6129624562251f628296c3a9ffde124',
|
||||
'info_dict': {
|
||||
'id': '1288527',
|
||||
'id': '1295863',
|
||||
'ext': 'mp4',
|
||||
'title': 'Turn any video into an impressionist masterpiece',
|
||||
'description': 'md5:88ddbcb504367987b2708bb38677c9d2',
|
||||
'title': 'The Mountain appears in sparkling water ad for \'Heavy Bubbles\'',
|
||||
'description': 'md5:a93d74b6da172dd5dc4d973e0b766a84',
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,7 +27,7 @@ class DailyMailIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_data = self._parse_json(self._search_regex(
|
||||
r"data-opts='({.+?})'", webpage, 'video data'), video_id)
|
||||
title = video_data['title']
|
||||
title = unescapeHTML(video_data['title'])
|
||||
video_sources = self._download_json(video_data.get(
|
||||
'sources', {}).get('url') or 'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id, video_id)
|
||||
|
||||
@@ -55,7 +56,7 @@ class DailyMailIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': video_data.get('descr'),
|
||||
'description': unescapeHTML(video_data.get('descr')),
|
||||
'thumbnail': video_data.get('poster') or video_data.get('thumbnail'),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -16,6 +16,7 @@ from ..utils import (
|
||||
sanitized_Request,
|
||||
str_to_int,
|
||||
unescapeHTML,
|
||||
mimetype2ext,
|
||||
)
|
||||
|
||||
|
||||
@@ -111,6 +112,13 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
}
|
||||
]
|
||||
|
||||
@staticmethod
def _extract_urls(webpage):
    """Collect the URLs of Dailymotion players embedded in ``webpage``.

    Matches ``<embed>``/``<iframe>`` tags whose ``src`` points at a
    dailymotion.com ``embed``/``swf`` video URL, as well as the
    ``dmcloudUrlEmissionSelect`` ``<input>`` whose ``value`` carries such
    a URL.  Returns a list of HTML-unescaped URLs, in page order (empty
    when nothing matches).
    """
    embed_re = r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1'
    # The pattern has two groups, so every hit is a (quote, url) pair;
    # the quote character only serves the back-reference and is dropped.
    return [
        unescapeHTML(embed_url)
        for _quote, embed_url in re.findall(embed_re, webpage)
    ]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
@@ -153,18 +161,19 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
type_ = media.get('type')
|
||||
if type_ == 'application/vnd.lumberjack.manifest':
|
||||
continue
|
||||
ext = determine_ext(media_url)
|
||||
if type_ == 'application/x-mpegURL' or ext == 'm3u8':
|
||||
ext = mimetype2ext(type_) or determine_ext(media_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_url, video_id, 'mp4', preference=-1,
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif type_ == 'application/f4m' or ext == 'f4m':
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
media_url, video_id, preference=-1, f4m_id='hds', fatal=False))
|
||||
else:
|
||||
f = {
|
||||
'url': media_url,
|
||||
'format_id': 'http-%s' % quality,
|
||||
'ext': ext,
|
||||
}
|
||||
m = re.search(r'H264-(?P<width>\d+)x(?P<height>\d+)', media_url)
|
||||
if m:
|
||||
|
@@ -66,22 +66,32 @@ class DaumIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
}, {
|
||||
# Requires dte_type=WEB (#9972)
|
||||
'url': 'http://tvpot.daum.net/v/s3794Uf1NZeZ1qMpGpeqeRU',
|
||||
'md5': 'a8917742069a4dd442516b86e7d66529',
|
||||
'info_dict': {
|
||||
'id': 's3794Uf1NZeZ1qMpGpeqeRU',
|
||||
'ext': 'mp4',
|
||||
'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny) [쇼! 음악중심] 508회 20160611',
|
||||
'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\n\n[쇼! 음악중심] 20160611, 507회',
|
||||
'upload_date': '20160611',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = compat_urllib_parse_unquote(self._match_id(url))
|
||||
query = compat_urllib_parse_urlencode({'vid': video_id})
|
||||
movie_data = self._download_json(
|
||||
'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json?' + query,
|
||||
video_id, 'Downloading video formats info')
|
||||
'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json',
|
||||
video_id, 'Downloading video formats info', query={'vid': video_id, 'dte_type': 'WEB'})
|
||||
|
||||
# For urls like http://m.tvpot.daum.net/v/65139429, where the video_id is really a clipid
|
||||
if not movie_data.get('output_list', {}).get('output_list') and re.match(r'^\d+$', video_id):
|
||||
return self.url_result('http://tvpot.daum.net/clip/ClipView.do?clipid=%s' % video_id)
|
||||
|
||||
info = self._download_xml(
|
||||
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
|
||||
'Downloading video info')
|
||||
'http://tvpot.daum.net/clip/ClipInfoXml.do', video_id,
|
||||
'Downloading video info', query={'vid': video_id})
|
||||
|
||||
formats = []
|
||||
for format_el in movie_data['output_list']['output_list']:
|
||||
|
@@ -4,78 +4,47 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
clean_html,
|
||||
)
|
||||
|
||||
|
||||
class DBTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dbtv\.no/(?:(?:lazyplayer|player)/)?(?P<id>[0-9]+)(?:#(?P<display_id>.+))?'
|
||||
_VALID_URL = r'https?://(?:www\.)?dbtv\.no/(?:[^/]+/)?(?P<id>[0-9]+)(?:#(?P<display_id>.+))?'
|
||||
_TESTS = [{
|
||||
'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
|
||||
'md5': 'b89953ed25dacb6edb3ef6c6f430f8bc',
|
||||
'md5': '2e24f67936517b143a234b4cadf792ec',
|
||||
'info_dict': {
|
||||
'id': '33100',
|
||||
'id': '3649835190001',
|
||||
'display_id': 'Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
|
||||
'ext': 'mp4',
|
||||
'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen',
|
||||
'description': 'md5:1504a54606c4dde3e4e61fc97aa857e0',
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
'timestamp': 1404039863.438,
|
||||
'thumbnail': 're:https?://.*\.jpg',
|
||||
'timestamp': 1404039863,
|
||||
'upload_date': '20140629',
|
||||
'duration': 69.544,
|
||||
'view_count': int,
|
||||
'categories': list,
|
||||
}
|
||||
'uploader_id': '1027729757001',
|
||||
},
|
||||
'add_ie': ['BrightcoveNew']
|
||||
}, {
|
||||
'url': 'http://dbtv.no/3649835190001',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.dbtv.no/lazyplayer/4631135248001',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://dbtv.no/vice/5000634109001',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://dbtv.no/filmtrailer/3359293614001',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
data = self._download_json(
|
||||
'http://api.dbtv.no/discovery/%s' % video_id, display_id)
|
||||
|
||||
video = data['playlist'][0]
|
||||
|
||||
formats = [{
|
||||
'url': f['URL'],
|
||||
'vcodec': f.get('container'),
|
||||
'width': int_or_none(f.get('width')),
|
||||
'height': int_or_none(f.get('height')),
|
||||
'vbr': float_or_none(f.get('rate'), 1000),
|
||||
'filesize': int_or_none(f.get('size')),
|
||||
} for f in video['renditions'] if 'URL' in f]
|
||||
|
||||
if not formats:
|
||||
for url_key, format_id in [('URL', 'mp4'), ('HLSURL', 'hls')]:
|
||||
if url_key in video:
|
||||
formats.append({
|
||||
'url': video[url_key],
|
||||
'format_id': format_id,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
return {
|
||||
'id': compat_str(video['id']),
|
||||
'_type': 'url_transparent',
|
||||
'url': 'http://players.brightcove.net/1027729757001/default_default/index.html?videoId=%s' % video_id,
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': video['title'],
|
||||
'description': clean_html(video['desc']),
|
||||
'thumbnail': video.get('splash') or video.get('thumb'),
|
||||
'timestamp': float_or_none(video.get('publishedAt'), 1000),
|
||||
'duration': float_or_none(video.get('length'), 1000),
|
||||
'view_count': int_or_none(video.get('views')),
|
||||
'categories': video.get('tags'),
|
||||
'formats': formats,
|
||||
'ie_key': 'BrightcoveNew',
|
||||
}
|
||||
|
@@ -62,11 +62,9 @@ class DCNBaseIE(InfoExtractor):
|
||||
r'file\s*:\s*"https?(://[^"]+)/playlist.m3u8',
|
||||
r'<a[^>]+href="rtsp(://[^"]+)"'
|
||||
], webpage, 'format url')
|
||||
# TODO: Current DASH formats are broken - $Time$ pattern in
|
||||
# <SegmentTemplate> not implemented yet
|
||||
# formats.extend(self._extract_mpd_formats(
|
||||
# format_url_base + '/manifest.mpd',
|
||||
# video_id, mpd_id='dash', fatal=False))
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url_base + '/manifest.mpd',
|
||||
video_id, mpd_id='dash', fatal=False))
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url_base + '/playlist.m3u8', video_id, 'mp4',
|
||||
m3u8_entry_protocol, m3u8_id='hls', fatal=False))
|
||||
|
@@ -17,8 +17,12 @@ class DreiSatIE(ZDFIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'Waidmannsheil',
|
||||
'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
|
||||
'uploader': '3sat',
|
||||
'uploader': 'SCHWEIZWEIT',
|
||||
'uploader_id': '100000210',
|
||||
'upload_date': '20140913'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 downloads
|
||||
}
|
||||
},
|
||||
{
|
||||
|
@@ -6,12 +6,13 @@ import json
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
NO_DEFAULT,
|
||||
)
|
||||
|
||||
|
||||
class EllenTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ellentv.com/videos/0-ipq1gsai/',
|
||||
'md5': '4294cf98bc165f218aaa0b89e0fd8042',
|
||||
'info_dict': {
|
||||
@@ -22,24 +23,47 @@ class EllenTVIE(InfoExtractor):
|
||||
'timestamp': 1428035648,
|
||||
'upload_date': '20150403',
|
||||
'uploader_id': 'batchUser',
|
||||
}
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# not available via http://widgets.ellentube.com/
|
||||
'url': 'http://www.ellentv.com/videos/1-szkgu2m2/',
|
||||
'info_dict': {
|
||||
'id': '1_szkgu2m2',
|
||||
'ext': 'flv',
|
||||
'title': "Ellen's Amazingly Talented Audience",
|
||||
'description': 'md5:86ff1e376ff0d717d7171590e273f0a5',
|
||||
'timestamp': 1255140900,
|
||||
'upload_date': '20091010',
|
||||
'uploader_id': 'ellenkaltura@gmail.com',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://widgets.ellentube.com/videos/%s' % video_id,
|
||||
video_id)
|
||||
URLS = ('http://widgets.ellentube.com/videos/%s' % video_id, url)
|
||||
|
||||
partner_id = self._search_regex(
|
||||
r"var\s+partnerId\s*=\s*'([^']+)", webpage, 'partner id')
|
||||
for num, url_ in enumerate(URLS, 1):
|
||||
webpage = self._download_webpage(
|
||||
url_, video_id, fatal=num == len(URLS))
|
||||
|
||||
kaltura_id = self._search_regex(
|
||||
[r'id="kaltura_player_([^"]+)"',
|
||||
r"_wb_entry_id\s*:\s*'([^']+)",
|
||||
r'data-kaltura-entry-id="([^"]+)'],
|
||||
webpage, 'kaltura id')
|
||||
default = NO_DEFAULT if num == len(URLS) else None
|
||||
|
||||
partner_id = self._search_regex(
|
||||
r"var\s+partnerId\s*=\s*'([^']+)", webpage, 'partner id',
|
||||
default=default)
|
||||
|
||||
kaltura_id = self._search_regex(
|
||||
[r'id="kaltura_player_([^"]+)"',
|
||||
r"_wb_entry_id\s*:\s*'([^']+)",
|
||||
r'data-kaltura-entry-id="([^"]+)'],
|
||||
webpage, 'kaltura id', default=default)
|
||||
|
||||
if partner_id and kaltura_id:
|
||||
break
|
||||
|
||||
return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura')
|
||||
|
||||
|
@@ -4,19 +4,23 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
encode_base_n,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class EpornerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)/(?P<display_id>[\w-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
|
||||
'md5': '39d486f046212d8e1b911c52ab4691f8',
|
||||
'info_dict': {
|
||||
'id': '95008',
|
||||
'id': 'qlDUmNsj6VS',
|
||||
'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video',
|
||||
'ext': 'mp4',
|
||||
'title': 'Infamous Tiffany Teen Strip Tease Video',
|
||||
@@ -28,34 +32,72 @@ class EpornerIE(InfoExtractor):
|
||||
# New (May 2016) URL layout
|
||||
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.*?) - EPORNER', webpage, 'title')
|
||||
webpage, urlh = self._download_webpage_handle(url, display_id)
|
||||
|
||||
redirect_url = 'http://www.eporner.com/config5/%s' % video_id
|
||||
player_code = self._download_webpage(
|
||||
redirect_url, display_id, note='Downloading player config')
|
||||
video_id = self._match_id(compat_str(urlh.geturl()))
|
||||
|
||||
sources = self._search_regex(
|
||||
r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', player_code, 'sources')
|
||||
hash = self._search_regex(
|
||||
r'hash\s*:\s*["\']([\da-f]{32})', webpage, 'hash')
|
||||
|
||||
title = self._og_search_title(webpage, default=None) or self._html_search_regex(
|
||||
r'<title>(.+?) - EPORNER', webpage, 'title')
|
||||
|
||||
# Reverse engineered from vjs.js
|
||||
def calc_hash(s):
|
||||
return ''.join((encode_base_n(int(s[lb:lb + 8], 16), 36) for lb in range(0, 32, 8)))
|
||||
|
||||
video = self._download_json(
|
||||
'http://www.eporner.com/xhr/video/%s' % video_id,
|
||||
display_id, note='Downloading video JSON',
|
||||
query={
|
||||
'hash': calc_hash(hash),
|
||||
'device': 'generic',
|
||||
'domain': 'www.eporner.com',
|
||||
'fallback': 'false',
|
||||
})
|
||||
|
||||
if video.get('available') is False:
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, video['message']), expected=True)
|
||||
|
||||
sources = video['sources']
|
||||
|
||||
formats = []
|
||||
for video_url, format_id in re.findall(r'file\s*:\s*"([^"]+)",\s*label\s*:\s*"([^"]+)"', sources):
|
||||
fmt = {
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
}
|
||||
m = re.search(r'^(\d+)', format_id)
|
||||
if m:
|
||||
fmt['height'] = int(m.group(1))
|
||||
formats.append(fmt)
|
||||
for kind, formats_dict in sources.items():
|
||||
if not isinstance(formats_dict, dict):
|
||||
continue
|
||||
for format_id, format_dict in formats_dict.items():
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
src = format_dict.get('src')
|
||||
if not isinstance(src, compat_str) or not src.startswith('http'):
|
||||
continue
|
||||
if kind == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
src, display_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=kind, fatal=False))
|
||||
else:
|
||||
height = int_or_none(self._search_regex(
|
||||
r'(\d+)[pP]', format_id, 'height', default=None))
|
||||
fps = int_or_none(self._search_regex(
|
||||
r'(\d+)fps', format_id, 'fps', default=None))
|
||||
|
||||
formats.append({
|
||||
'url': src,
|
||||
'format_id': format_id,
|
||||
'height': height,
|
||||
'fps': fps,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
duration = parse_duration(self._html_search_meta('duration', webpage))
|
||||
|
@@ -44,6 +44,7 @@ from .appletrailers import (
|
||||
AppleTrailersSectionIE,
|
||||
)
|
||||
from .archiveorg import ArchiveOrgIE
|
||||
from .arkena import ArkenaIE
|
||||
from .ard import (
|
||||
ARDIE,
|
||||
ARDMediathekIE,
|
||||
@@ -139,9 +140,9 @@ from .chirbit import (
|
||||
ChirbitProfileIE,
|
||||
)
|
||||
from .cinchcast import CinchcastIE
|
||||
from .cliprs import ClipRsIE
|
||||
from .clipfish import ClipfishIE
|
||||
from .cliphunter import CliphunterIE
|
||||
from .cliprs import ClipRsIE
|
||||
from .clipsyndicate import ClipsyndicateIE
|
||||
from .closertotruth import CloserToTruthIE
|
||||
from .cloudy import CloudyIE
|
||||
@@ -156,7 +157,11 @@ from .cnn import (
|
||||
)
|
||||
from .coub import CoubIE
|
||||
from .collegerama import CollegeRamaIE
|
||||
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
|
||||
from .comedycentral import (
|
||||
ComedyCentralIE,
|
||||
ComedyCentralTVIE,
|
||||
ToshIE,
|
||||
)
|
||||
from .comcarcoff import ComCarCoffIE
|
||||
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
||||
from .commonprotocols import RtmpIE
|
||||
@@ -256,6 +261,7 @@ from .fivemin import FiveMinIE
|
||||
from .fivetv import FiveTVIE
|
||||
from .fktv import FKTVIE
|
||||
from .flickr import FlickrIE
|
||||
from .flipagram import FlipagramIE
|
||||
from .folketinget import FolketingetIE
|
||||
from .footyroom import FootyRoomIE
|
||||
from .formula1 import Formula1IE
|
||||
@@ -368,6 +374,7 @@ from .jove import JoveIE
|
||||
from .jwplatform import JWPlatformIE
|
||||
from .jpopsukitv import JpopsukiIE
|
||||
from .kaltura import KalturaIE
|
||||
from .kamcord import KamcordIE
|
||||
from .kanalplay import KanalPlayIE
|
||||
from .kankan import KankanIE
|
||||
from .karaoketv import KaraoketvIE
|
||||
@@ -391,6 +398,10 @@ from .kuwo import (
|
||||
)
|
||||
from .la7 import LA7IE
|
||||
from .laola1tv import Laola1TvIE
|
||||
from .lcp import (
|
||||
LcpPlayIE,
|
||||
LcpIE,
|
||||
)
|
||||
from .learnr import LearnrIE
|
||||
from .lecture2go import Lecture2GoIE
|
||||
from .lemonde import LemondeIE
|
||||
@@ -469,7 +480,6 @@ from .msn import MSNIE
|
||||
from .mtv import (
|
||||
MTVIE,
|
||||
MTVServicesEmbeddedIE,
|
||||
MTVIggyIE,
|
||||
MTVDEIE,
|
||||
)
|
||||
from .muenchentv import MuenchenTVIE
|
||||
@@ -519,7 +529,6 @@ from .nextmedia import (
|
||||
NextMediaActionNewsIE,
|
||||
AppleDailyIE,
|
||||
)
|
||||
from .nextmovie import NextMovieIE
|
||||
from .nfb import NFBIE
|
||||
from .nfl import NFLIE
|
||||
from .nhl import (
|
||||
@@ -535,6 +544,8 @@ from .nick import (
|
||||
from .niconico import NiconicoIE, NiconicoPlaylistIE
|
||||
from .ninecninemedia import NineCNineMediaIE
|
||||
from .ninegag import NineGagIE
|
||||
from .ninenow import NineNowIE
|
||||
from .nintendo import NintendoIE
|
||||
from .noco import NocoIE
|
||||
from .normalboots import NormalbootsIE
|
||||
from .nosvideo import NosVideoIE
|
||||
@@ -579,8 +590,13 @@ from .nytimes import (
|
||||
NYTimesArticleIE,
|
||||
)
|
||||
from .nuvid import NuvidIE
|
||||
from .odatv import OdaTVIE
|
||||
from .odnoklassniki import OdnoklassnikiIE
|
||||
from .oktoberfesttv import OktoberfestTVIE
|
||||
from .onet import (
|
||||
OnetIE,
|
||||
OnetChannelIE,
|
||||
)
|
||||
from .onionstudios import OnionStudiosIE
|
||||
from .ooyala import (
|
||||
OoyalaIE,
|
||||
@@ -674,6 +690,7 @@ from .rice import RICEIE
|
||||
from .ringtv import RingTVIE
|
||||
from .ro220 import Ro220IE
|
||||
from .rockstargames import RockstarGamesIE
|
||||
from .roosterteeth import RoosterTeethIE
|
||||
from .rottentomatoes import RottenTomatoesIE
|
||||
from .roxwel import RoxwelIE
|
||||
from .rtbf import RTBFIE
|
||||
@@ -682,8 +699,9 @@ from .rtlnl import RtlNlIE
|
||||
from .rtl2 import RTL2IE
|
||||
from .rtp import RTPIE
|
||||
from .rts import RTSIE
|
||||
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
|
||||
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE
|
||||
from .rtvnh import RTVNHIE
|
||||
from .rudo import RudoIE
|
||||
from .ruhd import RUHDIE
|
||||
from .ruleporn import RulePornIE
|
||||
from .rutube import (
|
||||
@@ -773,6 +791,7 @@ from .srmediathek import SRMediathekIE
|
||||
from .ssa import SSAIE
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
from .steam import SteamIE
|
||||
from .streamable import StreamableIE
|
||||
from .streamcloud import StreamcloudIE
|
||||
from .streamcz import StreamCZIE
|
||||
from .streetvoice import StreetVoiceIE
|
||||
@@ -981,6 +1000,7 @@ from .viki import (
|
||||
from .vk import (
|
||||
VKIE,
|
||||
VKUserVideosIE,
|
||||
VKWallPostIE,
|
||||
)
|
||||
from .vlive import VLiveIE
|
||||
from .vodlocker import VodlockerIE
|
||||
|
@@ -27,7 +27,7 @@ class FacebookIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
https?://
|
||||
(?:\w+\.)?facebook\.com/
|
||||
(?:[\w-]+\.)?facebook\.com/
|
||||
(?:[^#]*?\#!/)?
|
||||
(?:
|
||||
(?:
|
||||
@@ -127,6 +127,9 @@ class FacebookIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -219,12 +222,25 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
BEFORE = '{swf.addParam(param[0], param[1]);});'
|
||||
AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
|
||||
m = re.search(re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER), webpage)
|
||||
if m:
|
||||
swf_params = m.group(1).replace('\\\\', '\\').replace('\\"', '"')
|
||||
PATTERN = re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER)
|
||||
|
||||
for m in re.findall(PATTERN, webpage):
|
||||
swf_params = m.replace('\\\\', '\\').replace('\\"', '"')
|
||||
data = dict(json.loads(swf_params))
|
||||
params_raw = compat_urllib_parse_unquote(data['params'])
|
||||
video_data = json.loads(params_raw)['video_data']
|
||||
video_data_candidate = json.loads(params_raw)['video_data']
|
||||
for _, f in video_data_candidate.items():
|
||||
if not f:
|
||||
continue
|
||||
if isinstance(f, dict):
|
||||
f = [f]
|
||||
if not isinstance(f, list):
|
||||
continue
|
||||
if f[0].get('video_id') == video_id:
|
||||
video_data = video_data_candidate
|
||||
break
|
||||
if video_data:
|
||||
break
|
||||
|
||||
def video_data_list2dict(video_data):
|
||||
ret = {}
|
||||
|
115
youtube_dl/extractor/flipagram.py
Normal file
115
youtube_dl/extractor/flipagram.py
Normal file
@@ -0,0 +1,115 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class FlipagramIE(InfoExtractor):
    """Extractor for single Flipagram posts (``flipagram.com/f/<id>``).

    Metadata is read from the ``window.reactH2O`` JSON object embedded in
    the page; title and description are taken from JSON-LD when the page
    provides it.
    """
    _VALID_URL = r'https?://(?:www\.)?flipagram\.com/f/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://flipagram.com/f/nyvTSJMKId',
        'md5': '888dcf08b7ea671381f00fab74692755',
        'info_dict': {
            'id': 'nyvTSJMKId',
            'ext': 'mp4',
            'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
            'description': 'md5:d55e32edc55261cae96a41fa85ff630e',
            'duration': 35.571,
            'timestamp': 1461244995,
            'upload_date': '20160421',
            'uploader': 'kitty juria',
            'uploader_id': 'sjuria101',
            'creator': 'kitty juria',
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
            'comments': list,
            'formats': 'mincount:2',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the Flipagram post at ``url``."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        page_data = self._parse_json(
            self._search_regex(
                r'window\.reactH2O\s*=\s*({.+});', webpage, 'video data'),
            video_id)

        # Both keys are mandatory: without them there is nothing to extract.
        flipagram = page_data['flipagram']
        video = flipagram['video']

        # JSON-LD is optional.  Normalise a falsy result to an empty dict so
        # the .get() lookups below cannot fail on the ``False`` default.
        json_ld = self._search_json_ld(webpage, video_id, default=False) or {}
        title = json_ld.get('title') or flipagram['captionText']
        description = json_ld.get('description') or flipagram.get('captionText')

        formats = [{
            'url': video['url'],
            'width': int_or_none(video.get('width')),
            'height': int_or_none(video.get('height')),
            # NOTE(review): 'size' is read from the top-level page object,
            # not from ``video`` - kept as is; confirm which one carries it.
            'filesize': int_or_none(page_data.get('size')),
        }]

        # The music track preview, when present, is offered as an
        # audio-only format.
        preview_url = try_get(
            flipagram, lambda x: x['music']['track']['previewUrl'], compat_str)
        if preview_url:
            formats.append({
                'url': preview_url,
                'ext': 'm4a',
                'vcodec': 'none',
            })

        self._sort_formats(formats)

        counts = flipagram.get('counts', {})
        user = flipagram.get('user', {})

        thumbnails = [{
            'url': self._proto_relative_url(cover['url']),
            'width': int_or_none(cover.get('width')),
            'height': int_or_none(cover.get('height')),
            'filesize': int_or_none(cover.get('size')),
        } for cover in flipagram.get('covers', []) if cover.get('url')]

        # Note that this only retrieves comments that are initially loaded.
        # For videos with large amounts of comments, most won't be retrieved.
        # NOTE(review): comments are looked up under flipagram['video'], as
        # before; verify that this is where the page actually stores them.
        comments = []
        for comment in video.get('comments', {}).get(video_id, {}).get('items', []):
            text = comment.get('comment')
            # Only a non-empty list is usable; its first element is the text.
            if not text or not isinstance(text, list):
                continue
            comments.append({
                'author': comment.get('user', {}).get('name'),
                'author_id': comment.get('user', {}).get('username'),
                'id': comment.get('id'),
                'text': text[0],
                'timestamp': unified_timestamp(comment.get('created')),
            })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            # 'duration' is divided by 1000 (the test expects seconds).
            'duration': float_or_none(flipagram.get('duration'), 1000),
            'thumbnails': thumbnails,
            'timestamp': unified_timestamp(flipagram.get('iso8601Created')),
            'uploader': user.get('name'),
            'uploader_id': user.get('username'),
            'creator': user.get('name'),
            'view_count': int_or_none(counts.get('plays')),
            'like_count': int_or_none(counts.get('likes')),
            'repost_count': int_or_none(counts.get('reflips')),
            'comment_count': int_or_none(counts.get('comments')),
            'comments': comments,
            'formats': formats,
        }
|
@@ -14,7 +14,10 @@ from ..utils import (
|
||||
parse_duration,
|
||||
determine_ext,
|
||||
)
|
||||
from .dailymotion import DailymotionCloudIE
|
||||
from .dailymotion import (
|
||||
DailymotionIE,
|
||||
DailymotionCloudIE,
|
||||
)
|
||||
|
||||
|
||||
class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
@@ -188,6 +191,21 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# Dailymotion embed
|
||||
'url': 'http://www.francetvinfo.fr/politique/notre-dame-des-landes/video-sur-france-inter-cecile-duflot-denonce-le-regard-meprisant-de-patrick-cohen_1520091.html',
|
||||
'md5': 'ee7f1828f25a648addc90cb2687b1f12',
|
||||
'info_dict': {
|
||||
'id': 'x4iiko0',
|
||||
'ext': 'mp4',
|
||||
'title': 'NDDL, référendum, Brexit : Cécile Duflot répond à Patrick Cohen',
|
||||
'description': 'Au lendemain de la victoire du "oui" au référendum sur l\'aéroport de Notre-Dame-des-Landes, l\'ancienne ministre écologiste est l\'invitée de Patrick Cohen. Plus d\'info : https://www.franceinter.fr/emissions/le-7-9/le-7-9-27-juin-2016',
|
||||
'timestamp': 1467011958,
|
||||
'upload_date': '20160627',
|
||||
'uploader': 'France Inter',
|
||||
'uploader_id': 'x2q2ez',
|
||||
},
|
||||
'add_ie': ['Dailymotion'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -197,7 +215,13 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
|
||||
dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
|
||||
if dmcloud_url:
|
||||
return self.url_result(dmcloud_url, 'DailymotionCloud')
|
||||
return self.url_result(dmcloud_url, DailymotionCloudIE.ie_key())
|
||||
|
||||
dailymotion_urls = DailymotionIE._extract_urls(webpage)
|
||||
if dailymotion_urls:
|
||||
return self.playlist_result([
|
||||
self.url_result(dailymotion_url, DailymotionIE.ie_key())
|
||||
for dailymotion_url in dailymotion_urls])
|
||||
|
||||
video_id, catalogue = self._search_regex(
|
||||
(r'id-video=([^@]+@[^"]+)',
|
||||
|
@@ -28,10 +28,13 @@ class GameSpotIE(OnceIE):
|
||||
'url': 'http://www.gamespot.com/videos/the-witcher-3-wild-hunt-xbox-one-now-playing/2300-6424837/',
|
||||
'info_dict': {
|
||||
'id': 'gs-2300-6424837',
|
||||
'ext': 'flv',
|
||||
'title': 'The Witcher 3: Wild Hunt [Xbox ONE] - Now Playing',
|
||||
'ext': 'mp4',
|
||||
'title': 'Now Playing - The Witcher 3: Wild Hunt',
|
||||
'description': 'Join us as we take a look at the early hours of The Witcher 3: Wild Hunt and more.',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 downloads
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -49,7 +49,10 @@ from .pornhub import PornHubIE
|
||||
from .xhamster import XHamsterEmbedIE
|
||||
from .tnaflix import TNAFlixNetworkEmbedIE
|
||||
from .vimeo import VimeoIE
|
||||
from .dailymotion import DailymotionCloudIE
|
||||
from .dailymotion import (
|
||||
DailymotionIE,
|
||||
DailymotionCloudIE,
|
||||
)
|
||||
from .onionstudios import OnionStudiosIE
|
||||
from .viewlift import ViewLiftEmbedIE
|
||||
from .screenwavemedia import ScreenwaveMediaIE
|
||||
@@ -59,6 +62,7 @@ from .videomore import VideomoreIE
|
||||
from .googledrive import GoogleDriveIE
|
||||
from .jwplatform import JWPlatformIE
|
||||
from .digiteka import DigitekaIE
|
||||
from .arkena import ArkenaIE
|
||||
from .instagram import InstagramIE
|
||||
from .liveleak import LiveLeakIE
|
||||
from .threeqsdn import ThreeQSDNIE
|
||||
@@ -470,7 +474,7 @@ class GenericIE(InfoExtractor):
|
||||
'url': 'http://www.vestifinance.ru/articles/25753',
|
||||
'info_dict': {
|
||||
'id': '25753',
|
||||
'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
|
||||
'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
@@ -637,6 +641,8 @@ class GenericIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
|
||||
'description': 'Two valets share their love for movie star Liam Neesons.',
|
||||
'timestamp': 1349922600,
|
||||
'upload_date': '20121011',
|
||||
},
|
||||
},
|
||||
# YouTube embed via <data-embed-url="">
|
||||
@@ -853,6 +859,7 @@ class GenericIE(InfoExtractor):
|
||||
'description': 'md5:601cb790edd05908957dae8aaa866465',
|
||||
'upload_date': '20150220',
|
||||
},
|
||||
'skip': 'All The Daily Show URLs now redirect to http://www.cc.com/shows/',
|
||||
},
|
||||
# jwplayer YouTube
|
||||
{
|
||||
@@ -1246,6 +1253,20 @@ class GenericIE(InfoExtractor):
|
||||
'uploader': 'www.hudl.com',
|
||||
},
|
||||
},
|
||||
# twitter:player:stream embed
|
||||
{
|
||||
'url': 'http://www.rtl.be/info/video/589263.aspx?CategoryID=288',
|
||||
'info_dict': {
|
||||
'id': 'master',
|
||||
'ext': 'mp4',
|
||||
'title': 'Une nouvelle espèce de dinosaure découverte en Argentine',
|
||||
'uploader': 'www.rtl.be',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 downloads
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# twitter:player embed
|
||||
{
|
||||
'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
|
||||
@@ -1310,6 +1331,55 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'add_ie': ['Kaltura'],
|
||||
},
|
||||
{
|
||||
# Non-standard Vimeo embed
|
||||
'url': 'https://openclassrooms.com/courses/understanding-the-web',
|
||||
'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
|
||||
'info_dict': {
|
||||
'id': '148867247',
|
||||
'ext': 'mp4',
|
||||
'title': 'Understanding the web - Teaser',
|
||||
'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.',
|
||||
'upload_date': '20151214',
|
||||
'uploader': 'OpenClassrooms',
|
||||
'uploader_id': 'openclassrooms',
|
||||
},
|
||||
'add_ie': ['Vimeo'],
|
||||
},
|
||||
{
|
||||
'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
|
||||
'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
|
||||
'info_dict': {
|
||||
'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
|
||||
'ext': 'mp4',
|
||||
'title': 'Big Buck Bunny',
|
||||
'description': 'Royalty free test video',
|
||||
'timestamp': 1432816365,
|
||||
'upload_date': '20150528',
|
||||
'is_live': False,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [ArkenaIE.ie_key()],
|
||||
},
|
||||
# {
|
||||
# # TODO: find another test
|
||||
# # http://schema.org/VideoObject
|
||||
# 'url': 'https://flipagram.com/f/nyvTSJMKId',
|
||||
# 'md5': '888dcf08b7ea671381f00fab74692755',
|
||||
# 'info_dict': {
|
||||
# 'id': 'nyvTSJMKId',
|
||||
# 'ext': 'mp4',
|
||||
# 'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
|
||||
# 'description': '#love for cats.',
|
||||
# 'timestamp': 1461244995,
|
||||
# 'upload_date': '20160421',
|
||||
# },
|
||||
# 'params': {
|
||||
# 'force_generic_extractor': True,
|
||||
# },
|
||||
# }
|
||||
]
|
||||
|
||||
def report_following_redirect(self, new_url):
|
||||
@@ -1673,12 +1743,9 @@ class GenericIE(InfoExtractor):
|
||||
if matches:
|
||||
return _playlist_from_matches(matches, lambda m: m[-1])
|
||||
|
||||
# Look for embedded Dailymotion player
|
||||
matches = re.findall(
|
||||
r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
|
||||
matches = DailymotionIE._extract_urls(webpage)
|
||||
if matches:
|
||||
return _playlist_from_matches(
|
||||
matches, lambda m: unescapeHTML(m[1]))
|
||||
return _playlist_from_matches(matches)
|
||||
|
||||
# Look for embedded Dailymotion playlist player (#3822)
|
||||
m = re.search(
|
||||
@@ -2100,6 +2167,11 @@ class GenericIE(InfoExtractor):
|
||||
if digiteka_url:
|
||||
return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
|
||||
|
||||
# Look for Arkena embeds
|
||||
arkena_url = ArkenaIE._extract_url(webpage)
|
||||
if arkena_url:
|
||||
return self.url_result(arkena_url, ArkenaIE.ie_key())
|
||||
|
||||
# Look for Limelight embeds
|
||||
mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
|
||||
if mobj:
|
||||
@@ -2152,10 +2224,18 @@ class GenericIE(InfoExtractor):
|
||||
'uploader': video_uploader,
|
||||
}
|
||||
|
||||
# https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser
|
||||
embed_url = self._html_search_meta('twitter:player', webpage, default=None)
|
||||
if embed_url:
|
||||
return self.url_result(embed_url)
|
||||
# Looking for http://schema.org/VideoObject
|
||||
json_ld = self._search_json_ld(
|
||||
webpage, video_id, default=None, expected_type='VideoObject')
|
||||
if json_ld and json_ld.get('url'):
|
||||
info_dict.update({
|
||||
'title': video_title or info_dict['title'],
|
||||
'description': video_description,
|
||||
'thumbnail': video_thumbnail,
|
||||
'age_limit': age_limit
|
||||
})
|
||||
info_dict.update(json_ld)
|
||||
return info_dict
|
||||
|
||||
def check_video(vurl):
|
||||
if YoutubeIE.suitable(vurl):
|
||||
@@ -2200,6 +2280,9 @@ class GenericIE(InfoExtractor):
|
||||
r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
|
||||
if not found:
|
||||
# Try to find twitter cards info
|
||||
# twitter:player:stream should be checked before twitter:player since
|
||||
# it is expected to contain a raw stream (see
|
||||
# https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
|
||||
found = filter_video(re.findall(
|
||||
r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
|
||||
if not found:
|
||||
@@ -2233,6 +2316,15 @@ class GenericIE(InfoExtractor):
|
||||
'_type': 'url',
|
||||
'url': new_url,
|
||||
}
|
||||
|
||||
if not found:
|
||||
# twitter:player is a https URL to iframe player that may or may not
|
||||
# be supported by youtube-dl thus this is checked the very last (see
|
||||
# https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
|
||||
embed_url = self._html_search_meta('twitter:player', webpage, default=None)
|
||||
if embed_url:
|
||||
return self.url_result(embed_url)
|
||||
|
||||
if not found:
|
||||
raise UnsupportedError(url)
|
||||
|
||||
|
@@ -36,7 +36,6 @@ class InstagramIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'BA-pQFBG8HZ',
|
||||
'ext': 'mp4',
|
||||
'uploader_id': 'britneyspears',
|
||||
'title': 'Video by britneyspears',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'timestamp': 1453760977,
|
||||
|
71
youtube_dl/extractor/kamcord.py
Normal file
71
youtube_dl/extractor/kamcord.py
Normal file
@@ -0,0 +1,71 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class KamcordIE(InfoExtractor):
    # Extractor for single video pages of the form kamcord.com/v/<id>.
    _VALID_URL = r'https?://(?:www\.)?kamcord\.com/v/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://www.kamcord.com/v/hNYRduDgWb4',
        'md5': 'c3180e8a9cfac2e86e1b88cb8751b54c',
        'info_dict': {
            'id': 'hNYRduDgWb4',
            'ext': 'mp4',
            'title': 'Drinking Madness',
            'uploader': 'jacksfilms',
            'uploader_id': '3044562',
            'view_count': int,
            'like_count': int,
            'comment_count': int,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for a Kamcord video page.

        The page embeds a ``window.__props`` JSON object; its ``video``
        entry supplies the title, the HLS manifest URL (``play.hls``),
        the uploader, the counters and the thumbnails.

        ``title`` and ``play.hls`` are mandatory (a missing key raises
        ``KeyError``); every other field is optional and ends up as
        ``None`` (or an empty list for thumbnails) when absent.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        video = self._parse_json(
            self._search_regex(
                r'window\.__props\s*=\s*({.+?});?(?:\n|\s*</script)',
                webpage, 'video'),
            video_id)['video']

        title = video['title']

        formats = self._extract_m3u8_formats(
            video['play']['hls'], video_id, 'mp4', entry_protocol='m3u8_native')
        self._sort_formats(formats)

        # ``or {}`` (rather than a ``.get`` default) also covers an explicit
        # JSON ``null`` for "user", mirroring the thumbnail handling below.
        user = video.get('user') or {}
        uploader = user.get('username')
        uploader_id = user.get('id')

        view_count = int_or_none(video.get('viewCount'))
        like_count = int_or_none(video.get('heartCount'))
        comment_count = int_or_none(video.get('messageCount'))

        # Thumbnail ids listed later in this tuple get a higher preference.
        preference_key = qualities(('small', 'medium', 'large'))

        thumbnails = [{
            'url': thumbnail_url,
            'id': thumbnail_id,
            'preference': preference_key(thumbnail_id),
        } for thumbnail_id, thumbnail_url in (video.get('thumbnail') or {}).items()
            if isinstance(thumbnail_id, compat_str) and isinstance(thumbnail_url, compat_str)]

        return {
            'id': video_id,
            'title': title,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'view_count': view_count,
            'like_count': like_count,
            'comment_count': comment_count,
            'thumbnails': thumbnails,
            'formats': formats,
        }
|
90
youtube_dl/extractor/lcp.py
Normal file
90
youtube_dl/extractor/lcp.py
Normal file
@@ -0,0 +1,90 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .arkena import ArkenaIE
|
||||
|
||||
|
||||
class LcpPlayIE(ArkenaIE):
    # Handles play.lcp.fr embed URLs; all extraction logic is inherited
    # from ArkenaIE, only the URL pattern and the test case differ.
    _VALID_URL = r'https?://play\.lcp\.fr/embed/(?P<id>[^/]+)/(?P<account_id>[^/]+)/[^/]+/[^/]+'
    _TESTS = [
        {
            'url': 'http://play.lcp.fr/embed/327336/131064/darkmatter/0',
            'md5': 'b8bd9298542929c06c1c15788b1f277a',
            'info_dict': {
                'id': '327336',
                'ext': 'mp4',
                'title': '327336',
                'timestamp': 1456391602,
                'upload_date': '20160225',
            },
            'params': {
                'skip_download': True,
            },
        },
    ]
|
||||
|
||||
|
||||
class LcpIE(InfoExtractor):
    # Extractor for lcp.fr article/programme pages.  Pages with a
    # play.lcp.fr iframe are delegated to LcpPlayIE; everything else is
    # handed over to the generic extractor.
    _VALID_URL = r'https?://(?:www\.)?lcp\.fr/(?:[^/]+/)*(?P<id>[^/]+)'

    _TESTS = [{
        # arkena embed
        'url': 'http://www.lcp.fr/la-politique-en-video/schwartzenberg-prg-preconise-francois-hollande-de-participer-une-primaire',
        'md5': 'b8bd9298542929c06c1c15788b1f277a',
        'info_dict': {
            'id': 'd56d03e9',
            'ext': 'mp4',
            'title': 'Schwartzenberg (PRG) préconise à François Hollande de participer à une primaire à gauche',
            'description': 'md5:96ad55009548da9dea19f4120c6c16a8',
            'timestamp': 1456488895,
            'upload_date': '20160226',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # dailymotion live stream
        'url': 'http://www.lcp.fr/le-direct',
        'info_dict': {
            'id': 'xji3qy',
            'ext': 'mp4',
            'title': 'La Chaine Parlementaire (LCP), Live TNT',
            'description': 'md5:5c69593f2de0f38bd9a949f2c95e870b',
            'uploader': 'LCP',
            'uploader_id': 'xbz33d',
            'timestamp': 1308923058,
            'upload_date': '20110624',
        },
        'params': {
            # m3u8 live stream
            'skip_download': True,
        },
    }, {
        'url': 'http://www.lcp.fr/emissions/277792-les-volontaires',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The last path component of the page URL serves as display id.
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # Look for an <iframe> whose src matches the LcpPlayIE URL pattern,
        # followed by anything up to the closing quote character.
        iframe_pattern = (
            r'<iframe[^>]+src=(["\'])(?P<url>%s?(?:(?!\1).)*)\1'
            % LcpPlayIE._VALID_URL)
        play_url = self._search_regex(
            iframe_pattern, webpage, 'play iframe',
            default=None, group='url')

        # No play.lcp.fr iframe on the page: let the generic extractor try.
        if not play_url:
            return self.url_result(url, 'Generic')

        # Prefer the Open Graph title; the twitter:title lookup is fatal.
        title = self._og_search_title(webpage, default=None)
        if not title:
            title = self._html_search_meta(
                'twitter:title', webpage, fatal=True)

        description = self._html_search_meta(
            ('description', 'twitter:description'), webpage)

        # url_transparent: LcpPlayIE performs the extraction while the
        # metadata gathered from this page is passed along with the result.
        result = {
            '_type': 'url_transparent',
            'ie_key': LcpPlayIE.ie_key(),
            'url': play_url,
            'display_id': display_id,
            'title': title,
            'description': description,
        }
        return result
|
@@ -23,6 +23,7 @@ from ..utils import (
|
||||
str_or_none,
|
||||
url_basename,
|
||||
urshift,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
@@ -89,6 +90,10 @@ class LeIE(InfoExtractor):
|
||||
_loc3_ = self.ror(_loc3_, _loc2_ % 17)
|
||||
return _loc3_
|
||||
|
||||
# reversed from http://jstatic.letvcdn.com/sdk/player.js
|
||||
def get_mms_key(self, time):
    # Pass the given time value through self.ror with an argument of 8,
    # then XOR the result with the fixed constant 185025305.
    rotated = self.ror(time, 8)
    return rotated ^ 185025305
|
||||
|
||||
# see M3U8Encryption class in KLetvPlayer.swf
|
||||
@staticmethod
|
||||
def decrypt_m3u8(encrypted_data):
|
||||
@@ -109,23 +114,7 @@ class LeIE(InfoExtractor):
|
||||
|
||||
return bytes(_loc7_)
|
||||
|
||||
def _real_extract(self, url):
|
||||
media_id = self._match_id(url)
|
||||
page = self._download_webpage(url, media_id)
|
||||
params = {
|
||||
'id': media_id,
|
||||
'platid': 1,
|
||||
'splatid': 101,
|
||||
'format': 1,
|
||||
'tkey': self.calc_time_key(int(time.time())),
|
||||
'domain': 'www.le.com'
|
||||
}
|
||||
|
||||
play_json = self._download_json(
|
||||
'http://api.le.com/mms/out/video/playJson',
|
||||
media_id, 'Downloading playJson data', query=params,
|
||||
headers=self.geo_verification_headers())
|
||||
|
||||
def _check_errors(self, play_json):
|
||||
# Check for errors
|
||||
playstatus = play_json['playstatus']
|
||||
if playstatus['status'] == 0:
|
||||
@@ -136,43 +125,99 @@ class LeIE(InfoExtractor):
|
||||
msg = 'Generic error. flag = %d' % flag
|
||||
raise ExtractorError(msg, expected=True)
|
||||
|
||||
playurl = play_json['playurl']
|
||||
def _real_extract(self, url):
|
||||
media_id = self._match_id(url)
|
||||
page = self._download_webpage(url, media_id)
|
||||
|
||||
formats = ['350', '1000', '1300', '720p', '1080p']
|
||||
dispatch = playurl['dispatch']
|
||||
play_json_h5 = self._download_json(
|
||||
'http://api.le.com/mms/out/video/playJsonH5',
|
||||
media_id, 'Downloading html5 playJson data', query={
|
||||
'id': media_id,
|
||||
'platid': 3,
|
||||
'splatid': 304,
|
||||
'format': 1,
|
||||
'tkey': self.get_mms_key(int(time.time())),
|
||||
'domain': 'www.le.com',
|
||||
'tss': 'no',
|
||||
},
|
||||
headers=self.geo_verification_headers())
|
||||
self._check_errors(play_json_h5)
|
||||
|
||||
urls = []
|
||||
for format_id in formats:
|
||||
if format_id in dispatch:
|
||||
media_url = playurl['domain'][0] + dispatch[format_id][0]
|
||||
media_url += '&' + compat_urllib_parse_urlencode({
|
||||
'm3v': 1,
|
||||
play_json_flash = self._download_json(
|
||||
'http://api.le.com/mms/out/video/playJson',
|
||||
media_id, 'Downloading flash playJson data', query={
|
||||
'id': media_id,
|
||||
'platid': 1,
|
||||
'splatid': 101,
|
||||
'format': 1,
|
||||
'tkey': self.calc_time_key(int(time.time())),
|
||||
'domain': 'www.le.com',
|
||||
},
|
||||
headers=self.geo_verification_headers())
|
||||
self._check_errors(play_json_flash)
|
||||
|
||||
def get_h5_urls(media_url, format_id):
|
||||
location = self._download_json(
|
||||
media_url, media_id,
|
||||
'Download JSON metadata for format %s' % format_id, query={
|
||||
'format': 1,
|
||||
'expect': 3,
|
||||
'rateid': format_id,
|
||||
})
|
||||
'tss': 'no',
|
||||
})['location']
|
||||
|
||||
nodes_data = self._download_json(
|
||||
media_url, media_id,
|
||||
'Download JSON metadata for format %s' % format_id)
|
||||
return {
|
||||
'http': update_url_query(location, {'tss': 'no'}),
|
||||
'hls': update_url_query(location, {'tss': 'ios'}),
|
||||
}
|
||||
|
||||
req = self._request_webpage(
|
||||
nodes_data['nodelist'][0]['location'], media_id,
|
||||
note='Downloading m3u8 information for format %s' % format_id)
|
||||
def get_flash_urls(media_url, format_id):
|
||||
media_url += '&' + compat_urllib_parse_urlencode({
|
||||
'm3v': 1,
|
||||
'format': 1,
|
||||
'expect': 3,
|
||||
'rateid': format_id,
|
||||
})
|
||||
|
||||
m3u8_data = self.decrypt_m3u8(req.read())
|
||||
nodes_data = self._download_json(
|
||||
media_url, media_id,
|
||||
'Download JSON metadata for format %s' % format_id)
|
||||
|
||||
url_info_dict = {
|
||||
'url': encode_data_uri(m3u8_data, 'application/vnd.apple.mpegurl'),
|
||||
'ext': determine_ext(dispatch[format_id][1]),
|
||||
'format_id': format_id,
|
||||
'protocol': 'm3u8',
|
||||
}
|
||||
req = self._request_webpage(
|
||||
nodes_data['nodelist'][0]['location'], media_id,
|
||||
note='Downloading m3u8 information for format %s' % format_id)
|
||||
|
||||
if format_id[-1:] == 'p':
|
||||
url_info_dict['height'] = int_or_none(format_id[:-1])
|
||||
m3u8_data = self.decrypt_m3u8(req.read())
|
||||
|
||||
urls.append(url_info_dict)
|
||||
return {
|
||||
'hls': encode_data_uri(m3u8_data, 'application/vnd.apple.mpegurl'),
|
||||
}
|
||||
|
||||
extracted_formats = []
|
||||
formats = []
|
||||
for play_json, get_urls in ((play_json_h5, get_h5_urls), (play_json_flash, get_flash_urls)):
|
||||
playurl = play_json['playurl']
|
||||
play_domain = playurl['domain'][0]
|
||||
|
||||
for format_id, format_data in playurl.get('dispatch', []).items():
|
||||
if format_id in extracted_formats:
|
||||
continue
|
||||
extracted_formats.append(format_id)
|
||||
|
||||
media_url = play_domain + format_data[0]
|
||||
for protocol, format_url in get_urls(media_url, format_id).items():
|
||||
f = {
|
||||
'url': format_url,
|
||||
'ext': determine_ext(format_data[1]),
|
||||
'format_id': '%s-%s' % (protocol, format_id),
|
||||
'protocol': 'm3u8_native' if protocol == 'hls' else 'http',
|
||||
'quality': int_or_none(format_id),
|
||||
}
|
||||
|
||||
if format_id[-1:] == 'p':
|
||||
f['height'] = int_or_none(format_id[:-1])
|
||||
|
||||
formats.append(f)
|
||||
self._sort_formats(formats, ('height', 'quality', 'format_id'))
|
||||
|
||||
publish_time = parse_iso8601(self._html_search_regex(
|
||||
r'发布时间 ([^<>]+) ', page, 'publish time', default=None),
|
||||
@@ -181,7 +226,7 @@ class LeIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': media_id,
|
||||
'formats': urls,
|
||||
'formats': formats,
|
||||
'title': playurl['title'],
|
||||
'thumbnail': playurl['pic'],
|
||||
'description': description,
|
||||
|
@@ -100,7 +100,7 @@ class LyndaIE(LyndaBaseIE):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
|
||||
'md5': 'ecfc6862da89489161fb9cd5f5a6fac1',
|
||||
# md5 is unstable
|
||||
'info_dict': {
|
||||
'id': '114408',
|
||||
'ext': 'mp4',
|
||||
|
@@ -11,13 +11,14 @@ from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
get_element_by_attribute,
|
||||
mimetype2ext,
|
||||
)
|
||||
|
||||
|
||||
class MetacafeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
|
||||
_VALID_URL = r'https?://(?:www\.)?metacafe\.com/watch/(?P<video_id>[^/]+)/(?P<display_id>[^/?#]+)'
|
||||
_DISCLAIMER = 'http://www.metacafe.com/family_filter/'
|
||||
_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
|
||||
IE_NAME = 'metacafe'
|
||||
@@ -47,6 +48,7 @@ class MetacafeIE(InfoExtractor):
|
||||
'uploader': 'ign',
|
||||
'description': 'Sony released a massive FAQ on the PlayStation Blog detailing the PS4\'s capabilities and limitations.',
|
||||
},
|
||||
'skip': 'Page is temporarily unavailable.',
|
||||
},
|
||||
# AnyClip video
|
||||
{
|
||||
@@ -55,8 +57,8 @@ class MetacafeIE(InfoExtractor):
|
||||
'id': 'an-dVVXnuY7Jh77J',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Andromeda Strain (1971): Stop the Bomb Part 3',
|
||||
'uploader': 'anyclip',
|
||||
'description': 'md5:38c711dd98f5bb87acf973d573442e67',
|
||||
'uploader': 'AnyClip',
|
||||
'description': 'md5:cbef0460d31e3807f6feb4e7a5952e5b',
|
||||
},
|
||||
},
|
||||
# age-restricted video
|
||||
@@ -110,28 +112,25 @@ class MetacafeIE(InfoExtractor):
|
||||
def report_disclaimer(self):
|
||||
self.to_screen('Retrieving disclaimer')
|
||||
|
||||
def _real_initialize(self):
|
||||
def _confirm_age(self):
|
||||
# Retrieve disclaimer
|
||||
self.report_disclaimer()
|
||||
self._download_webpage(self._DISCLAIMER, None, False, 'Unable to retrieve disclaimer')
|
||||
|
||||
# Confirm age
|
||||
disclaimer_form = {
|
||||
'filters': '0',
|
||||
'submit': "Continue - I'm over 18",
|
||||
}
|
||||
request = sanitized_Request(self._FILTER_POST, urlencode_postdata(disclaimer_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
self.report_age_confirmation()
|
||||
self._download_webpage(request, None, False, 'Unable to confirm age')
|
||||
self._download_webpage(
|
||||
self._FILTER_POST, None, False, 'Unable to confirm age',
|
||||
data=urlencode_postdata({
|
||||
'filters': '0',
|
||||
'submit': "Continue - I'm over 18",
|
||||
}), headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
})
|
||||
|
||||
def _real_extract(self, url):
|
||||
# Extract id and simplified title from URL
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError('Invalid URL: %s' % url)
|
||||
|
||||
video_id = mobj.group(1)
|
||||
video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
# the video may come from an external site
|
||||
m_external = re.match('^(\w{2})-(.*)$', video_id)
|
||||
@@ -144,15 +143,24 @@ class MetacafeIE(InfoExtractor):
|
||||
if prefix == 'cb':
|
||||
return self.url_result('theplatform:%s' % ext_id, 'ThePlatform')
|
||||
|
||||
# Retrieve video webpage to extract further information
|
||||
req = sanitized_Request('http://www.metacafe.com/watch/%s/' % video_id)
|
||||
# self._confirm_age()
|
||||
|
||||
# AnyClip videos require the flashversion cookie so that we get the link
|
||||
# to the mp4 file
|
||||
mobj_an = re.match(r'^an-(.*?)$', video_id)
|
||||
if mobj_an:
|
||||
req.headers['Cookie'] = 'flashVersion=0;'
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
headers = {}
|
||||
if video_id.startswith('an-'):
|
||||
headers['Cookie'] = 'flashVersion=0;'
|
||||
|
||||
# Retrieve video webpage to extract further information
|
||||
webpage = self._download_webpage(url, video_id, headers=headers)
|
||||
|
||||
error = get_element_by_attribute(
|
||||
'class', 'notfound-page-title', webpage)
|
||||
if error:
|
||||
raise ExtractorError(error, expected=True)
|
||||
|
||||
video_title = self._html_search_meta(
|
||||
['og:title', 'twitter:title'], webpage, 'title', default=None) or self._search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
|
||||
|
||||
# Extract URL, uploader and title from webpage
|
||||
self.report_extraction(video_id)
|
||||
@@ -216,20 +224,40 @@ class MetacafeIE(InfoExtractor):
|
||||
'player_url': player_url,
|
||||
'ext': play_path.partition(':')[0],
|
||||
})
|
||||
if video_url is None:
|
||||
flashvars = self._parse_json(self._search_regex(
|
||||
r'flashvars\s*=\s*({.*});', webpage, 'flashvars',
|
||||
default=None), video_id, fatal=False)
|
||||
if flashvars:
|
||||
video_url = []
|
||||
for source in flashvars.get('sources'):
|
||||
source_url = source.get('src')
|
||||
if not source_url:
|
||||
continue
|
||||
ext = mimetype2ext(source.get('type')) or determine_ext(source_url)
|
||||
if ext == 'm3u8':
|
||||
video_url.extend(self._extract_m3u8_formats(
|
||||
source_url, video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
video_url.append({
|
||||
'url': source_url,
|
||||
'ext': ext,
|
||||
})
|
||||
|
||||
if video_url is None:
|
||||
raise ExtractorError('Unsupported video type')
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'(?im)<title>(.*) - Video</title>', webpage, 'title')
|
||||
description = self._og_search_description(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
description = self._html_search_meta(
|
||||
['og:description', 'twitter:description', 'description'],
|
||||
webpage, 'title', fatal=False)
|
||||
thumbnail = self._html_search_meta(
|
||||
['og:image', 'twitter:image'], webpage, 'title', fatal=False)
|
||||
video_uploader = self._html_search_regex(
|
||||
r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
|
||||
webpage, 'uploader nickname', fatal=False)
|
||||
duration = int_or_none(
|
||||
self._html_search_meta('video:duration', webpage))
|
||||
|
||||
self._html_search_meta('video:duration', webpage, default=None))
|
||||
age_limit = (
|
||||
18
|
||||
if re.search(r'(?:"contentRating":|"rating",)"restricted"', webpage)
|
||||
@@ -242,10 +270,11 @@ class MetacafeIE(InfoExtractor):
|
||||
'url': video_url,
|
||||
'ext': video_ext,
|
||||
}]
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'description': description,
|
||||
'uploader': video_uploader,
|
||||
'title': video_title,
|
||||
|
@@ -9,7 +9,7 @@ class MGTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.mgtv\.com/v/(?:[^/]+/)*(?P<id>\d+)\.html'
|
||||
IE_DESC = '芒果TV'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
|
||||
'md5': '1bdadcf760a0b90946ca68ee9a2db41a',
|
||||
'info_dict': {
|
||||
@@ -20,13 +20,18 @@ class MGTVIE(InfoExtractor):
|
||||
'duration': 7461,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# no tbr extracted from stream_url
|
||||
'url': 'http://www.mgtv.com/v/1/1/f/3324755.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
api_data = self._download_json(
|
||||
'http://v.api.mgtv.com/player/video', video_id,
|
||||
query={'video_id': video_id})['data']
|
||||
query={'video_id': video_id},
|
||||
headers=self.geo_verification_headers())['data']
|
||||
info = api_data['info']
|
||||
|
||||
formats = []
|
||||
@@ -40,7 +45,8 @@ class MGTVIE(InfoExtractor):
|
||||
def extract_format(stream_url, format_id, idx, query={}):
|
||||
format_info = self._download_json(
|
||||
stream_url, video_id,
|
||||
note='Download video info for format %s' % format_id or '#%d' % idx, query=query)
|
||||
note='Download video info for format %s' % (format_id or '#%d' % idx),
|
||||
query=query)
|
||||
return {
|
||||
'format_id': format_id,
|
||||
'url': format_info['info'],
|
||||
|
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
||||
import random
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
int_or_none,
|
||||
@@ -18,13 +19,16 @@ class MioMioIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
# "type=video" in flashvars
|
||||
'url': 'http://www.miomio.tv/watch/cc88912/',
|
||||
'md5': '317a5f7f6b544ce8419b784ca8edae65',
|
||||
'info_dict': {
|
||||
'id': '88912',
|
||||
'ext': 'flv',
|
||||
'title': '【SKY】字幕 铠武昭和VS平成 假面骑士大战FEAT战队 魔星字幕组 字幕',
|
||||
'duration': 5923,
|
||||
},
|
||||
'params': {
|
||||
# The server provides broken file
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.miomio.tv/watch/cc184024/',
|
||||
'info_dict': {
|
||||
@@ -32,7 +36,7 @@ class MioMioIE(InfoExtractor):
|
||||
'title': '《动漫同人插画绘制》',
|
||||
},
|
||||
'playlist_mincount': 86,
|
||||
'skip': 'This video takes time too long for retrieving the URL',
|
||||
'skip': 'Unable to load videos',
|
||||
}, {
|
||||
'url': 'http://www.miomio.tv/watch/cc173113/',
|
||||
'info_dict': {
|
||||
@@ -40,20 +44,23 @@ class MioMioIE(InfoExtractor):
|
||||
'title': 'The New Macbook 2015 上手试玩与简评'
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
'skip': 'Unable to load videos',
|
||||
}, {
|
||||
# new 'h5' player
|
||||
'url': 'http://www.miomio.tv/watch/cc273295/',
|
||||
'md5': '',
|
||||
'info_dict': {
|
||||
'id': '273295',
|
||||
'ext': 'mp4',
|
||||
'title': 'アウト×デラックス 20160526',
|
||||
},
|
||||
'params': {
|
||||
# intermittent HTTP 500
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_meta(
|
||||
'description', webpage, 'title', fatal=True)
|
||||
|
||||
mioplayer_path = self._search_regex(
|
||||
r'src="(/mioplayer/[^"]+)"', webpage, 'ref_path')
|
||||
|
||||
http_headers = {'Referer': 'http://www.miomio.tv%s' % mioplayer_path}
|
||||
|
||||
def _extract_mioplayer(self, webpage, video_id, title, http_headers):
|
||||
xml_config = self._search_regex(
|
||||
r'flashvars="type=(?:sina|video)&(.+?)&',
|
||||
webpage, 'xml config')
|
||||
@@ -92,10 +99,34 @@ class MioMioIE(InfoExtractor):
|
||||
'http_headers': http_headers,
|
||||
})
|
||||
|
||||
return entries
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_meta(
|
||||
'description', webpage, 'title', fatal=True)
|
||||
|
||||
mioplayer_path = self._search_regex(
|
||||
r'src="(/mioplayer(?:_h5)?/[^"]+)"', webpage, 'ref_path')
|
||||
|
||||
if '_h5' in mioplayer_path:
|
||||
player_url = compat_urlparse.urljoin(url, mioplayer_path)
|
||||
player_webpage = self._download_webpage(
|
||||
player_url, video_id,
|
||||
note='Downloading player webpage', headers={'Referer': url})
|
||||
entries = self._parse_html5_media_entries(player_url, player_webpage)
|
||||
http_headers = {'Referer': player_url}
|
||||
else:
|
||||
http_headers = {'Referer': 'http://www.miomio.tv%s' % mioplayer_path}
|
||||
entries = self._extract_mioplayer(webpage, video_id, title, http_headers)
|
||||
|
||||
if len(entries) == 1:
|
||||
segment = entries[0]
|
||||
segment['id'] = video_id
|
||||
segment['title'] = title
|
||||
segment['http_headers'] = http_headers
|
||||
return segment
|
||||
|
||||
return {
|
||||
|
@@ -12,12 +12,69 @@ from ..utils import (
|
||||
get_element_by_attribute,
|
||||
int_or_none,
|
||||
remove_start,
|
||||
extract_attributes,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
class MiTeleIE(InfoExtractor):
|
||||
class MiTeleBaseIE(InfoExtractor):
    # Shared player handling built around the <ms-video-player> element.

    def _extract_location_formats(self, location, video_id):
        # Resolve one entry of the mmc "locations" list into a list of
        # formats.  Returns an empty list when the entry lacks one of the
        # required fields, when the token service reports no file, or when
        # the file is neither an f4m nor an m3u8 manifest.
        gat = self._proto_relative_url(location.get('gat'), 'http:')
        bas = location.get('bas')
        loc = location.get('loc')
        ogn = location.get('ogn')
        if None in (gat, bas, loc, ogn):
            return []

        query = compat_urllib_parse_urlencode({
            'bas': bas,
            'icd': loc,
            'ogn': ogn,
            'sta': '0',
        })
        media = self._download_json(
            '%s/?%s' % (gat, query),
            video_id, 'Downloading %s JSON' % loc)

        file_ = media.get('file')
        if not file_:
            return []

        ext = determine_ext(file_)
        if ext == 'f4m':
            return self._extract_f4m_formats(
                file_ + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
                video_id, f4m_id='hds', fatal=False)
        if ext == 'm3u8':
            return self._extract_m3u8_formats(
                file_, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False)
        return []

    def _get_player_info(self, url, webpage):
        # Returns a dict with 'id', 'formats', 'thumbnail' and 'duration'
        # gathered from the player element found in webpage, its config
        # JSON and the mmc JSON documents.
        player_html = self._search_regex(
            r'(?s)(<ms-video-player.+?</ms-video-player>)',
            webpage, 'ms video player')
        player_data = extract_attributes(player_html)
        video_id = player_data['data-media-id']

        config = self._download_json(
            compat_urlparse.urljoin(url, player_data['data-config']),
            video_id, 'Downloading config JSON')
        mmc_url = config['services']['mmc']

        # The mmc URL is queried as given and again with '/flash.json'
        # replaced by '/html5.json'.
        mmc_urls = (mmc_url, mmc_url.replace('/flash.json', '/html5.json'))

        duration = None
        formats = []
        for m_url in mmc_urls:
            mmc = self._download_json(
                m_url, video_id, 'Downloading mmc JSON')
            # Keep the first truthy duration reported by any mmc document.
            if not duration:
                duration = int_or_none(mmc.get('duration'))
            for location in mmc['locations']:
                formats.extend(
                    self._extract_location_formats(location, video_id))
        self._sort_formats(formats)

        # The poster attribute on the player element wins over the config.
        thumbnail = player_data.get('data-poster')
        if not thumbnail:
            thumbnail = config.get('poster', {}).get('imageUrl')

        return {
            'id': video_id,
            'formats': formats,
            'thumbnail': thumbnail,
            'duration': duration,
        }
|
||||
|
||||
|
||||
class MiTeleIE(MiTeleBaseIE):
|
||||
IE_DESC = 'mitele.es'
|
||||
_VALID_URL = r'https?://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
|
||||
_VALID_URL = r'https?://www\.mitele\.es/(?:[^/]+/){3}(?P<id>[^/]+)/'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
|
||||
@@ -25,7 +82,7 @@ class MiTeleIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '0NF1jJnxS1Wu3pHrmvFyw2',
|
||||
'display_id': 'programa-144',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tor, la web invisible',
|
||||
'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
|
||||
'series': 'Diario de',
|
||||
@@ -40,7 +97,7 @@ class MiTeleIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'eLZSwoEd1S3pVyUm8lc6F',
|
||||
'display_id': 'programa-226',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cuarto Milenio - Temporada 6 - Programa 226',
|
||||
'description': 'md5:50daf9fadefa4e62d9fc866d0c015701',
|
||||
'series': 'Cuarto Milenio',
|
||||
@@ -59,40 +116,7 @@ class MiTeleIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
config_url = self._search_regex(
|
||||
r'data-config\s*=\s*"([^"]+)"', webpage, 'data config url')
|
||||
config_url = compat_urlparse.urljoin(url, config_url)
|
||||
|
||||
config = self._download_json(
|
||||
config_url, display_id, 'Downloading config JSON')
|
||||
|
||||
mmc = self._download_json(
|
||||
config['services']['mmc'], display_id, 'Downloading mmc JSON')
|
||||
|
||||
formats = []
|
||||
for location in mmc['locations']:
|
||||
gat = self._proto_relative_url(location.get('gat'), 'http:')
|
||||
bas = location.get('bas')
|
||||
loc = location.get('loc')
|
||||
ogn = location.get('ogn')
|
||||
if None in (gat, bas, loc, ogn):
|
||||
continue
|
||||
token_data = {
|
||||
'bas': bas,
|
||||
'icd': loc,
|
||||
'ogn': ogn,
|
||||
'sta': '0',
|
||||
}
|
||||
media = self._download_json(
|
||||
'%s/?%s' % (gat, compat_urllib_parse_urlencode(token_data)),
|
||||
display_id, 'Downloading %s JSON' % location['loc'])
|
||||
file_ = media.get('file')
|
||||
if not file_:
|
||||
continue
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
file_ + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
|
||||
display_id, f4m_id=loc))
|
||||
self._sort_formats(formats)
|
||||
info = self._get_player_info(url, webpage)
|
||||
|
||||
title = self._search_regex(
|
||||
r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>',
|
||||
@@ -112,21 +136,12 @@ class MiTeleIE(InfoExtractor):
|
||||
title = remove_start(self._search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title'), 'Ver online ')
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'data-media-id\s*=\s*"([^"]+)"', webpage,
|
||||
'data media id', default=None) or display_id
|
||||
thumbnail = config.get('poster', {}).get('imageUrl')
|
||||
duration = int_or_none(mmc.get('duration'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
info.update({
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': get_element_by_attribute('class', 'text', webpage),
|
||||
'series': series,
|
||||
'season': season,
|
||||
'episode': episode,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
})
|
||||
return info
|
||||
|
@@ -15,6 +15,8 @@ from ..utils import (
|
||||
float_or_none,
|
||||
HEADRequest,
|
||||
sanitized_Request,
|
||||
strip_or_none,
|
||||
timeconvert,
|
||||
unescapeHTML,
|
||||
url_basename,
|
||||
RegexNotFoundError,
|
||||
@@ -35,13 +37,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
return uri.split(':')[-1]
|
||||
|
||||
# This was originally implemented for ComedyCentral, but it also works here
|
||||
@staticmethod
|
||||
def _transform_rtmp_url(rtmp_video_url):
|
||||
@classmethod
|
||||
def _transform_rtmp_url(cls, rtmp_video_url):
|
||||
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url)
|
||||
if not m:
|
||||
return rtmp_video_url
|
||||
return {'rtmp': rtmp_video_url}
|
||||
base = 'http://viacommtvstrmfs.fplive.net/'
|
||||
return base + m.group('finalid')
|
||||
return {'http': base + m.group('finalid')}
|
||||
|
||||
def _get_feed_url(self, uri):
|
||||
return self._FEED_URL
|
||||
@@ -85,14 +87,14 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
rtmp_video_url = rendition.find('./src').text
|
||||
if rtmp_video_url.endswith('siteunavail.png'):
|
||||
continue
|
||||
new_url = self._transform_rtmp_url(rtmp_video_url)
|
||||
formats.append({
|
||||
new_urls = self._transform_rtmp_url(rtmp_video_url)
|
||||
formats.extend([{
|
||||
'ext': 'flv' if new_url.startswith('rtmp') else ext,
|
||||
'url': new_url,
|
||||
'format_id': rendition.get('bitrate'),
|
||||
'format_id': '-'.join(filter(None, [kind, rendition.get('bitrate')])),
|
||||
'width': int(rendition.get('width')),
|
||||
'height': int(rendition.get('height')),
|
||||
})
|
||||
} for kind, new_url in new_urls.items()])
|
||||
except (KeyError, TypeError):
|
||||
raise ExtractorError('Invalid rendition field.')
|
||||
self._sort_formats(formats)
|
||||
@@ -133,7 +135,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
message += item.text
|
||||
raise ExtractorError(message, expected=True)
|
||||
|
||||
description = xpath_text(itemdoc, 'description')
|
||||
description = strip_or_none(xpath_text(itemdoc, 'description'))
|
||||
|
||||
timestamp = timeconvert(xpath_text(itemdoc, 'pubDate'))
|
||||
|
||||
title_el = None
|
||||
if title_el is None:
|
||||
@@ -167,6 +171,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
'thumbnail': self._get_thumbnail_url(uri, itemdoc),
|
||||
'description': description,
|
||||
'duration': float_or_none(content_el.attrib.get('duration')),
|
||||
'timestamp': timestamp,
|
||||
}
|
||||
|
||||
def _get_feed_query(self, uri):
|
||||
@@ -185,8 +190,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
idoc = self._download_xml(
|
||||
url, video_id,
|
||||
'Downloading info', transform_source=fix_xml_ampersands)
|
||||
|
||||
title = xpath_text(idoc, './channel/title')
|
||||
description = xpath_text(idoc, './channel/description')
|
||||
|
||||
return self.playlist_result(
|
||||
[self._get_video_info(item) for item in idoc.findall('.//item')])
|
||||
[self._get_video_info(item) for item in idoc.findall('.//item')],
|
||||
playlist_title=title, playlist_description=description)
|
||||
|
||||
def _extract_mgid(self, webpage):
|
||||
try:
|
||||
@@ -232,6 +242,8 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Peter Dinklage Sums Up \'Game Of Thrones\' In 45 Seconds',
|
||||
'description': '"Sexy sexy sexy, stabby stabby stabby, beautiful language," says Peter Dinklage as he tries summarizing "Game of Thrones" in under a minute.',
|
||||
'timestamp': 1400126400,
|
||||
'upload_date': '20140515',
|
||||
},
|
||||
}
|
||||
|
||||
@@ -274,6 +286,8 @@ class MTVIE(MTVServicesInfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Taylor Swift - "Ours (VH1 Storytellers)"',
|
||||
'description': 'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
|
||||
'timestamp': 1352610000,
|
||||
'upload_date': '20121111',
|
||||
},
|
||||
},
|
||||
]
|
||||
@@ -300,20 +314,6 @@ class MTVIE(MTVServicesInfoExtractor):
|
||||
return self._get_videos_info(uri)
|
||||
|
||||
|
||||
class MTVIggyIE(MTVServicesInfoExtractor):
|
||||
IE_NAME = 'mtviggy.com'
|
||||
_VALID_URL = r'https?://www\.mtviggy\.com/videos/.+'
|
||||
_TEST = {
|
||||
'url': 'http://www.mtviggy.com/videos/arcade-fire-behind-the-scenes-at-the-biggest-music-experiment-yet/',
|
||||
'info_dict': {
|
||||
'id': '984696',
|
||||
'ext': 'mp4',
|
||||
'title': 'Arcade Fire: Behind the Scenes at the Biggest Music Experiment Yet',
|
||||
}
|
||||
}
|
||||
_FEED_URL = 'http://all.mtvworldverticals.com/feed-xml/'
|
||||
|
||||
|
||||
class MTVDEIE(MTVServicesInfoExtractor):
|
||||
IE_NAME = 'mtv.de'
|
||||
_VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:artists|shows|news)/(?:[^/]+/)*(?P<id>\d+)-[^/#?]+/*(?:[#?].*)?$'
|
||||
@@ -321,7 +321,7 @@ class MTVDEIE(MTVServicesInfoExtractor):
|
||||
'url': 'http://www.mtv.de/artists/10571-cro/videos/61131-traum',
|
||||
'info_dict': {
|
||||
'id': 'music_video-a50bc5f0b3aa4b3190aa',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': 'MusicVideo_cro-traum',
|
||||
'description': 'Cro - Traum',
|
||||
},
|
||||
@@ -329,20 +329,21 @@ class MTVDEIE(MTVServicesInfoExtractor):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Blocked at Travis CI',
|
||||
}, {
|
||||
# mediagen URL without query (e.g. http://videos.mtvnn.com/mediagen/e865da714c166d18d6f80893195fcb97)
|
||||
'url': 'http://www.mtv.de/shows/933-teen-mom-2/staffeln/5353/folgen/63565-enthullungen',
|
||||
'info_dict': {
|
||||
'id': 'local_playlist-f5ae778b9832cc837189',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': 'Episode_teen-mom-2_shows_season-5_episode-1_full-episode_part1',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Blocked at Travis CI',
|
||||
}, {
|
||||
# single video in pagePlaylist with different id
|
||||
'url': 'http://www.mtv.de/news/77491-mtv-movies-spotlight-pixels-teil-3',
|
||||
'info_dict': {
|
||||
'id': 'local_playlist-4e760566473c4c8c5344',
|
||||
@@ -354,6 +355,7 @@ class MTVDEIE(MTVServicesInfoExtractor):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Das Video kann zur Zeit nicht abgespielt werden.',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -366,11 +368,14 @@ class MTVDEIE(MTVServicesInfoExtractor):
|
||||
r'window\.pagePlaylist\s*=\s*(\[.+?\]);\n', webpage, 'page playlist'),
|
||||
video_id)
|
||||
|
||||
def _mrss_url(item):
|
||||
return item['mrss'] + item.get('mrssvars', '')
|
||||
|
||||
# news pages contain single video in playlist with different id
|
||||
if len(playlist) == 1:
|
||||
return self._get_videos_info_from_url(playlist[0]['mrss'], video_id)
|
||||
return self._get_videos_info_from_url(_mrss_url(playlist[0]), video_id)
|
||||
|
||||
for item in playlist:
|
||||
item_id = item.get('id')
|
||||
if item_id and compat_str(item_id) == video_id:
|
||||
return self._get_videos_info_from_url(item['mrss'], video_id)
|
||||
return self._get_videos_info_from_url(_mrss_url(item), video_id)
|
||||
|
@@ -1,30 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .mtv import MTVServicesInfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlencode
|
||||
|
||||
|
||||
class NextMovieIE(MTVServicesInfoExtractor):
|
||||
IE_NAME = 'nextmovie.com'
|
||||
_VALID_URL = r'https?://(?:www\.)?nextmovie\.com/shows/[^/]+/\d{4}-\d{2}-\d{2}/(?P<id>[^/?#]+)'
|
||||
_FEED_URL = 'http://lite.dextr.mtvi.com/service1/dispatch.htm'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nextmovie.com/shows/exclusives/2013-03-10/mgid:uma:videolist:nextmovie.com:1715019/',
|
||||
'md5': '09a9199f2f11f10107d04fcb153218aa',
|
||||
'info_dict': {
|
||||
'id': '961726',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Muppets\' Gravity',
|
||||
},
|
||||
}]
|
||||
|
||||
def _get_feed_query(self, uri):
|
||||
return compat_urllib_parse_urlencode({
|
||||
'feed': '1505',
|
||||
'mgid': uri,
|
||||
})
|
||||
|
||||
def _real_extract(self, url):
|
||||
mgid = self._match_id(url)
|
||||
return self._get_videos_info(mgid)
|
@@ -7,8 +7,9 @@ from ..utils import update_url_query
|
||||
|
||||
|
||||
class NickIE(MTVServicesInfoExtractor):
|
||||
# None of videos on the website are still alive?
|
||||
IE_NAME = 'nick.com'
|
||||
_VALID_URL = r'https?://(?:www\.)?nick\.com/videos/clip/(?P<id>[^/?#.]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?nick(?:jr)?\.com/(?:videos/clip|[^/]+/videos)/(?P<id>[^/?#.]+)'
|
||||
_FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nick.com/videos/clip/alvinnn-and-the-chipmunks-112-full-episode.html',
|
||||
@@ -52,6 +53,9 @@ class NickIE(MTVServicesInfoExtractor):
|
||||
}
|
||||
},
|
||||
],
|
||||
}, {
|
||||
'url': 'http://www.nickjr.com/paw-patrol/videos/pups-save-a-goldrush-s3-ep302-full-episode/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _get_feed_query(self, uri):
|
||||
|
72
youtube_dl/extractor/ninenow.py
Normal file
72
youtube_dl/extractor/ninenow.py
Normal file
@@ -0,0 +1,72 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class NineNowIE(InfoExtractor):
    """Extractor for 9now.com.au clip and episode pages.

    The page embeds its state as a ``window.__data`` JSON object; the actual
    media is delegated to the Brightcove player via a ``url_transparent``
    result handled by the ``BrightcoveNew`` extractor.
    """
    IE_NAME = '9now.com.au'
    _VALID_URL = r'https?://(?:www\.)?9now\.com\.au/(?:[^/]+/){2}(?P<id>[^/?#]+)'
    _TESTS = [{
        # clip
        'url': 'https://www.9now.com.au/afl-footy-show/2016/clip-ciql02091000g0hp5oktrnytc',
        'md5': '17cf47d63ec9323e562c9957a968b565',
        'info_dict': {
            'id': '16801',
            'ext': 'mp4',
            'title': 'St. Kilda\'s Joey Montagna on the potential for a player\'s strike',
            'description': 'Is a boycott of the NAB Cup "on the table"?',
            'uploader_id': '4460760524001',
            'upload_date': '20160713',
            'timestamp': 1468421266,
        },
        'skip': 'Only available in Australia',
    }, {
        # episode
        'url': 'https://www.9now.com.au/afl-footy-show/2016/episode-19',
        'only_matching': True,
    }, {
        # DRM protected
        'url': 'https://www.9now.com.au/andrew-marrs-history-of-the-world/season-1/episode-1',
        'only_matching': True,
    }]
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4460760524001/default_default/index.html?videoId=%s'

    def _real_extract(self, url):
        """Build a url_transparent Brightcove result for the given page URL.

        Raises ExtractorError when the page carries no episode/clip data or
        when the video is flagged as DRM protected.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        page_data = self._parse_json(self._search_regex(
            r'window\.__data\s*=\s*({.*?});', webpage,
            'page data'), display_id)

        # Episode pages keep their payload under episode.episode, clip pages
        # under clip.clip. `or {}` also covers keys that are present but null.
        common_data = (
            (page_data.get('episode') or {}).get('episode') or
            (page_data.get('clip') or {}).get('clip'))
        if not common_data:
            # Fail with a proper extractor error instead of a bare
            # "NoneType is not subscriptable" TypeError further down.
            raise ExtractorError('Unable to find video data')
        video_data = common_data['video']

        if video_data.get('drm'):
            raise ExtractorError('This video is DRM protected.', expected=True)

        # Prefer the numeric Brightcove id; fall back to the reference id
        # using the "ref:" prefix.
        brightcove_id = video_data.get('brightcoveId') or 'ref:' + video_data['referenceId']
        video_id = compat_str(video_data.get('id') or brightcove_id)
        title = common_data['name']

        # The width is parsed from the size key minus its first character
        # (presumably keys look like "w320" -- verify against real page data).
        image_sizes = (common_data.get('image') or {}).get('sizes') or {}
        thumbnails = [{
            'id': thumbnail_id,
            'url': thumbnail_url,
            'width': int_or_none(thumbnail_id[1:])
        } for thumbnail_id, thumbnail_url in image_sizes.items()]

        return {
            '_type': 'url_transparent',
            'url': self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
            'id': video_id,
            'title': title,
            'description': common_data.get('description'),
            # float_or_none divides by 1000 here (source value presumably in ms)
            'duration': float_or_none(video_data.get('duration'), 1000),
            'thumbnails': thumbnails,
            'ie_key': 'BrightcoveNew',
        }
|
46
youtube_dl/extractor/nintendo.py
Normal file
46
youtube_dl/extractor/nintendo.py
Normal file
@@ -0,0 +1,46 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
from ..utils import unescapeHTML
|
||||
|
||||
|
||||
class NintendoIE(InfoExtractor):
    """Extractor for nintendo.com game detail pages.

    Every embedded video on the page is an Ooyala embed; the page is returned
    as a playlist of Ooyala URL results.
    """
    _VALID_URL = r'https?://(?:www\.)?nintendo\.com/games/detail/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://www.nintendo.com/games/detail/yEiAzhU2eQI1KZ7wOHhngFoAHc1FpHwj',
        'info_dict': {
            'id': 'MzMmticjp0VPzO3CCj4rmFOuohEuEWoW',
            'ext': 'flv',
            'title': 'Duck Hunt Wii U VC NES - Trailer',
            'duration': 60.326,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': ['Ooyala'],
    }, {
        'url': 'http://www.nintendo.com/games/detail/tokyo-mirage-sessions-fe-wii-u',
        'info_dict': {
            'id': 'tokyo-mirage-sessions-fe-wii-u',
            'title': 'Tokyo Mirage Sessions ♯FE',
        },
        'playlist_count': 3,
    }]

    def _real_extract(self, url):
        """Return a playlist with one Ooyala entry per embedded video code."""
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)

        # Matches class="embed-video" ... data-video-code="<code>" with either
        # quote style; the code is captured in the named group.
        embed_pattern = r'class=(["\'])embed-video\1[^>]+data-video-code=(["\'])(?P<code>(?:(?!\2).)+)\2'

        entries = []
        for embed in re.finditer(embed_pattern, webpage):
            entries.append(OoyalaIE._build_url_result(embed.group('code')))

        # The og:title lookup is non-fatal, so the playlist title may be None.
        playlist_title = unescapeHTML(
            self._og_search_title(webpage, fatal=False))
        return self.playlist_result(entries, page_id, playlist_title)
|
50
youtube_dl/extractor/odatv.py
Normal file
50
youtube_dl/extractor/odatv.py
Normal file
@@ -0,0 +1,50 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
NO_DEFAULT,
|
||||
remove_start
|
||||
)
|
||||
|
||||
|
||||
class OdaTVIE(InfoExtractor):
    """Extractor for odatv.com desktop (vid_video.php) and mobile
    (mob_video.php) video pages."""
    _VALID_URL = r'https?://(?:www\.)?odatv\.com/(?:mob|vid)_video\.php\?.*\bid=(?P<id>[^&]+)'
    _TESTS = [{
        'url': 'http://odatv.com/vid_video.php?id=8E388',
        'md5': 'dc61d052f205c9bf2da3545691485154',
        'info_dict': {
            'id': '8E388',
            'ext': 'mp4',
            'title': 'Artık Davutoğlu ile devam edemeyiz'
        }
    }, {
        # mobile URL
        'url': 'http://odatv.com/mob_video.php?id=8E388',
        'only_matching': True,
    }, {
        # no video
        'url': 'http://odatv.com/mob_video.php?id=8E900',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract the direct mp4 URL, title and thumbnail of a video page.

        Raises an expected ExtractorError when the page reports 'NO VIDEO!'.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The site answers unknown ids with a page containing this marker;
        # report it as a missing video rather than a failed regex search.
        if 'NO VIDEO!' in webpage:
            raise ExtractorError('Video %s does not exist' % video_id, expected=True)

        video_url = self._search_regex(
            r'mp4\s*:\s*(["\'])(?P<url>http.+?)\1', webpage, 'video url',
            default=NO_DEFAULT, group='url')

        title = remove_start(self._og_search_title(webpage), 'Video: ')
        thumbnail = self._og_search_thumbnail(webpage)

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'thumbnail': thumbnail,
        }
|
169
youtube_dl/extractor/onet.py
Normal file
169
youtube_dl/extractor/onet.py
Normal file
@@ -0,0 +1,169 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
parse_iso8601,
|
||||
remove_start,
|
||||
strip_or_none,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
class OnetBaseIE(InfoExtractor):
    """Shared helpers for the onet.tv extractors."""

    def _search_mvp_id(self, webpage):
        """Return the id that follows the ``mvp:`` prefix in an ``id="mvp:..."``
        attribute of the page."""
        return self._search_regex(
            r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id')

    def _extract_from_id(self, video_id, webpage):
        """Query the onetapi asset service and build an info dict.

        video_id -- the mvp id to look up
        webpage  -- HTML of the page, used for og:title / og:description

        Raises ExtractorError when the service reports an error or returns
        no asset for the id.
        """
        response = self._download_json(
            'http://qi.ckm.onetapi.pl/', video_id,
            query={
                'body[id]': video_id,
                'body[jsonrpc]': '2.0',
                'body[method]': 'get_asset_detail',
                'body[params][ID_Publikacji]': video_id,
                'body[params][Service]': 'www.onet.pl',
                'content-type': 'application/jsonp',
                'x-onet-app': 'player.front.onetapi.pl',
            })

        error = response.get('error')
        if error:
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, error['message']), expected=True)

        video = response['result'].get('0')
        if not video:
            # .get('0') may yield None; fail explicitly instead of crashing
            # with a TypeError on video['formats'] below.
            raise ExtractorError('Unable to find video data for %s' % video_id)

        formats = []
        # video['formats'] is a two-level mapping whose leaves are lists of
        # format dicts; entries of any other shape are ignored.
        for formats_dict in video['formats'].values():
            if not isinstance(formats_dict, dict):
                continue
            for format_id, format_list in formats_dict.items():
                if not isinstance(format_list, list):
                    continue
                for f in format_list:
                    video_url = f.get('url')
                    if not video_url:
                        continue
                    ext = determine_ext(video_url)
                    if format_id == 'ism':
                        # TODO: Support Microsoft Smooth Streaming
                        continue
                    elif ext == 'mpd':
                        formats.extend(self._extract_mpd_formats(
                            video_url, video_id, mpd_id='dash', fatal=False))
                    else:
                        formats.append({
                            'url': video_url,
                            'format_id': format_id,
                            'height': int_or_none(f.get('vertical_resolution')),
                            'width': int_or_none(f.get('horizontal_resolution')),
                            'abr': float_or_none(f.get('audio_bitrate')),
                            'vbr': float_or_none(f.get('video_bitrate')),
                        })
        self._sort_formats(formats)

        meta = video.get('meta', {})

        # Page metadata wins; the API metadata is the fallback.
        title = self._og_search_title(webpage, default=None) or meta['title']
        description = self._og_search_description(webpage, default=None) or meta.get('description')
        # 'lenght' is read as a fallback key (presumably a misspelling used by
        # the API -- confirm against a live response).
        duration = meta.get('length') or meta.get('lenght')
        timestamp = parse_iso8601(meta.get('addDate'), ' ')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'timestamp': timestamp,
            'formats': formats,
        }
|
||||
|
||||
|
||||
class OnetIE(OnetBaseIE):
    """Extractor for single onet.tv video pages."""
    # Raw string: the pattern contains `\.`, which is an invalid escape
    # sequence in a normal string literal.
    _VALID_URL = r'https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/(?P<display_id>[0-9a-z-]+)/(?P<id>[0-9a-z]+)'
    IE_NAME = 'onet.tv'

    _TEST = {
        'url': 'http://onet.tv/k/openerfestival/open-er-festival-2016-najdziwniejsze-wymagania-gwiazd/qbpyqc',
        'md5': 'e3ffbf47590032ac3f27249204173d50',
        'info_dict': {
            'id': 'qbpyqc',
            'display_id': 'open-er-festival-2016-najdziwniejsze-wymagania-gwiazd',
            'ext': 'mp4',
            'title': 'Open\'er Festival 2016: najdziwniejsze wymagania gwiazd',
            'description': 'Trzy samochody, których nigdy nie użyto, prywatne spa, hotel dekorowany czarnym suknem czy nielegalne używki. Organizatorzy koncertów i festiwali muszą stawać przed nie lada wyzwaniem zapraszając gwia...',
            'upload_date': '20160705',
            'timestamp': 1467721580,
        },
    }

    def _real_extract(self, url):
        """Resolve the page's mvp id, extract through the shared helper and
        report the URL's own id / display id in the result."""
        mobj = re.match(self._VALID_URL, url)
        display_id, video_id = mobj.group('display_id', 'id')

        webpage = self._download_webpage(url, display_id)

        mvp_id = self._search_mvp_id(webpage)

        info_dict = self._extract_from_id(mvp_id, webpage)
        # The helper sets 'id' to the mvp id; override it with the id taken
        # from the URL.
        info_dict.update({
            'id': video_id,
            'display_id': display_id,
        })

        return info_dict
|
||||
|
||||
|
||||
class OnetChannelIE(OnetBaseIE):
    """Extractor for onet.tv channel pages (playlist of the linked videos)."""
    _VALID_URL = r'https?://(?:www\.)?onet\.tv/[a-z]/(?P<id>[a-z]+)(?:[?#]|$)'
    IE_NAME = 'onet.tv:channel'

    _TEST = {
        'url': 'http://onet.tv/k/openerfestival',
        'info_dict': {
            'id': 'openerfestival',
            'title': 'Open\'er Festival Live',
            'description': 'Dziękujemy, że oglądaliście transmisje. Zobaczcie nasze relacje i wywiady z artystami.',
        },
        'playlist_mincount': 46,
    }

    def _real_extract(self, url):
        """Return the channel playlist, or only the currently featured clip
        when the 'noplaylist' option is set."""
        channel_id = self._match_id(url)
        webpage = self._download_webpage(url, channel_id)

        def _clip_source_to_json(source):
            # Drop `' + '` sequences (JS string concatenations) before
            # converting the object literal to JSON.
            return js_to_json(re.sub(r'\'\s*\+\s*\'', '', source))

        clip_source = self._search_regex(
            r'var\s+currentClip\s*=\s*({[^}]+})', webpage, 'video info')
        current_clip_info = self._parse_json(
            clip_source, channel_id, transform_source=_clip_source_to_json)
        video_id = remove_start(current_clip_info['ckmId'], 'mvp:')
        video_name = url_basename(current_clip_info['url'])

        if self._downloader.params.get('noplaylist'):
            self.to_screen(
                'Downloading just video %s because of --no-playlist' % video_name)
            return self._extract_from_id(video_id, webpage)

        self.to_screen(
            'Downloading channel %s - add --no-playlist to just download video %s' % (
                channel_id, video_name))

        # Every anchor on the page that points at a single-video URL becomes
        # a playlist entry handled by OnetIE.
        video_links = re.findall(
            r'<a[^>]+href=[\'"](https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/[0-9a-z-]+/[0-9a-z]+)',
            webpage)
        entries = []
        for video_link in video_links:
            entries.append(self.url_result(video_link, OnetIE.ie_key()))

        channel_title = strip_or_none(get_element_by_class('o_channelName', webpage))
        channel_description = strip_or_none(get_element_by_class('o_channelDesc', webpage))
        return self.playlist_result(entries, channel_id, channel_title, channel_description)
|
@@ -8,6 +8,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
mimetype2ext,
|
||||
)
|
||||
|
||||
|
||||
@@ -50,9 +51,8 @@ class OnionStudiosIE(InfoExtractor):
|
||||
source_url = source.get('url')
|
||||
if not source_url:
|
||||
continue
|
||||
content_type = source.get('content_type')
|
||||
ext = determine_ext(source_url)
|
||||
if content_type == 'application/x-mpegURL' or ext == 'm3u8':
|
||||
ext = mimetype2ext(source.get('content_type')) or determine_ext(source_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
|
@@ -40,16 +40,16 @@ class ORFTVthekIE(InfoExtractor):
|
||||
'skip': 'Blocked outside of Austria / Germany',
|
||||
}, {
|
||||
'url': 'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256',
|
||||
'playlist': [{
|
||||
'md5': '68f543909aea49d621dfc7703a11cfaf',
|
||||
'info_dict': {
|
||||
'id': '7982259',
|
||||
'ext': 'mp4',
|
||||
'title': 'Best of Ingrid Thurnher',
|
||||
'upload_date': '20140527',
|
||||
'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".',
|
||||
}
|
||||
}],
|
||||
'info_dict': {
|
||||
'id': '7982259',
|
||||
'ext': 'mp4',
|
||||
'title': 'Best of Ingrid Thurnher',
|
||||
'upload_date': '20140527',
|
||||
'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im Jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # rtsp downloads
|
||||
},
|
||||
'_skip': 'Blocked outside of Austria / Germany',
|
||||
}]
|
||||
|
||||
@@ -137,13 +137,16 @@ class ORFTVthekIE(InfoExtractor):
|
||||
class ORFOE1IE(InfoExtractor):
|
||||
IE_NAME = 'orf:oe1'
|
||||
IE_DESC = 'Radio Österreich 1'
|
||||
_VALID_URL = r'https?://oe1\.orf\.at/(?:programm/|konsole.*?#\?track_id=)(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://oe1\.orf\.at/(?:programm/|konsole\?.*?\btrack_id=)(?P<id>[0-9]+)'
|
||||
|
||||
# Audios on ORF radio are only available for 7 days, so we can't add tests.
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://oe1.orf.at/konsole?show=on_demand#?track_id=394211',
|
||||
'only_matching': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://oe1.orf.at/konsole?show=ondemand&track_id=443608&load_day=/programm/konsole/tag/20160726',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id = self._match_id(url)
|
||||
|
@@ -15,7 +15,7 @@ from ..utils import (
|
||||
|
||||
class PlayvidIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.playvid.com/watch/RnmBNgtrrJu',
|
||||
'md5': 'ffa2f6b2119af359f544388d8c01eb6c',
|
||||
'info_dict': {
|
||||
@@ -24,8 +24,19 @@ class PlayvidIE(InfoExtractor):
|
||||
'title': 'md5:9256d01c6317e3f703848b5906880dc8',
|
||||
'duration': 82,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
},
|
||||
'skip': 'Video removed due to ToS',
|
||||
}, {
|
||||
'url': 'http://www.playvid.com/watch/hwb0GpNkzgH',
|
||||
'md5': '39d49df503ad7b8f23a4432cbf046477',
|
||||
'info_dict': {
|
||||
'id': 'hwb0GpNkzgH',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ellen Euro Cutie Blond Takes a Sexy Survey Get Facial in The Park',
|
||||
'age_limit': 18,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@@ -33,6 +33,7 @@ class PolskieRadioIE(InfoExtractor):
|
||||
'timestamp': 1456594200,
|
||||
'upload_date': '20160227',
|
||||
'duration': 2364,
|
||||
'thumbnail': 're:^https?://static\.prsa\.pl/images/.*\.jpg$'
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
@@ -68,6 +69,8 @@ class PolskieRadioIE(InfoExtractor):
|
||||
r'(?s)<span[^>]+id="datetime2"[^>]*>(.+?)</span>',
|
||||
webpage, 'timestamp', fatal=False))
|
||||
|
||||
thumbnail_url = self._og_search_thumbnail(webpage)
|
||||
|
||||
entries = []
|
||||
|
||||
media_urls = set()
|
||||
@@ -87,6 +90,7 @@ class PolskieRadioIE(InfoExtractor):
|
||||
'duration': int_or_none(media.get('length')),
|
||||
'vcodec': 'none' if media.get('provider') == 'audio' else None,
|
||||
'timestamp': timestamp,
|
||||
'thumbnail': thumbnail_url
|
||||
})
|
||||
|
||||
title = self._og_search_title(webpage).strip()
|
||||
|
@@ -111,7 +111,7 @@ class PornHubIE(InfoExtractor):
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
error_msg = self._html_search_regex(
|
||||
r'(?s)<div[^>]+class=(["\']).*?\b(?:removed|userMessageSection)\b.*?\1[^>]*>(?P<error>.+?)</div>',
|
||||
r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
|
||||
webpage, 'error message', default=None, group='error')
|
||||
if error_msg:
|
||||
error_msg = re.sub(r'\s+', ' ', error_msg)
|
||||
|
@@ -12,6 +12,7 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
xpath_element,
|
||||
ExtractorError,
|
||||
determine_protocol,
|
||||
)
|
||||
|
||||
|
||||
@@ -22,13 +23,13 @@ class RadioCanadaIE(InfoExtractor):
|
||||
'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272',
|
||||
'info_dict': {
|
||||
'id': '7184272',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Le parcours du tireur capté sur vidéo',
|
||||
'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa',
|
||||
'upload_date': '20141023',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
@@ -36,11 +37,14 @@ class RadioCanadaIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
app_code, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
device_types = ['ipad', 'android']
|
||||
if app_code != 'toutv':
|
||||
device_types.append('flash')
|
||||
|
||||
formats = []
|
||||
# TODO: extract m3u8 and f4m formats
|
||||
# m3u8 formats can be extracted using ipad device_type return 403 error code when ffmpeg try to download segements
|
||||
# TODO: extract f4m formats
|
||||
# f4m formats can be extracted using flashhd device_type but they produce unplayable file
|
||||
for device_type in ('flash',):
|
||||
for device_type in device_types:
|
||||
v_data = self._download_xml(
|
||||
'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx',
|
||||
video_id, note='Downloading %s XML' % device_type, query={
|
||||
@@ -52,7 +56,7 @@ class RadioCanadaIE(InfoExtractor):
|
||||
# paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction
|
||||
'paysJ391wsHjbOJwvCs26toz': 'CA',
|
||||
'bypasslock': 'NZt5K62gRqfc',
|
||||
})
|
||||
}, fatal=False)
|
||||
v_url = xpath_text(v_data, 'url')
|
||||
if not v_url:
|
||||
continue
|
||||
@@ -64,7 +68,8 @@ class RadioCanadaIE(InfoExtractor):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
v_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(v_url, video_id, f4m_id='hds', fatal=False))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
v_url, video_id, f4m_id='hds', fatal=False))
|
||||
else:
|
||||
ext = determine_ext(v_url)
|
||||
bitrates = xpath_element(v_data, 'bitrates')
|
||||
@@ -72,15 +77,28 @@ class RadioCanadaIE(InfoExtractor):
|
||||
tbr = int_or_none(url_e.get('bitrate'))
|
||||
if not tbr:
|
||||
continue
|
||||
f_url = re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url)
|
||||
protocol = determine_protocol({'url': f_url})
|
||||
formats.append({
|
||||
'format_id': 'rtmp-%d' % tbr,
|
||||
'url': re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url),
|
||||
'ext': 'flv',
|
||||
'protocol': 'rtmp',
|
||||
'format_id': '%s-%d' % (protocol, tbr),
|
||||
'url': f_url,
|
||||
'ext': 'flv' if protocol == 'rtmp' else ext,
|
||||
'protocol': protocol,
|
||||
'width': int_or_none(url_e.get('width')),
|
||||
'height': int_or_none(url_e.get('height')),
|
||||
'tbr': tbr,
|
||||
})
|
||||
if protocol == 'rtsp':
|
||||
base_url = self._search_regex(
|
||||
r'rtsp://([^?]+)', f_url, 'base url', default=None)
|
||||
if base_url:
|
||||
base_url = 'http://' + base_url
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
base_url + '/playlist.m3u8', video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
base_url + '/manifest.f4m', video_id,
|
||||
f4m_id='hds', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
metadata = self._download_xml(
|
||||
@@ -115,13 +133,13 @@ class RadioCanadaAudioVideoIE(InfoExtractor):
|
||||
'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',
|
||||
'info_dict': {
|
||||
'id': '7527184',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Barack Obama au Vietnam',
|
||||
'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam',
|
||||
'upload_date': '20160523',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
148
youtube_dl/extractor/roosterteeth.py
Normal file
148
youtube_dl/extractor/roosterteeth.py
Normal file
@@ -0,0 +1,148 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
unescapeHTML,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class RoosterTeethIE(InfoExtractor):
    """Extractor for ``roosterteeth.com/episode/<slug>`` pages.

    The URL pattern also accepts any subdomain of roosterteeth.com (see the
    ``only_matching`` tests).  Formats are taken from the HLS playlist
    referenced in the episode page.
    """

    _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/episode/(?P<id>[^/?#&]+)'
    _LOGIN_URL = 'https://roosterteeth.com/login'
    _NETRC_MACHINE = 'roosterteeth'
    _TESTS = [{
        'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
        'md5': 'e2bd7764732d785ef797700a2489f212',
        'info_dict': {
            'id': '26576',
            'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement',
            'ext': 'mp4',
            'title': 'Million Dollars, But...: Million Dollars, But... The Game Announcement',
            'description': 'md5:0cc3b21986d54ed815f5faeccd9a9ca5',
            # Raw string: the pattern contains a regex escape (\.)
            'thumbnail': r're:^https?://.*\.png$',
            'series': 'Million Dollars, But...',
            'episode': 'Million Dollars, But... The Game Announcement',
            'comment_count': int,
        },
    }, {
        'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31',
        'only_matching': True,
    }, {
        'url': 'http://funhaus.roosterteeth.com/episode/funhaus-shorts-2016-austin-sucks-funhaus-shorts',
        'only_matching': True,
    }, {
        'url': 'http://screwattack.roosterteeth.com/episode/death-battle-season-3-mewtwo-vs-shadow',
        'only_matching': True,
    }, {
        'url': 'http://theknow.roosterteeth.com/episode/the-know-game-news-season-1-boring-steam-sales-are-better',
        'only_matching': True,
    }, {
        # only available for FIRST members
        'url': 'http://roosterteeth.com/episode/rt-docs-the-world-s-greatest-head-massage-the-world-s-greatest-head-massage-an-asmr-journey-part-one',
        'only_matching': True,
    }]

    def _login(self):
        """Log in with the configured credentials, if any.

        Does nothing when no username is available.  Raises ExtractorError
        when the response page shows neither a logout link nor a
        "Sign Out" label; the site's alert text is included when found.
        """
        (username, password) = self._get_login_info()
        if username is None:
            return

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='Unable to download login page')

        # Start from the hidden form inputs of the login page and add the
        # credentials on top of them.
        login_form = self._hidden_inputs(login_page)

        login_form.update({
            'username': username,
            'password': password,
        })

        login_request = self._download_webpage(
            self._LOGIN_URL, None,
            note='Logging in as %s' % username,
            data=urlencode_postdata(login_form),
            headers={
                'Referer': self._LOGIN_URL,
            })

        # A successful login is recognised by a logout link / sign-out label.
        if not any(re.search(p, login_request) for p in (
                r'href=["\']https?://(?:www\.)?roosterteeth\.com/logout"',
                r'>Sign Out<')):
            error = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\balert-danger\b.*?\1[^>]*>(?:\s*<button[^>]*>.*?</button>)?(?P<error>.+?)</div>',
                login_request, 'alert', default=None, group='error')
            if error:
                raise ExtractorError('Unable to login: %s' % error, expected=True)
            raise ExtractorError('Unable to log in')

    def _real_initialize(self):
        """Perform the (optional) login before any extraction."""
        self._login()

    def _real_extract(self, url):
        """Return the info dict for the episode page at *url*.

        Raises a login-required error when the page has no m3u8 URL and
        carries a ``non-sponsor`` or ``golive-gate`` marker, and
        ExtractorError when no m3u8 URL can be found otherwise.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        episode = strip_or_none(unescapeHTML(self._search_regex(
            (r'videoTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
             r'<title>(?P<title>[^<]+)</title>'), webpage, 'title',
            default=None, group='title')))

        # Prefer the OpenGraph title; fall back to the episode name.
        title = strip_or_none(self._og_search_title(
            webpage, default=None)) or episode

        m3u8_url = self._search_regex(
            r'file\s*:\s*(["\'])(?P<url>http.+?\.m3u8.*?)\1',
            webpage, 'm3u8 url', default=None, group='url')

        if not m3u8_url:
            if re.search(r'<div[^>]+class=["\']non-sponsor', webpage):
                self.raise_login_required(
                    '%s is only available for FIRST members' % display_id)

            if re.search(r'<div[^>]+class=["\']golive-gate', webpage):
                self.raise_login_required('%s is not available yet' % display_id)

            raise ExtractorError('Unable to extract m3u8 URL')

        formats = self._extract_m3u8_formats(
            m3u8_url, display_id, ext='mp4',
            entry_protocol='m3u8_native', m3u8_id='hls')
        self._sort_formats(formats)

        description = strip_or_none(self._og_search_description(webpage))
        thumbnail = self._proto_relative_url(self._og_search_thumbnail(webpage))

        series = self._search_regex(
            (r'<h2>More ([^<]+)</h2>', r'<a[^>]+>See All ([^<]+) Videos<'),
            webpage, 'series', fatal=False)

        comment_count = int_or_none(self._search_regex(
            r'>Comments \((\d+)\)<', webpage,
            'comment count', fatal=False))

        # The opening quote is captured so that the trailing \1 really
        # back-references the quote character; the numeric id lives in the
        # named group "id" in both patterns.
        video_id = self._search_regex(
            (r'containerId\s*=\s*(["\'])episode-(?P<id>\d+)\1',
             r'<div[^<]+id=["\']episode-(?P<id>\d+)'), webpage,
            'video id', default=display_id, group='id')

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'series': series,
            'episode': episode,
            'comment_count': comment_count,
            'formats': formats,
        }
|
@@ -113,9 +113,7 @@ class RTVEALaCartaIE(InfoExtractor):
|
||||
png = self._download_webpage(png_request, video_id, 'Downloading url information')
|
||||
video_url = _decrypt_url(png)
|
||||
if not video_url.endswith('.f4m'):
|
||||
video_url = video_url.replace(
|
||||
'resources/', 'auth/resources/'
|
||||
).replace('.net.rtve', '.multimedia.cdn.rtve')
|
||||
video_url = video_url.replace('.net.rtve', '.multimedia.cdn.rtve')
|
||||
|
||||
subtitles = None
|
||||
if info.get('sbtFile') is not None:
|
||||
@@ -222,3 +220,34 @@ class RTVELiveIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
'is_live': True,
|
||||
}
|
||||
|
||||
|
||||
class RTVETelevisionIE(InfoExtractor):
    """Extractor for ``rtve.es/television/.../<id>.shtml`` pages.

    These pages only embed a link to an "a la carta" video page; the
    actual extraction is delegated to RTVEALaCartaIE.
    """

    IE_NAME = 'rtve.es:television'
    # The dot before "shtml" is escaped so it matches a literal "." only.
    _VALID_URL = r'https?://www\.rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+)\.shtml'

    _TEST = {
        'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml',
        'info_dict': {
            'id': '3069778',
            'ext': 'mp4',
            'title': 'Documentos TV - La revolución del móvil',
            'duration': 3496.948,
        },
        'params': {
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Find the embedded "a la carta" URL and hand it to RTVEALaCartaIE.

        Raises ExtractorError (expected) when the page contains no such URL.
        """
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)

        alacarta_url = self._search_regex(
            r'data-location="alacarta_videos"[^<]+url":"(http://www\.rtve\.es/alacarta.+?)&',
            webpage, 'alacarta url', default=None)
        if alacarta_url is None:
            raise ExtractorError(
                'The webpage doesn\'t contain any video', expected=True)

        return self.url_result(alacarta_url, ie=RTVEALaCartaIE.ie_key())
|
||||
|
53
youtube_dl/extractor/rudo.py
Normal file
53
youtube_dl/extractor/rudo.py
Normal file
@@ -0,0 +1,53 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .jwplatform import JWPlatformBaseIE
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
get_element_by_class,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class RudoIE(JWPlatformBaseIE):
    """Extractor for ``rudo.video/vod/<id>`` pages (JW Player based)."""

    _VALID_URL = r'https?://rudo\.video/vod/(?P<id>[0-9a-zA-Z]+)'

    _TEST = {
        'url': 'http://rudo.video/vod/oTzw0MGnyG',
        'md5': '2a03a5b32dd90a04c83b6d391cf7b415',
        'info_dict': {
            'id': 'oTzw0MGnyG',
            'ext': 'mp4',
            'title': 'Comentario Tomás Mosciatti',
            'upload_date': '20160617',
        },
    }

    @classmethod
    def _extract_url(cls, webpage):
        """Return the URL of the first rudo.video iframe in *webpage*.

        Returns None when no such iframe is present.
        """
        # Raw string: the pattern contains regex escapes (\.).
        mobj = re.search(
            r'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)',
            webpage)
        if mobj:
            return mobj.group('url')

    def _real_extract(self, url):
        """Return the info dict built from the page's JW Player setup call."""
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id, encoding='iso-8859-1')

        # encodeURI(...) calls are replaced by an empty string literal before
        # the setup object is converted to JSON.
        jwplayer_data = self._parse_json(self._search_regex(
            r'(?s)playerInstance\.setup\(({.+?})\)', webpage, 'jwplayer data'), video_id,
            transform_source=lambda s: js_to_json(re.sub(r'encodeURI\([^)]+\)', '""', s)))

        info_dict = self._parse_jwplayer_data(
            jwplayer_data, video_id, require_title=False, m3u8_id='hls')

        # Title and upload date come from the page itself, not the player data.
        info_dict.update({
            'title': self._og_search_title(webpage),
            'upload_date': unified_strdate(get_element_by_class('date', webpage)),
        })

        return info_dict
|
@@ -2,11 +2,11 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlencode
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -33,45 +33,27 @@ class ShahidIE(InfoExtractor):
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
def _handle_error(self, response):
|
||||
if not isinstance(response, dict):
|
||||
return
|
||||
error = response.get('error')
|
||||
def _call_api(self, path, video_id, note):
|
||||
data = self._download_json(
|
||||
'http://api.shahid.net/api/v1_1/' + path, video_id, note, query={
|
||||
'apiKey': 'sh@hid0nlin3',
|
||||
'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
|
||||
}).get('data', {})
|
||||
|
||||
error = data.get('error')
|
||||
if error:
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())),
|
||||
expected=True)
|
||||
|
||||
def _download_json(self, url, video_id, note='Downloading JSON metadata'):
|
||||
response = super(ShahidIE, self)._download_json(url, video_id, note)['data']
|
||||
self._handle_error(response)
|
||||
return response
|
||||
return data
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
api_vars = {
|
||||
'id': video_id,
|
||||
'type': 'player',
|
||||
'url': 'http://api.shahid.net/api/v1_1',
|
||||
'playerType': 'episode',
|
||||
}
|
||||
|
||||
flashvars = self._search_regex(
|
||||
r'var\s+flashvars\s*=\s*({[^}]+})', webpage, 'flashvars', default=None)
|
||||
if flashvars:
|
||||
for key in api_vars.keys():
|
||||
value = self._search_regex(
|
||||
r'\b%s\s*:\s*(?P<q>["\'])(?P<value>.+?)(?P=q)' % key,
|
||||
flashvars, 'type', default=None, group='value')
|
||||
if value:
|
||||
api_vars[key] = value
|
||||
|
||||
player = self._download_json(
|
||||
'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-%s.type-%s.html'
|
||||
% (video_id, api_vars['type']), video_id, 'Downloading player JSON')
|
||||
player = self._call_api(
|
||||
'Content/Episode/%s' % video_id,
|
||||
video_id, 'Downloading player JSON')
|
||||
|
||||
if player.get('drm'):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
@@ -79,22 +61,11 @@ class ShahidIE(InfoExtractor):
|
||||
formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
video = self._download_json(
|
||||
'%s/%s/%s?%s' % (
|
||||
api_vars['url'], api_vars['playerType'], api_vars['id'],
|
||||
compat_urllib_parse_urlencode({
|
||||
'apiKey': 'sh@hid0nlin3',
|
||||
'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
|
||||
})),
|
||||
video_id, 'Downloading video JSON')
|
||||
|
||||
video = video[api_vars['playerType']]
|
||||
video = self._call_api(
|
||||
'episode/%s' % video_id, video_id,
|
||||
'Downloading video JSON')['episode']
|
||||
|
||||
title = video['title']
|
||||
description = video.get('description')
|
||||
thumbnail = video.get('thumbnailUrl')
|
||||
duration = int_or_none(video.get('duration'))
|
||||
timestamp = parse_iso8601(video.get('referenceDate'))
|
||||
categories = [
|
||||
category['name']
|
||||
for category in video.get('genres', []) if 'name' in category]
|
||||
@@ -102,10 +73,16 @@ class ShahidIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'description': video.get('description'),
|
||||
'thumbnail': video.get('thumbnailUrl'),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'timestamp': parse_iso8601(video.get('referenceDate')),
|
||||
'categories': categories,
|
||||
'series': video.get('showTitle') or video.get('showName'),
|
||||
'season': video.get('seasonTitle'),
|
||||
'season_number': int_or_none(video.get('seasonNumber')),
|
||||
'season_id': str_or_none(video.get('seasonId')),
|
||||
'episode_number': int_or_none(video.get('number')),
|
||||
'episode_id': video_id,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -5,6 +5,8 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
qualities,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
@@ -34,19 +36,21 @@ class SixPlayIE(InfoExtractor):
|
||||
source_type, source_url = source.get('type'), source.get('src')
|
||||
if not source_url or source_type == 'hls/primetime':
|
||||
continue
|
||||
if source_type == 'application/vnd.apple.mpegURL':
|
||||
ext = mimetype2ext(source_type) or determine_ext(source_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
source_url.replace('.m3u8', '.f4m'),
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
elif source_type == 'video/mp4':
|
||||
elif ext == 'mp4':
|
||||
quality = source.get('quality')
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
'format_id': quality,
|
||||
'quality': quality_key(quality),
|
||||
'ext': ext,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@@ -13,20 +13,21 @@ from ..utils import (
|
||||
sanitized_Request,
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class SmotriIE(InfoExtractor):
|
||||
IE_DESC = 'Smotri.com'
|
||||
IE_NAME = 'smotri'
|
||||
_VALID_URL = r'^https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<id>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<id>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
|
||||
_NETRC_MACHINE = 'smotri'
|
||||
|
||||
_TESTS = [
|
||||
# real video id 2610366
|
||||
{
|
||||
'url': 'http://smotri.com/video/view/?id=v261036632ab',
|
||||
'md5': '2a7b08249e6f5636557579c368040eb9',
|
||||
'md5': '02c0dfab2102984e9c5bb585cc7cc321',
|
||||
'info_dict': {
|
||||
'id': 'v261036632ab',
|
||||
'ext': 'mp4',
|
||||
@@ -174,11 +175,11 @@ class SmotriIE(InfoExtractor):
|
||||
if video_password:
|
||||
video_form['pass'] = hashlib.md5(video_password.encode('utf-8')).hexdigest()
|
||||
|
||||
request = sanitized_Request(
|
||||
'http://smotri.com/video/view/url/bot/', urlencode_postdata(video_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
|
||||
video = self._download_json(request, video_id, 'Downloading video JSON')
|
||||
video = self._download_json(
|
||||
'http://smotri.com/video/view/url/bot/',
|
||||
video_id, 'Downloading video JSON',
|
||||
data=urlencode_postdata(video_form),
|
||||
headers={'Content-Type': 'application/x-www-form-urlencoded'})
|
||||
|
||||
video_url = video.get('_vidURL') or video.get('_vidURL_mp4')
|
||||
|
||||
@@ -196,11 +197,11 @@ class SmotriIE(InfoExtractor):
|
||||
raise ExtractorError(msg, expected=True)
|
||||
|
||||
title = video['title']
|
||||
thumbnail = video['_imgURL']
|
||||
upload_date = unified_strdate(video['added'])
|
||||
uploader = video['userNick']
|
||||
uploader_id = video['userLogin']
|
||||
duration = int_or_none(video['duration'])
|
||||
thumbnail = video.get('_imgURL')
|
||||
upload_date = unified_strdate(video.get('added'))
|
||||
uploader = video.get('userNick')
|
||||
uploader_id = video.get('userLogin')
|
||||
duration = int_or_none(video.get('duration'))
|
||||
|
||||
# Video JSON does not provide enough meta data
|
||||
# We will extract some from the video web page instead
|
||||
@@ -209,7 +210,7 @@ class SmotriIE(InfoExtractor):
|
||||
|
||||
# Warning if video is unavailable
|
||||
warning = self._html_search_regex(
|
||||
r'<div class="videoUnModer">(.*?)</div>', webpage,
|
||||
r'<div[^>]+class="videoUnModer"[^>]*>(.+?)</div>', webpage,
|
||||
'warning message', default=None)
|
||||
if warning is not None:
|
||||
self._downloader.report_warning(
|
||||
@@ -217,20 +218,22 @@ class SmotriIE(InfoExtractor):
|
||||
(video_id, warning))
|
||||
|
||||
# Adult content
|
||||
if re.search('EroConfirmText">', webpage) is not None:
|
||||
if 'EroConfirmText">' in webpage:
|
||||
self.report_age_confirmation()
|
||||
confirm_string = self._html_search_regex(
|
||||
r'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id,
|
||||
r'<a[^>]+href="/video/view/\?id=%s&confirm=([^"]+)"' % video_id,
|
||||
webpage, 'confirm string')
|
||||
confirm_url = webpage_url + '&confirm=%s' % confirm_string
|
||||
webpage = self._download_webpage(confirm_url, video_id, 'Downloading video page (age confirmed)')
|
||||
webpage = self._download_webpage(
|
||||
confirm_url, video_id,
|
||||
'Downloading video page (age confirmed)')
|
||||
adult_content = True
|
||||
else:
|
||||
adult_content = False
|
||||
|
||||
view_count = self._html_search_regex(
|
||||
'Общее количество просмотров.*?<span class="Number">(\\d+)</span>',
|
||||
webpage, 'view count', fatal=False, flags=re.MULTILINE | re.DOTALL)
|
||||
r'(?s)Общее количество просмотров.*?<span class="Number">(\d+)</span>',
|
||||
webpage, 'view count', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -249,37 +252,33 @@ class SmotriIE(InfoExtractor):
|
||||
class SmotriCommunityIE(InfoExtractor):
|
||||
IE_DESC = 'Smotri.com community videos'
|
||||
IE_NAME = 'smotri:community'
|
||||
_VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?smotri\.com/community/video/(?P<id>[0-9A-Za-z_\'-]+)'
|
||||
_TEST = {
|
||||
'url': 'http://smotri.com/community/video/kommuna',
|
||||
'info_dict': {
|
||||
'id': 'kommuna',
|
||||
'title': 'КПРФ',
|
||||
},
|
||||
'playlist_mincount': 4,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
community_id = mobj.group('communityid')
|
||||
community_id = self._match_id(url)
|
||||
|
||||
url = 'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id
|
||||
rss = self._download_xml(url, community_id, 'Downloading community RSS')
|
||||
rss = self._download_xml(
|
||||
'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id,
|
||||
community_id, 'Downloading community RSS')
|
||||
|
||||
entries = [self.url_result(video_url.text, 'Smotri')
|
||||
for video_url in rss.findall('./channel/item/link')]
|
||||
entries = [
|
||||
self.url_result(video_url.text, SmotriIE.ie_key())
|
||||
for video_url in rss.findall('./channel/item/link')]
|
||||
|
||||
description_text = rss.find('./channel/description').text
|
||||
community_title = self._html_search_regex(
|
||||
'^Видео сообщества "([^"]+)"$', description_text, 'community title')
|
||||
|
||||
return self.playlist_result(entries, community_id, community_title)
|
||||
return self.playlist_result(entries, community_id)
|
||||
|
||||
|
||||
class SmotriUserIE(InfoExtractor):
|
||||
IE_DESC = 'Smotri.com user videos'
|
||||
IE_NAME = 'smotri:user'
|
||||
_VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?smotri\.com/user/(?P<id>[0-9A-Za-z_\'-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://smotri.com/user/inspector',
|
||||
'info_dict': {
|
||||
@@ -290,19 +289,19 @@ class SmotriUserIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
user_id = mobj.group('userid')
|
||||
user_id = self._match_id(url)
|
||||
|
||||
url = 'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id
|
||||
rss = self._download_xml(url, user_id, 'Downloading user RSS')
|
||||
rss = self._download_xml(
|
||||
'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id,
|
||||
user_id, 'Downloading user RSS')
|
||||
|
||||
entries = [self.url_result(video_url.text, 'Smotri')
|
||||
for video_url in rss.findall('./channel/item/link')]
|
||||
|
||||
description_text = rss.find('./channel/description').text
|
||||
user_nickname = self._html_search_regex(
|
||||
'^Видео режиссера (.*)$', description_text,
|
||||
'user nickname')
|
||||
description_text = xpath_text(rss, './channel/description') or ''
|
||||
user_nickname = self._search_regex(
|
||||
'^Видео режиссера (.+)$', description_text,
|
||||
'user nickname', fatal=False)
|
||||
|
||||
return self.playlist_result(entries, user_id, user_nickname)
|
||||
|
||||
@@ -310,11 +309,11 @@ class SmotriUserIE(InfoExtractor):
|
||||
class SmotriBroadcastIE(InfoExtractor):
|
||||
IE_DESC = 'Smotri.com broadcasts'
|
||||
IE_NAME = 'smotri:broadcast'
|
||||
_VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<broadcastid>[^/]+))/?.*'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<id>[^/]+))/?.*'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
broadcast_id = mobj.group('broadcastid')
|
||||
broadcast_id = mobj.group('id')
|
||||
|
||||
broadcast_url = 'http://' + mobj.group('url')
|
||||
broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page')
|
||||
@@ -328,7 +327,8 @@ class SmotriBroadcastIE(InfoExtractor):
|
||||
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
self.raise_login_required('Erotic broadcasts allowed only for registered users')
|
||||
self.raise_login_required(
|
||||
'Erotic broadcasts allowed only for registered users')
|
||||
|
||||
login_form = {
|
||||
'login-hint53': '1',
|
||||
@@ -343,8 +343,9 @@ class SmotriBroadcastIE(InfoExtractor):
|
||||
broadcast_page = self._download_webpage(
|
||||
request, broadcast_id, 'Logging in and confirming age')
|
||||
|
||||
if re.search('>Неверный логин или пароль<', broadcast_page) is not None:
|
||||
raise ExtractorError('Unable to log in: bad username or password', expected=True)
|
||||
if '>Неверный логин или пароль<' in broadcast_page:
|
||||
raise ExtractorError(
|
||||
'Unable to log in: bad username or password', expected=True)
|
||||
|
||||
adult_content = True
|
||||
else:
|
||||
@@ -383,11 +384,11 @@ class SmotriBroadcastIE(InfoExtractor):
|
||||
|
||||
broadcast_playpath = broadcast_json['_streamName']
|
||||
broadcast_app = '%s/%s' % (mobj.group('app'), broadcast_json['_vidURL'])
|
||||
broadcast_thumbnail = broadcast_json['_imgURL']
|
||||
broadcast_thumbnail = broadcast_json.get('_imgURL')
|
||||
broadcast_title = self._live_title(broadcast_json['title'])
|
||||
broadcast_description = broadcast_json['description']
|
||||
broadcaster_nick = broadcast_json['nick']
|
||||
broadcaster_login = broadcast_json['login']
|
||||
broadcast_description = broadcast_json.get('description')
|
||||
broadcaster_nick = broadcast_json.get('nick')
|
||||
broadcaster_login = broadcast_json.get('login')
|
||||
rtmp_conn = 'S:%s' % uuid.uuid4().hex
|
||||
except KeyError:
|
||||
if protected_broadcast:
|
||||
|
@@ -17,6 +17,8 @@ class SouthParkIE(MTVServicesInfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'South Park|Bat Daded',
|
||||
'description': 'Randy disqualifies South Park by getting into a fight with Bat Dad.',
|
||||
'timestamp': 1112760000,
|
||||
'upload_date': '20050406',
|
||||
},
|
||||
}]
|
||||
|
||||
@@ -28,6 +30,10 @@ class SouthParkEsIE(SouthParkIE):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://southpark.cc.com/episodios-en-espanol/s01e01-cartman-consigue-una-sonda-anal#source=351c1323-0b96-402d-a8b9-40d01b2e9bde&position=1&sort=!airdate',
|
||||
'info_dict': {
|
||||
'title': 'Cartman Consigue Una Sonda Anal',
|
||||
'description': 'Cartman Consigue Una Sonda Anal',
|
||||
},
|
||||
'playlist_count': 4,
|
||||
}]
|
||||
|
||||
@@ -42,17 +48,27 @@ class SouthParkDeIE(SouthParkIE):
|
||||
'info_dict': {
|
||||
'id': '85487c96-b3b9-4e39-9127-ad88583d9bf2',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Government Won\'t Respect My Privacy',
|
||||
'title': 'South Park|The Government Won\'t Respect My Privacy',
|
||||
'description': 'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
|
||||
'timestamp': 1380160800,
|
||||
'upload_date': '20130926',
|
||||
},
|
||||
}, {
|
||||
# non-ASCII characters in initial URL
|
||||
'url': 'http://www.southpark.de/alle-episoden/s18e09-hashtag-aufwärmen',
|
||||
'playlist_count': 4,
|
||||
'info_dict': {
|
||||
'title': 'Hashtag „Aufwärmen“',
|
||||
'description': 'Kyle will mit seinem kleinen Bruder Ike Videospiele spielen. Als der nicht mehr mit ihm spielen will, hat Kyle Angst, dass er die Kids von heute nicht mehr versteht.',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
# non-ASCII characters in redirect URL
|
||||
'url': 'http://www.southpark.de/alle-episoden/s18e09',
|
||||
'playlist_count': 4,
|
||||
'info_dict': {
|
||||
'title': 'Hashtag „Aufwärmen“',
|
||||
'description': 'Kyle will mit seinem kleinen Bruder Ike Videospiele spielen. Als der nicht mehr mit ihm spielen will, hat Kyle Angst, dass er die Kids von heute nicht mehr versteht.',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}]
|
||||
|
||||
|
||||
@@ -63,7 +79,11 @@ class SouthParkNlIE(SouthParkIE):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.southpark.nl/full-episodes/s18e06-freemium-isnt-free',
|
||||
'playlist_count': 4,
|
||||
'info_dict': {
|
||||
'title': 'Freemium Isn\'t Free',
|
||||
'description': 'Stan is addicted to the new Terrance and Phillip mobile game.',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}]
|
||||
|
||||
|
||||
@@ -74,5 +94,9 @@ class SouthParkDkIE(SouthParkIE):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.southparkstudios.dk/full-episodes/s18e07-grounded-vindaloop',
|
||||
'playlist_count': 4,
|
||||
'info_dict': {
|
||||
'title': 'Grounded Vindaloop',
|
||||
'description': 'Butters is convinced he\'s living in a virtual reality.',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}]
|
||||
|
@@ -113,6 +113,7 @@ class SpiegelArticleIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Faszination Badminton: Nennt es bloß nicht Federball',
|
||||
'description': 're:^Patrick Kämnitz gehört.{100,}',
|
||||
'upload_date': '20140825',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.spiegel.de/wissenschaft/weltall/astronaut-alexander-gerst-antwortet-spiegel-online-lesern-a-989876.html',
|
||||
|
@@ -4,26 +4,31 @@ from .mtv import MTVServicesInfoExtractor
|
||||
|
||||
|
||||
class SpikeIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:www\.spike\.com/(?:video-(?:clips|playlists)|(?:full-)?episodes)/.+|
|
||||
m\.spike\.com/videos/video\.rbml\?id=(?P<id>[^&]+))
|
||||
'''
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?spike\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle',
|
||||
'md5': '1a9265f32b0c375793d6c4ce45255256',
|
||||
'info_dict': {
|
||||
'id': 'b9c8221a-4e50-479a-b86d-3333323e38ba',
|
||||
'ext': 'mp4',
|
||||
'title': 'Auction Hunters|Can Allen Ride A Hundred Year-Old Motorcycle?',
|
||||
'title': 'Auction Hunters|December 27, 2013|4|414|Can Allen Ride A Hundred Year-Old Motorcycle?',
|
||||
'description': 'md5:fbed7e82ed5fad493615b3094a9499cb',
|
||||
'timestamp': 1388120400,
|
||||
'upload_date': '20131227',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.spike.com/video-clips/lhtu8m/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.spike.com/video-clips/lhtu8m',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://bellator.spike.com/fight/atwr7k/bellator-158-michael-page-vs-evangelista-cyborg',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://bellator.spike.com/video-clips/bw6k7n/bellator-158-foundations-michael-venom-page',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_FEED_URL = 'http://www.spike.com/feeds/mrss/'
|
||||
_MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobile_id = self._match_id(url)
|
||||
if mobile_id:
|
||||
url = 'http://www.spike.com/video-clips/%s' % mobile_id
|
||||
return super(SpikeIE, self)._real_extract(url)
|
||||
|
@@ -11,7 +11,7 @@ from ..utils import (
|
||||
class SRMediathekIE(ARDMediathekIE):
|
||||
IE_NAME = 'sr:mediathek'
|
||||
IE_DESC = 'Saarländischer Rundfunk'
|
||||
_VALID_URL = r'https?://sr-mediathek\.sr-online\.de/index\.php\?.*?&id=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://sr-mediathek(?:\.sr-online)?\.de/index\.php\?.*?&id=(?P<id>[0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://sr-mediathek.sr-online.de/index.php?seite=7&id=28455',
|
||||
@@ -35,7 +35,9 @@ class SRMediathekIE(ARDMediathekIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest']
|
||||
}, {
|
||||
'url': 'http://sr-mediathek.de/index.php?seite=7&id=7480',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
98
youtube_dl/extractor/streamable.py
Normal file
98
youtube_dl/extractor/streamable.py
Normal file
@@ -0,0 +1,98 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class StreamableIE(InfoExtractor):
    """Extractor for ``streamable.com/<id>`` and ``streamable.com/e/<id>``."""

    _VALID_URL = r'https?://streamable\.com/(?:e/)?(?P<id>\w+)'
    _TESTS = [
        {
            'url': 'https://streamable.com/dnd1',
            'md5': '3e3bc5ca088b48c2d436529b64397fef',
            'info_dict': {
                'id': 'dnd1',
                'ext': 'mp4',
                'title': 'Mikel Oiarzabal scores to make it 0-3 for La Real against Espanyol',
                # Raw string: the pattern contains a regex escape (\.)
                'thumbnail': r're:https?://.*\.jpg$',
                'uploader': 'teabaker',
                'timestamp': 1454964157.35115,
                'upload_date': '20160208',
                'duration': 61.516,
                'view_count': int,
            }
        },
        # older video without bitrate, width/height, etc. info
        {
            'url': 'https://streamable.com/moo',
            'md5': '2cf6923639b87fba3279ad0df3a64e73',
            'info_dict': {
                'id': 'moo',
                'ext': 'mp4',
                'title': '"Please don\'t eat me!"',
                'thumbnail': r're:https?://.*\.jpg$',
                'timestamp': 1426115495,
                'upload_date': '20150311',
                'duration': 12,
                'view_count': int,
            }
        },
        {
            'url': 'https://streamable.com/e/dnd1',
            'only_matching': True,
        }
    ]

    def _real_extract(self, url):
        """Return the info dict for the video at *url*.

        Raises ExtractorError (expected) unless the reported status is 2.
        """
        video_id = self._match_id(url)

        # Note: Using the ajax API, as the public Streamable API doesn't seem
        # to return video info like the title properly sometimes, and doesn't
        # include info like the video duration
        video = self._download_json(
            'https://streamable.com/ajax/videos/%s' % video_id, video_id)

        # Status codes:
        # 0 The video is being uploaded
        # 1 The video is being processed
        # 2 The video has at least one file ready
        # 3 The video is unavailable due to an error
        status = video.get('status')
        if status != 2:
            raise ExtractorError(
                'This video is currently unavailable. It may still be uploading or processing.',
                expected=True)

        title = video.get('reddit_title') or video['title']

        formats = []
        for key, info in video['files'].items():
            # Entries without a URL cannot be downloaded; skip them.
            if not info.get('url'):
                continue
            formats.append({
                'format_id': key,
                'url': self._proto_relative_url(info['url']),
                'width': int_or_none(info.get('width')),
                'height': int_or_none(info.get('height')),
                'filesize': int_or_none(info.get('size')),
                'fps': int_or_none(info.get('framerate')),
                'vbr': float_or_none(info.get('bitrate'), 1000)
            })
        self._sort_formats(formats)

        # "owner" may be absent or null in the JSON; treat both as "no owner"
        # instead of failing on None.get(...).
        owner = video.get('owner') or {}

        return {
            'id': video_id,
            'title': title,
            'description': video.get('description'),
            'thumbnail': self._proto_relative_url(video.get('thumbnail_url')),
            'uploader': owner.get('user_name'),
            'timestamp': float_or_none(video.get('date_added')),
            'duration': float_or_none(video.get('duration')),
            'view_count': int_or_none(video.get('plays')),
            'formats': formats
        }
|
@@ -1,46 +1,56 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
update_url_query,
|
||||
smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class SyfyIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.syfy\.com/(?:videos/.+?vid:(?P<id>[0-9]+)|(?!videos)(?P<video_name>[^/]+)(?:$|[?#]))'
|
||||
|
||||
class SyfyIE(ThePlatformIE):
|
||||
_VALID_URL = r'https?://www\.syfy\.com/(?:[^/]+/)?videos/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.syfy.com/videos/Robot%20Combat%20League/Behind%20the%20Scenes/vid:2631458',
|
||||
'url': 'http://www.syfy.com/theinternetruinedmylife/videos/the-internet-ruined-my-life-season-1-trailer',
|
||||
'info_dict': {
|
||||
'id': 'NmqMrGnXvmO1',
|
||||
'ext': 'flv',
|
||||
'title': 'George Lucas has Advice for his Daughter',
|
||||
'description': 'Listen to what insights George Lucas give his daughter Amanda.',
|
||||
'id': '2968097',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Internet Ruined My Life: Season 1 Trailer',
|
||||
'description': 'One tweet, one post, one click, can destroy everything.',
|
||||
'uploader': 'NBCU-MPAT',
|
||||
'upload_date': '20170113',
|
||||
'timestamp': 1484345640,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}, {
|
||||
'url': 'http://www.syfy.com/wilwheaton',
|
||||
'md5': '94dfa54ee3ccb63295b276da08c415f6',
|
||||
'info_dict': {
|
||||
'id': '4yoffOOXC767',
|
||||
'ext': 'flv',
|
||||
'title': 'The Wil Wheaton Project - Premiering May 27th at 10/9c.',
|
||||
'description': 'The Wil Wheaton Project premieres May 27th at 10/9c. Don\'t miss it.',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
'skip': 'Blocked outside the US',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_name = mobj.group('video_name')
|
||||
if video_name:
|
||||
generic_webpage = self._download_webpage(url, video_name)
|
||||
video_id = self._search_regex(
|
||||
r'<iframe.*?class="video_iframe_page"\s+src="/_utils/video/thP_video_controller.php.*?_vid([0-9]+)">',
|
||||
generic_webpage, 'video ID')
|
||||
url = 'http://www.syfy.com/videos/%s/%s/vid:%s' % (
|
||||
video_name, video_name, video_id)
|
||||
else:
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
return self.url_result(self._og_search_video_url(webpage))
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
syfy_mpx = list(self._parse_json(self._search_regex(
|
||||
r'jQuery\.extend\([^,]+,\s*({.+})\);', webpage, 'drupal settings'),
|
||||
display_id)['syfy']['syfy_mpx'].values())[0]
|
||||
video_id = syfy_mpx['mpxGUID']
|
||||
title = syfy_mpx['episodeTitle']
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'manifest': 'm3u',
|
||||
}
|
||||
if syfy_mpx.get('entitlement') == 'auth':
|
||||
resource = '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/"><channel><title>syfy</title><item><title><![CDATA[%s]]></title><guid>%s</guid><media:rating scheme="urn:v-chip">%s</media:rating></item></channel></rss>' % (title, video_id, syfy_mpx.get('mpxRating', 'TV-14'))
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
url, video_id, 'syfy', resource)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': smuggle_url(update_url_query(
|
||||
self._proto_relative_url(syfy_mpx['releaseURL']), query),
|
||||
{'force_smil_url': True}),
|
||||
'title': title,
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
}
|
||||
|
@@ -1,50 +1,41 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
get_element_by_attribute,
|
||||
parse_duration,
|
||||
strip_jsonp,
|
||||
)
|
||||
from .mitele import MiTeleBaseIE
|
||||
|
||||
|
||||
class TelecincoIE(InfoExtractor):
|
||||
class TelecincoIE(MiTeleBaseIE):
|
||||
IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
|
||||
_VALID_URL = r'https?://www\.(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
|
||||
'md5': '5cbef3ad5ef17bf0d21570332d140729',
|
||||
'md5': '8d7b2d5f699ee2709d992a63d5cd1712',
|
||||
'info_dict': {
|
||||
'id': 'MDSVID20141015_0058',
|
||||
'id': 'JEA5ijCnF6p5W08A1rNKn7',
|
||||
'ext': 'mp4',
|
||||
'title': 'Con Martín Berasategui, hacer un bacalao al ...',
|
||||
'title': 'Bacalao con kokotxas al pil-pil',
|
||||
'description': 'md5:1382dacd32dd4592d478cbdca458e5bb',
|
||||
'duration': 662,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html',
|
||||
'md5': '0a5b9f3cc8b074f50a0578f823a12694',
|
||||
'md5': '284393e5387b3b947b77c613ef04749a',
|
||||
'info_dict': {
|
||||
'id': 'MDSVID20150916_0128',
|
||||
'id': 'jn24Od1zGLG4XUZcnUnZB6',
|
||||
'ext': 'mp4',
|
||||
'title': '¿Quién es este ex futbolista con el que hablan ...',
|
||||
'title': '¿Quién es este ex futbolista con el que hablan Leo Messi y Luis Suárez?',
|
||||
'description': 'md5:a62ecb5f1934fc787107d7b9a2262805',
|
||||
'duration': 79,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html',
|
||||
'md5': 'ad1bfaaba922dd4a295724b05b68f86a',
|
||||
'md5': '749afab6ea5a136a8806855166ae46a2',
|
||||
'info_dict': {
|
||||
'id': 'MDSVID20150513_0220',
|
||||
'id': 'aywerkD2Sv1vGNqq9b85Q2',
|
||||
'ext': 'mp4',
|
||||
'title': '#DOYLACARA. Con la trata no hay trato',
|
||||
'description': 'md5:2771356ff7bfad9179c5f5cd954f1477',
|
||||
'duration': 50,
|
||||
},
|
||||
}, {
|
||||
@@ -56,40 +47,16 @@ class TelecincoIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
episode = self._match_id(url)
|
||||
webpage = self._download_webpage(url, episode)
|
||||
embed_data_json = self._search_regex(
|
||||
r'(?s)MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data',
|
||||
).replace('\'', '"')
|
||||
embed_data = json.loads(embed_data_json)
|
||||
|
||||
domain = embed_data['mediaUrl']
|
||||
if not domain.startswith('http'):
|
||||
# only happens in telecinco.es videos
|
||||
domain = 'http://' + domain
|
||||
info_url = compat_urlparse.urljoin(
|
||||
domain,
|
||||
compat_urllib_parse_unquote(embed_data['flashvars']['host'])
|
||||
)
|
||||
info_el = self._download_xml(info_url, episode).find('./video/info')
|
||||
|
||||
video_link = info_el.find('videoUrl/link').text
|
||||
token_query = compat_urllib_parse_urlencode({'id': video_link})
|
||||
token_info = self._download_json(
|
||||
embed_data['flashvars']['ov_tk'] + '?' + token_query,
|
||||
episode,
|
||||
transform_source=strip_jsonp
|
||||
)
|
||||
formats = self._extract_m3u8_formats(
|
||||
token_info['tokenizedUrl'], episode, ext='mp4', entry_protocol='m3u8_native')
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': embed_data['videoId'],
|
||||
'display_id': episode,
|
||||
'title': info_el.find('title').text,
|
||||
'formats': formats,
|
||||
'description': get_element_by_attribute('class', 'text', webpage),
|
||||
'thumbnail': info_el.find('thumb').text,
|
||||
'duration': parse_duration(info_el.find('duration').text),
|
||||
}
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
title = self._html_search_meta(
|
||||
['og:title', 'twitter:title'], webpage, 'title')
|
||||
info = self._get_player_info(url, webpage)
|
||||
info.update({
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': self._html_search_meta(
|
||||
['og:description', 'twitter:description'],
|
||||
webpage, 'title', fatal=False),
|
||||
})
|
||||
return info
|
||||
|
@@ -47,11 +47,10 @@ class TelegraafIE(InfoExtractor):
|
||||
ext = determine_ext(manifest_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
manifest_url, video_id, ext='mp4', m3u8_id='hls'))
|
||||
manifest_url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
|
||||
elif ext == 'mpd':
|
||||
# TODO: Current DASH formats are broken - $Time$ pattern in
|
||||
# <SegmentTemplate> not implemented yet
|
||||
continue
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
manifest_url, video_id, mpd_id='dash', fatal=False))
|
||||
else:
|
||||
self.report_warning('Unknown adaptive format %s' % ext)
|
||||
for location in locations.get('progressive', []):
|
||||
|
@@ -24,16 +24,20 @@ class ThreeQSDNIE(InfoExtractor):
|
||||
'title': '0280d6b9-1215-11e6-b427-0cc47a188158',
|
||||
'is_live': False,
|
||||
},
|
||||
'expected_warnings': ['Failed to download MPD manifest'],
|
||||
'expected_warnings': ['Failed to download MPD manifest', 'Failed to parse JSON'],
|
||||
}, {
|
||||
# live video stream
|
||||
'url': 'https://playout.3qsdn.com/d755d94b-4ab9-11e3-9162-0025907ad44f?js=true',
|
||||
'info_dict': {
|
||||
'id': 'd755d94b-4ab9-11e3-9162-0025907ad44f',
|
||||
'ext': 'mp4',
|
||||
'title': 'd755d94b-4ab9-11e3-9162-0025907ad44f',
|
||||
'is_live': False,
|
||||
'title': 're:^d755d94b-4ab9-11e3-9162-0025907ad44f [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 downloads
|
||||
},
|
||||
'expected_warnings': ['Failed to download MPD manifest'],
|
||||
}, {
|
||||
# live audio stream
|
||||
'url': 'http://playout.3qsdn.com/9edf36e0-6bf2-11e2-a16a-9acf09e2db48',
|
||||
@@ -92,12 +96,11 @@ class ThreeQSDNIE(InfoExtractor):
|
||||
if not item_url or item_url in urls:
|
||||
return
|
||||
urls.add(item_url)
|
||||
type_ = item.get('type')
|
||||
ext = determine_ext(item_url, default_ext=None)
|
||||
if type_ == 'application/dash+xml' or ext == 'mpd':
|
||||
ext = mimetype2ext(item.get('type')) or determine_ext(item_url, default_ext=None)
|
||||
if ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
item_url, video_id, mpd_id='mpd', fatal=False))
|
||||
elif type_ in ('application/vnd.apple.mpegURL', 'application/x-mpegurl') or ext == 'm3u8':
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
item_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8' if live else 'm3u8_native',
|
||||
@@ -111,11 +114,11 @@ class ThreeQSDNIE(InfoExtractor):
|
||||
formats.append({
|
||||
'url': item_url,
|
||||
'format_id': item.get('quality'),
|
||||
'ext': 'mp4' if item_url.startswith('rtsp') else mimetype2ext(type_) or ext,
|
||||
'ext': 'mp4' if item_url.startswith('rtsp') else ext,
|
||||
'vcodec': 'none' if stream_type == 'audio' else None,
|
||||
})
|
||||
|
||||
for item_js in re.findall(r'({.*?\b(?:src|source)\s*:\s*["\'].+?})', js):
|
||||
for item_js in re.findall(r'({[^{]*?\b(?:src|source)\s*:\s*["\'].+?})', js):
|
||||
f = self._parse_json(
|
||||
item_js, video_id, transform_source=js_to_json, fatal=False)
|
||||
if not f:
|
||||
|
@@ -5,31 +5,27 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class TMZIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tmz\.com/videos/(?P<id>[^/]+)/?'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?tmz\.com/videos/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.tmz.com/videos/0_okj015ty/',
|
||||
'md5': '791204e3bf790b1426cb2db0706184c0',
|
||||
'md5': '4d22a51ef205b6c06395d8394f72d560',
|
||||
'info_dict': {
|
||||
'id': '0_okj015ty',
|
||||
'url': 'http://tmz.vo.llnwd.net/o28/2014-03/13/0_okj015ty_0_rt8ro3si_2.mp4',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kim Kardashian\'s Boobs Unlock a Mystery!',
|
||||
'description': 'Did Kim Kardasain try to one-up Khloe by one-upping Kylie??? Or is she just showing off her amazing boobs?',
|
||||
'thumbnail': r're:http://cdnbakmi\.kaltura\.com/.*thumbnail.*',
|
||||
'timestamp': 1394747163,
|
||||
'uploader_id': 'batchUser',
|
||||
'upload_date': '20140313',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.tmz.com/videos/0-cegprt2p/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': self._html_search_meta('VideoURL', webpage, fatal=True),
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._html_search_meta('ThumbURL', webpage),
|
||||
}
|
||||
video_id = self._match_id(url).replace('-', '_')
|
||||
return self.url_result('kaltura:591531:%s' % video_id, 'Kaltura', video_id)
|
||||
|
||||
|
||||
class TMZArticleIE(InfoExtractor):
|
||||
|
@@ -1,74 +1,41 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unified_strdate,
|
||||
)
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class TouTvIE(InfoExtractor):
|
||||
IE_NAME = 'tou.tv'
|
||||
_VALID_URL = r'https?://www\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/(?P<episode>S[0-9]+E[0-9]+)))'
|
||||
_VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+/S[0-9]+E[0-9]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.tou.tv/30-vies/S04E41',
|
||||
'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17',
|
||||
'info_dict': {
|
||||
'id': '30-vies_S04E41',
|
||||
'id': '122017',
|
||||
'ext': 'mp4',
|
||||
'title': '30 vies Saison 4 / Épisode 41',
|
||||
'description': 'md5:da363002db82ccbe4dafeb9cab039b09',
|
||||
'age_limit': 8,
|
||||
'uploader': 'Groupe des Nouveaux Médias',
|
||||
'duration': 1296,
|
||||
'upload_date': '20131118',
|
||||
'thumbnail': 'http://static.tou.tv/medias/images/2013-11-18_19_00_00_30VIES_0341_01_L.jpeg',
|
||||
'title': 'Saison 2015 Épisode 17',
|
||||
'description': 'La photo de famille 2',
|
||||
'upload_date': '20100717',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # Requires rtmpdump
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Only available in Canada'
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
mediaId = self._search_regex(
|
||||
r'"idMedia":\s*"([^"]+)"', webpage, 'media ID')
|
||||
|
||||
streams_url = 'http://release.theplatform.com/content.select?pid=' + mediaId
|
||||
streams_doc = self._download_xml(
|
||||
streams_url, video_id, note='Downloading stream list')
|
||||
|
||||
video_url = next(n.text
|
||||
for n in streams_doc.findall('.//choice/url')
|
||||
if '//ad.doubleclick' not in n.text)
|
||||
if video_url.endswith('/Unavailable.flv'):
|
||||
raise ExtractorError(
|
||||
'Access to this video is blocked from outside of Canada',
|
||||
expected=True)
|
||||
|
||||
duration_str = self._html_search_meta(
|
||||
'video:duration', webpage, 'duration')
|
||||
duration = int(duration_str) if duration_str else None
|
||||
upload_date_str = self._html_search_meta(
|
||||
'video:release_date', webpage, 'upload date')
|
||||
upload_date = unified_strdate(upload_date_str) if upload_date_str else None
|
||||
path = self._match_id(url)
|
||||
metadata = self._download_json('http://ici.tou.tv/presentation/%s' % path, path)
|
||||
video_id = metadata['IdMedia']
|
||||
details = metadata['Details']
|
||||
title = details['OriginalTitle']
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'radiocanada:%s:%s' % (metadata.get('AppCode', 'toutv'), video_id),
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'url': video_url,
|
||||
'description': self._og_search_description(webpage),
|
||||
'uploader': self._dc_search_uploader(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'age_limit': self._media_rating_search(webpage),
|
||||
'duration': duration,
|
||||
'upload_date': upload_date,
|
||||
'ext': 'mp4',
|
||||
'title': title,
|
||||
'thumbnail': details.get('ImageUrl'),
|
||||
'duration': int_or_none(details.get('LengthInSeconds')),
|
||||
}
|
||||
|
@@ -9,56 +9,23 @@ class TVLandIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|episodes)/(?P<id>[^/?#.]+)'
|
||||
_FEED_URL = 'http://www.tvland.com/feeds/mrss/'
|
||||
_TESTS = [{
|
||||
# Geo-restricted. Without a proxy metadata are still there. With a
|
||||
# proxy it redirects to http://m.tvland.com/app/
|
||||
'url': 'http://www.tvland.com/episodes/hqhps2/everybody-loves-raymond-the-invasion-ep-048',
|
||||
'playlist': [
|
||||
{
|
||||
'md5': '227e9723b9669c05bf51098b10287aa7',
|
||||
'info_dict': {
|
||||
'id': 'bcbd3a83-3aca-4dca-809b-f78a87dcccdd',
|
||||
'ext': 'mp4',
|
||||
'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 1 of 5',
|
||||
}
|
||||
},
|
||||
{
|
||||
'md5': '9fa2b764ec0e8194fb3ebb01a83df88b',
|
||||
'info_dict': {
|
||||
'id': 'f4279548-6e13-40dd-92e8-860d27289197',
|
||||
'ext': 'mp4',
|
||||
'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 2 of 5',
|
||||
}
|
||||
},
|
||||
{
|
||||
'md5': 'fde4c3bccd7cc7e3576b338734153cec',
|
||||
'info_dict': {
|
||||
'id': '664e4a38-53ef-4115-9bc9-d0f789ec6334',
|
||||
'ext': 'mp4',
|
||||
'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 3 of 5',
|
||||
}
|
||||
},
|
||||
{
|
||||
'md5': '247f6780cda6891f2e49b8ae2b10e017',
|
||||
'info_dict': {
|
||||
'id': '9146ecf5-b15a-4d78-879c-6679b77f4960',
|
||||
'ext': 'mp4',
|
||||
'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 4 of 5',
|
||||
}
|
||||
},
|
||||
{
|
||||
'md5': 'fd269f33256e47bad5eb6c40de089ff6',
|
||||
'info_dict': {
|
||||
'id': '04334a2e-9a47-4214-a8c2-ae5792e2fab7',
|
||||
'ext': 'mp4',
|
||||
'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 5 of 5',
|
||||
}
|
||||
}
|
||||
],
|
||||
'info_dict': {
|
||||
'description': 'md5:80973e81b916a324e05c14a3fb506d29',
|
||||
'title': 'The Invasion',
|
||||
},
|
||||
'playlist': [],
|
||||
}, {
|
||||
'url': 'http://www.tvland.com/video-clips/zea2ev/younger-younger--hilary-duff---little-lies',
|
||||
'md5': 'e2c6389401cf485df26c79c247b08713',
|
||||
'info_dict': {
|
||||
'id': 'b8697515-4bbe-4e01-83d5-fa705ce5fa88',
|
||||
'ext': 'mp4',
|
||||
'title': 'Younger|Younger: Hilary Duff - Little Lies',
|
||||
'description': 'md5:7d192f56ca8d958645c83f0de8ef0269'
|
||||
'title': 'Younger|December 28, 2015|2|NO-EPISODE#|Younger: Hilary Duff - Little Lies',
|
||||
'description': 'md5:7d192f56ca8d958645c83f0de8ef0269',
|
||||
'upload_date': '20151228',
|
||||
'timestamp': 1451289600,
|
||||
},
|
||||
}]
|
||||
|
@@ -89,8 +89,8 @@ class TVPIE(InfoExtractor):
|
||||
r'(https?://.+?/video)(?:\.(?:ism|f4m|m3u8)|-\d+\.mp4)',
|
||||
video_url, 'video base url', default=None)
|
||||
if video_url_base:
|
||||
# TODO: Current DASH formats are broken - $Time$ pattern in
|
||||
# <SegmentTemplate> not implemented yet
|
||||
# TODO: <Group> found instead of <AdaptationSet> in MPD manifest.
|
||||
# It's not mentioned in MPEG-DASH standard. Figure that out.
|
||||
# formats.extend(self._extract_mpd_formats(
|
||||
# video_url_base + '.ism/video.mpd',
|
||||
# video_id, mpd_id='dash', fatal=False))
|
||||
|
@@ -8,43 +8,36 @@ from ..compat import compat_str
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
determine_ext,
|
||||
update_url_query,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class TVPlayIE(InfoExtractor):
|
||||
IE_DESC = 'TV3Play and related services'
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?
|
||||
(?:tvplay\.lv/parraides|
|
||||
tv3play\.lt/programos|
|
||||
play\.tv3\.lt/programos|
|
||||
tv3play\.ee/sisu|
|
||||
tv3play\.se/program|
|
||||
tv6play\.se/program|
|
||||
tv8play\.se/program|
|
||||
tv10play\.se/program|
|
||||
tv3play\.no/programmer|
|
||||
viasat4play\.no/programmer|
|
||||
tv6play\.no/programmer|
|
||||
tv3play\.dk/programmer|
|
||||
(?:tvplay(?:\.skaties)?\.lv/parraides|
|
||||
(?:tv3play|play\.tv3)\.lt/programos|
|
||||
tv3play(?:\.tv3)?\.ee/sisu|
|
||||
tv(?:3|6|8|10)play\.se/program|
|
||||
(?:(?:tv3play|viasat4play|tv6play)\.no|tv3play\.dk)/programmer|
|
||||
play\.novatv\.bg/programi
|
||||
)/[^/]+/(?P<id>\d+)
|
||||
'''
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.tvplay.lv/parraides/vinas-melo-labak/418113?autostart=true',
|
||||
'md5': 'a1612fe0849455423ad8718fe049be21',
|
||||
'info_dict': {
|
||||
'id': '418113',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kādi ir īri? - Viņas melo labāk',
|
||||
'description': 'Baiba apsmej īrus, kādi tie ir un ko viņi dara.',
|
||||
'duration': 25,
|
||||
'timestamp': 1406097056,
|
||||
'upload_date': '20140723',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://play.tv3.lt/programos/moterys-meluoja-geriau/409229?autostart=true',
|
||||
@@ -82,7 +75,7 @@ class TVPlayIE(InfoExtractor):
|
||||
'url': 'http://www.tv3play.se/program/husraddarna/395385?autostart=true',
|
||||
'info_dict': {
|
||||
'id': '395385',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Husräddarna S02E07',
|
||||
'description': 'md5:f210c6c89f42d4fc39faa551be813777',
|
||||
'duration': 2574,
|
||||
@@ -90,7 +83,6 @@ class TVPlayIE(InfoExtractor):
|
||||
'upload_date': '20140520',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
@@ -98,7 +90,7 @@ class TVPlayIE(InfoExtractor):
|
||||
'url': 'http://www.tv6play.se/program/den-sista-dokusapan/266636?autostart=true',
|
||||
'info_dict': {
|
||||
'id': '266636',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Den sista dokusåpan S01E08',
|
||||
'description': 'md5:295be39c872520221b933830f660b110',
|
||||
'duration': 1492,
|
||||
@@ -107,7 +99,6 @@ class TVPlayIE(InfoExtractor):
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
@@ -115,7 +106,7 @@ class TVPlayIE(InfoExtractor):
|
||||
'url': 'http://www.tv8play.se/program/antikjakten/282756?autostart=true',
|
||||
'info_dict': {
|
||||
'id': '282756',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Antikjakten S01E10',
|
||||
'description': 'md5:1b201169beabd97e20c5ad0ad67b13b8',
|
||||
'duration': 2646,
|
||||
@@ -123,7 +114,6 @@ class TVPlayIE(InfoExtractor):
|
||||
'upload_date': '20120925',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
@@ -131,7 +121,7 @@ class TVPlayIE(InfoExtractor):
|
||||
'url': 'http://www.tv3play.no/programmer/anna-anka-soker-assistent/230898?autostart=true',
|
||||
'info_dict': {
|
||||
'id': '230898',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Anna Anka søker assistent - Ep. 8',
|
||||
'description': 'md5:f80916bf5bbe1c5f760d127f8dd71474',
|
||||
'duration': 2656,
|
||||
@@ -139,7 +129,6 @@ class TVPlayIE(InfoExtractor):
|
||||
'upload_date': '20100628',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
@@ -147,7 +136,7 @@ class TVPlayIE(InfoExtractor):
|
||||
'url': 'http://www.viasat4play.no/programmer/budbringerne/21873?autostart=true',
|
||||
'info_dict': {
|
||||
'id': '21873',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Budbringerne program 10',
|
||||
'description': 'md5:4db78dc4ec8a85bb04fd322a3ee5092d',
|
||||
'duration': 1297,
|
||||
@@ -155,7 +144,6 @@ class TVPlayIE(InfoExtractor):
|
||||
'upload_date': '20090929',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
@@ -163,7 +151,7 @@ class TVPlayIE(InfoExtractor):
|
||||
'url': 'http://www.tv6play.no/programmer/hotelinspektor-alex-polizzi/361883?autostart=true',
|
||||
'info_dict': {
|
||||
'id': '361883',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hotelinspektør Alex Polizzi - Ep. 10',
|
||||
'description': 'md5:3ecf808db9ec96c862c8ecb3a7fdaf81',
|
||||
'duration': 2594,
|
||||
@@ -171,7 +159,6 @@ class TVPlayIE(InfoExtractor):
|
||||
'upload_date': '20140224',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
@@ -191,6 +178,14 @@ class TVPlayIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://tvplay.skaties.lv/parraides/vinas-melo-labak/418113?autostart=true',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://tv3play.tv3.ee/sisu/kodu-keset-linna/238551?autostart=true',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -199,7 +194,9 @@ class TVPlayIE(InfoExtractor):
|
||||
video = self._download_json(
|
||||
'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON')
|
||||
|
||||
if video['is_geo_blocked']:
|
||||
title = video['title']
|
||||
|
||||
if video.get('is_geo_blocked'):
|
||||
self.report_warning(
|
||||
'This content might not be available in your country due to copyright reasons')
|
||||
|
||||
@@ -208,42 +205,50 @@ class TVPlayIE(InfoExtractor):
|
||||
|
||||
quality = qualities(['hls', 'medium', 'high'])
|
||||
formats = []
|
||||
for format_id, video_url in streams['streams'].items():
|
||||
for format_id, video_url in streams.get('streams', {}).items():
|
||||
if not video_url or not isinstance(video_url, compat_str):
|
||||
continue
|
||||
fmt = {
|
||||
'format_id': format_id,
|
||||
'preference': quality(format_id),
|
||||
}
|
||||
if video_url.startswith('rtmp'):
|
||||
m = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', video_url)
|
||||
if not m:
|
||||
continue
|
||||
fmt.update({
|
||||
'ext': 'flv',
|
||||
'url': m.group('url'),
|
||||
'app': m.group('app'),
|
||||
'play_path': m.group('playpath'),
|
||||
})
|
||||
elif video_url.endswith('.f4m'):
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', video_id))
|
||||
continue
|
||||
update_url_query(video_url, {
|
||||
'hdcore': '3.5.0',
|
||||
'plugin': 'aasp-3.5.0.151.81'
|
||||
}), video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
fmt.update({
|
||||
'url': video_url,
|
||||
})
|
||||
formats.append(fmt)
|
||||
|
||||
fmt = {
|
||||
'format_id': format_id,
|
||||
'quality': quality(format_id),
|
||||
'ext': ext,
|
||||
}
|
||||
if video_url.startswith('rtmp'):
|
||||
m = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', video_url)
|
||||
if not m:
|
||||
continue
|
||||
fmt.update({
|
||||
'ext': 'flv',
|
||||
'url': m.group('url'),
|
||||
'app': m.group('app'),
|
||||
'play_path': m.group('playpath'),
|
||||
})
|
||||
else:
|
||||
fmt.update({
|
||||
'url': video_url,
|
||||
})
|
||||
formats.append(fmt)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video['title'],
|
||||
'description': video['description'],
|
||||
'duration': video['duration'],
|
||||
'timestamp': parse_iso8601(video['created_at']),
|
||||
'view_count': video['views']['total'],
|
||||
'age_limit': video.get('age_limit', 0),
|
||||
'title': title,
|
||||
'description': video.get('description'),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'timestamp': parse_iso8601(video.get('created_at')),
|
||||
'view_count': int_or_none(video.get('views', {}).get('total')),
|
||||
'age_limit': int_or_none(video.get('age_limit', 0)),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -1,25 +1,62 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
determine_ext,
|
||||
mimetype2ext,
|
||||
)
|
||||
|
||||
|
||||
class TweakersIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tweakers\.net/video/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://tweakers.net/video/9926/new-nintendo-3ds-xl-op-alle-fronten-beter.html',
|
||||
'md5': '3147e4ddad366f97476a93863e4557c8',
|
||||
'md5': 'fe73e417c093a788e0160c4025f88b15',
|
||||
'info_dict': {
|
||||
'id': '9926',
|
||||
'ext': 'mp4',
|
||||
'title': 'New Nintendo 3DS XL - Op alle fronten beter',
|
||||
'description': 'md5:f97324cc71e86e11c853f0763820e3ba',
|
||||
'description': 'md5:3789b21fed9c0219e9bcaacd43fab280',
|
||||
'thumbnail': 're:^https?://.*\.jpe?g$',
|
||||
'duration': 386,
|
||||
'uploader_id': 's7JeEm',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
entries = self._extract_xspf_playlist(
|
||||
'https://tweakers.net/video/s1playlist/%s/playlist.xspf' % playlist_id, playlist_id)
|
||||
return self.playlist_result(entries, playlist_id)
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
'https://tweakers.net/video/s1playlist/%s/1920/1080/playlist.json' % video_id,
|
||||
video_id)['items'][0]
|
||||
|
||||
title = video_data['title']
|
||||
|
||||
formats = []
|
||||
for location in video_data.get('locations', {}).get('progressive', []):
|
||||
format_id = location.get('label')
|
||||
width = int_or_none(location.get('width'))
|
||||
height = int_or_none(location.get('height'))
|
||||
for source in location.get('sources', []):
|
||||
source_url = source.get('src')
|
||||
if not source_url:
|
||||
continue
|
||||
ext = mimetype2ext(source.get('type')) or determine_ext(source_url)
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': source_url,
|
||||
'width': width,
|
||||
'height': height,
|
||||
'ext': ext,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': video_data.get('description'),
|
||||
'thumbnail': video_data.get('poster'),
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'uploader_id': video_data.get('account'),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -9,8 +9,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class VidziIE(JWPlatformBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?P<id>\w+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://vidzi.tv/cghql9yq6emu.html',
|
||||
'md5': '4f16c71ca0c8c8635ab6932b5f3f1660',
|
||||
'info_dict': {
|
||||
@@ -22,12 +22,16 @@ class VidziIE(JWPlatformBaseIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://vidzi.tv/embed-4z2yb0rzphe9-600x338.html',
|
||||
'skip_download': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
webpage = self._download_webpage(
|
||||
'http://vidzi.tv/%s' % video_id, video_id)
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')
|
||||
|
||||
|
@@ -130,7 +130,7 @@ class VikiIE(VikiBaseIE):
|
||||
}, {
|
||||
# clip
|
||||
'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference',
|
||||
'md5': 'feea2b1d7b3957f70886e6dfd8b8be84',
|
||||
'md5': '86c0b5dbd4d83a6611a79987cc7a1989',
|
||||
'info_dict': {
|
||||
'id': '1067139v',
|
||||
'ext': 'mp4',
|
||||
@@ -156,15 +156,11 @@ class VikiIE(VikiBaseIE):
|
||||
'like_count': int,
|
||||
'age_limit': 13,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Blocked in the US',
|
||||
}, {
|
||||
# episode
|
||||
'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
|
||||
'md5': '1f54697dabc8f13f31bf06bb2e4de6db',
|
||||
'md5': '5fa476a902e902783ac7a4d615cdbc7a',
|
||||
'info_dict': {
|
||||
'id': '44699v',
|
||||
'ext': 'mp4',
|
||||
@@ -200,7 +196,7 @@ class VikiIE(VikiBaseIE):
|
||||
}, {
|
||||
# non-English description
|
||||
'url': 'http://www.viki.com/videos/158036v-love-in-magic',
|
||||
'md5': '013dc282714e22acf9447cad14ff1208',
|
||||
'md5': '1713ae35df5a521b31f6dc40730e7c9c',
|
||||
'info_dict': {
|
||||
'id': '158036v',
|
||||
'ext': 'mp4',
|
||||
@@ -281,9 +277,16 @@ class VikiIE(VikiBaseIE):
|
||||
r'^(\d+)[pP]$', format_id, 'height', default=None))
|
||||
for protocol, format_dict in stream_dict.items():
|
||||
if format_id == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_dict['url'], video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='m3u8-%s' % protocol, fatal=False))
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
format_dict['url'], video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', preference=-1,
|
||||
m3u8_id='m3u8-%s' % protocol, fatal=False)
|
||||
# Despite CODECS metadata in m3u8 all video-only formats
|
||||
# are actually video+audio
|
||||
for f in m3u8_formats:
|
||||
if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
|
||||
f['acodec'] = None
|
||||
formats.extend(m3u8_formats)
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_dict['url'],
|
||||
|
@@ -364,6 +364,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
|
||||
if mobj:
|
||||
return mobj.group(1)
|
||||
# Look more for non-standard embedded Vimeo player
|
||||
mobj = re.search(
|
||||
r'<video[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)(?P=q1)', webpage)
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
|
||||
def _verify_player_video_password(self, url, video_id):
|
||||
password = self._downloader.params.get('videopassword')
|
||||
|
@@ -6,11 +6,18 @@ import json
|
||||
import sys
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
parse_duration,
|
||||
remove_start,
|
||||
str_to_int,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
@@ -20,7 +27,55 @@ from .vimeo import VimeoIE
|
||||
from .pladform import PladformIE
|
||||
|
||||
|
||||
class VKIE(InfoExtractor):
|
||||
class VKBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'vk'
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
login_page, url_handle = self._download_webpage_handle(
|
||||
'https://vk.com', None, 'Downloading login page')
|
||||
|
||||
login_form = self._hidden_inputs(login_page)
|
||||
|
||||
login_form.update({
|
||||
'email': username.encode('cp1251'),
|
||||
'pass': password.encode('cp1251'),
|
||||
})
|
||||
|
||||
# https://new.vk.com/ serves two same remixlhk cookies in Set-Cookie header
|
||||
# and expects the first one to be set rather than second (see
|
||||
# https://github.com/rg3/youtube-dl/issues/9841#issuecomment-227871201).
|
||||
# As of RFC6265 the newer one cookie should be set into cookie store
|
||||
# what actually happens.
|
||||
# We will workaround this VK issue by resetting the remixlhk cookie to
|
||||
# the first one manually.
|
||||
cookies = url_handle.headers.get('Set-Cookie')
|
||||
if cookies:
|
||||
if sys.version_info[0] >= 3:
|
||||
cookies = cookies.encode('iso-8859-1')
|
||||
cookies = cookies.decode('utf-8')
|
||||
remixlhk = re.search(r'remixlhk=(.+?);.*?\bdomain=(.+?)(?:[,;]|$)', cookies)
|
||||
if remixlhk:
|
||||
value, domain = remixlhk.groups()
|
||||
self._set_cookie(domain, 'remixlhk', value)
|
||||
|
||||
login_page = self._download_webpage(
|
||||
'https://login.vk.com/?act=login', None,
|
||||
note='Logging in as %s' % username,
|
||||
data=urlencode_postdata(login_form))
|
||||
|
||||
if re.search(r'onLoginFailed', login_page):
|
||||
raise ExtractorError(
|
||||
'Unable to login, incorrect username and/or password', expected=True)
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
|
||||
class VKIE(VKBaseIE):
|
||||
IE_NAME = 'vk'
|
||||
IE_DESC = 'VK'
|
||||
_VALID_URL = r'''(?x)
|
||||
@@ -38,8 +93,6 @@ class VKIE(InfoExtractor):
|
||||
(?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>[\da-f]+))?
|
||||
)
|
||||
'''
|
||||
_NETRC_MACHINE = 'vk'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
|
||||
@@ -189,49 +242,6 @@ class VKIE(InfoExtractor):
|
||||
}
|
||||
]
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
login_page, url_handle = self._download_webpage_handle(
|
||||
'https://vk.com', None, 'Downloading login page')
|
||||
|
||||
login_form = self._hidden_inputs(login_page)
|
||||
|
||||
login_form.update({
|
||||
'email': username.encode('cp1251'),
|
||||
'pass': password.encode('cp1251'),
|
||||
})
|
||||
|
||||
# https://new.vk.com/ serves two same remixlhk cookies in Set-Cookie header
|
||||
# and expects the first one to be set rather than second (see
|
||||
# https://github.com/rg3/youtube-dl/issues/9841#issuecomment-227871201).
|
||||
# As of RFC6265 the newer one cookie should be set into cookie store
|
||||
# what actually happens.
|
||||
# We will workaround this VK issue by resetting the remixlhk cookie to
|
||||
# the first one manually.
|
||||
cookies = url_handle.headers.get('Set-Cookie')
|
||||
if sys.version_info[0] >= 3:
|
||||
cookies = cookies.encode('iso-8859-1')
|
||||
cookies = cookies.decode('utf-8')
|
||||
remixlhk = re.search(r'remixlhk=(.+?);.*?\bdomain=(.+?)(?:[,;]|$)', cookies)
|
||||
if remixlhk:
|
||||
value, domain = remixlhk.groups()
|
||||
self._set_cookie(domain, 'remixlhk', value)
|
||||
|
||||
login_page = self._download_webpage(
|
||||
'https://login.vk.com/?act=login', None,
|
||||
note='Logging in as %s' % username,
|
||||
data=urlencode_postdata(login_form))
|
||||
|
||||
if re.search(r'onLoginFailed', login_page):
|
||||
raise ExtractorError(
|
||||
'Unable to login, incorrect username and/or password', expected=True)
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('videoid')
|
||||
@@ -355,7 +365,7 @@ class VKIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class VKUserVideosIE(InfoExtractor):
|
||||
class VKUserVideosIE(VKBaseIE):
|
||||
IE_NAME = 'vk:uservideos'
|
||||
IE_DESC = "VK - User's Videos"
|
||||
_VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&]|$)'
|
||||
@@ -396,3 +406,121 @@ class VKUserVideosIE(InfoExtractor):
|
||||
webpage, 'title', default=page_id))
|
||||
|
||||
return self.playlist_result(entries, page_id, title)
|
||||
|
||||
|
||||
class VKWallPostIE(VKBaseIE):
|
||||
IE_NAME = 'vk:wallpost'
|
||||
_VALID_URL = r'https?://(?:(?:(?:(?:m|new)\.)?vk\.com/(?:[^?]+\?.*\bw=)?wall(?P<id>-?\d+_\d+)))'
|
||||
_TESTS = [{
|
||||
# public page URL, audio playlist
|
||||
'url': 'https://vk.com/bs.official?w=wall-23538238_35',
|
||||
'info_dict': {
|
||||
'id': '23538238_35',
|
||||
'title': 'Black Shadow - Wall post 23538238_35',
|
||||
'description': 'md5:3f84b9c4f9ef499731cf1ced9998cc0c',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': '5ba93864ec5b85f7ce19a9af4af080f6',
|
||||
'info_dict': {
|
||||
'id': '135220665_111806521',
|
||||
'ext': 'mp3',
|
||||
'title': 'Black Shadow - Слепое Верование',
|
||||
'duration': 370,
|
||||
'uploader': 'Black Shadow',
|
||||
'artist': 'Black Shadow',
|
||||
'track': 'Слепое Верование',
|
||||
},
|
||||
}, {
|
||||
'md5': '4cc7e804579122b17ea95af7834c9233',
|
||||
'info_dict': {
|
||||
'id': '135220665_111802303',
|
||||
'ext': 'mp3',
|
||||
'title': 'Black Shadow - Война - Негасимое Бездны Пламя!',
|
||||
'duration': 423,
|
||||
'uploader': 'Black Shadow',
|
||||
'artist': 'Black Shadow',
|
||||
'track': 'Война - Негасимое Бездны Пламя!',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}],
|
||||
'skip': 'Requires vk account credentials',
|
||||
}, {
|
||||
# single YouTube embed, no leading -
|
||||
'url': 'https://vk.com/wall85155021_6319',
|
||||
'info_dict': {
|
||||
'id': '85155021_6319',
|
||||
'title': 'Sergey Gorbunov - Wall post 85155021_6319',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
'skip': 'Requires vk account credentials',
|
||||
}, {
|
||||
# wall page URL
|
||||
'url': 'https://vk.com/wall-23538238_35',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# mobile wall page URL
|
||||
'url': 'https://m.vk.com/wall-23538238_35',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
post_id = self._match_id(url)
|
||||
|
||||
wall_url = 'https://vk.com/wall%s' % post_id
|
||||
|
||||
post_id = remove_start(post_id, '-')
|
||||
|
||||
webpage = self._download_webpage(wall_url, post_id)
|
||||
|
||||
error = self._html_search_regex(
|
||||
r'>Error</div>\s*<div[^>]+class=["\']body["\'][^>]*>([^<]+)',
|
||||
webpage, 'error', default=None)
|
||||
if error:
|
||||
raise ExtractorError('VK said: %s' % error, expected=True)
|
||||
|
||||
description = clean_html(get_element_by_class('wall_post_text', webpage))
|
||||
uploader = clean_html(get_element_by_class(
|
||||
'fw_post_author', webpage)) or self._og_search_description(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
entries = []
|
||||
|
||||
for audio in re.finditer(r'''(?sx)
|
||||
<input[^>]+
|
||||
id=(?P<q1>["\'])audio_info(?P<id>\d+_\d+).*?(?P=q1)[^>]+
|
||||
value=(?P<q2>["\'])(?P<url>http.+?)(?P=q2)
|
||||
.+?
|
||||
</table>''', webpage):
|
||||
audio_html = audio.group(0)
|
||||
audio_id = audio.group('id')
|
||||
duration = parse_duration(get_element_by_class('duration', audio_html))
|
||||
track = self._html_search_regex(
|
||||
r'<span[^>]+id=["\']title%s[^>]*>([^<]+)' % audio_id,
|
||||
audio_html, 'title', default=None)
|
||||
artist = self._html_search_regex(
|
||||
r'>([^<]+)</a></b>\s*&ndash', audio_html,
|
||||
'artist', default=None)
|
||||
entries.append({
|
||||
'id': audio_id,
|
||||
'url': audio.group('url'),
|
||||
'title': '%s - %s' % (artist, track) if artist and track else audio_id,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'uploader': uploader,
|
||||
'artist': artist,
|
||||
'track': track,
|
||||
})
|
||||
|
||||
for video in re.finditer(
|
||||
r'<a[^>]+href=(["\'])(?P<url>/video(?:-?[\d_]+).*?)\1', webpage):
|
||||
entries.append(self.url_result(
|
||||
compat_urlparse.urljoin(url, video.group('url')), VKIE.ie_key()))
|
||||
|
||||
title = 'Wall post %s' % post_id
|
||||
|
||||
return self.playlist_result(
|
||||
orderedSet(entries), post_id,
|
||||
'%s - %s' % (uploader, title) if uploader else title,
|
||||
description)
|
||||
|
@@ -9,7 +9,7 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_duration,
|
||||
qualities,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ class VuClipIE(InfoExtractor):
|
||||
'id': '922692425',
|
||||
'ext': '3gp',
|
||||
'title': 'The Toy Soldiers - Hollywood Movie Trailer',
|
||||
'duration': 180,
|
||||
'duration': 177,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -46,34 +46,21 @@ class VuClipIE(InfoExtractor):
|
||||
'%s said: %s' % (self.IE_NAME, error_msg), expected=True)
|
||||
|
||||
# These clowns alternate between two page types
|
||||
links_code = self._search_regex(
|
||||
r'''(?xs)
|
||||
(?:
|
||||
<img\s+src="[^"]*/play.gif".*?>|
|
||||
<!--\ player\ end\ -->\s*</div><!--\ thumb\ end-->
|
||||
)
|
||||
(.*?)
|
||||
(?:
|
||||
<a\s+href="fblike|<div\s+class="social">
|
||||
)
|
||||
''', webpage, 'links')
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.*?)-\s*Vuclip</title>', webpage, 'title').strip()
|
||||
video_url = self._search_regex(
|
||||
r'<a[^>]+href="([^"]+)"[^>]*><img[^>]+src="[^"]*/play\.gif',
|
||||
webpage, 'video URL', default=None)
|
||||
if video_url:
|
||||
formats = [{
|
||||
'url': video_url,
|
||||
}]
|
||||
else:
|
||||
formats = self._parse_html5_media_entries(url, webpage)[0]['formats']
|
||||
|
||||
quality_order = qualities(['Reg', 'Hi'])
|
||||
formats = []
|
||||
for url, q in re.findall(
|
||||
r'<a\s+href="(?P<url>[^"]+)".*?>(?:<button[^>]*>)?(?P<q>[^<]+)(?:</button>)?</a>', links_code):
|
||||
format_id = compat_urllib_parse_urlparse(url).scheme + '-' + q
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': url,
|
||||
'quality': quality_order(q),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
title = remove_end(self._html_search_regex(
|
||||
r'<title>(.*?)-\s*Vuclip</title>', webpage, 'title').strip(), ' - Video')
|
||||
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'\(([0-9:]+)\)</span>', webpage, 'duration', fatal=False))
|
||||
duration = parse_duration(self._html_search_regex(
|
||||
r'[(>]([0-9]+:[0-9]+)(?:<span|\))', webpage, 'duration', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -9,7 +9,6 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
unified_strdate,
|
||||
HEADRequest,
|
||||
float_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -95,16 +94,7 @@ class WatIE(InfoExtractor):
|
||||
m3u8_url.replace('ios.', 'web.').replace('.m3u8', '.f4m'),
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
for m3u8_format in m3u8_formats:
|
||||
mobj = re.search(
|
||||
r'audio.*?%3D(\d+)(?:-video.*?%3D(\d+))?', m3u8_format['url'])
|
||||
if not mobj:
|
||||
continue
|
||||
abr, vbr = mobj.groups()
|
||||
abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000)
|
||||
m3u8_format.update({
|
||||
'vbr': vbr,
|
||||
'abr': abr,
|
||||
})
|
||||
vbr, abr = m3u8_format.get('vbr'), m3u8_format.get('abr')
|
||||
if not vbr or not abr:
|
||||
continue
|
||||
f = m3u8_format.copy()
|
||||
|
@@ -9,8 +9,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class YouJizzIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/[^/#?]+-(?P<id>[0-9]+)\.html(?:$|[?#])'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/(?:[^/#?]+)?-(?P<id>[0-9]+)\.html(?:$|[?#])'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
|
||||
'md5': '07e15fa469ba384c7693fd246905547c',
|
||||
'info_dict': {
|
||||
@@ -19,7 +19,10 @@ class YouJizzIE(InfoExtractor):
|
||||
'title': 'Zeichentrick 1',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.youjizz.com/videos/-2189178.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@@ -53,6 +53,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
"""Provide base functions for Youtube extractors"""
|
||||
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
|
||||
_TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
|
||||
_PASSWORD_CHALLENGE_URL = 'https://accounts.google.com/signin/challenge/sl/password'
|
||||
_NETRC_MACHINE = 'youtube'
|
||||
# If True it will raise an error if no login info is provided
|
||||
_LOGIN_REQUIRED = False
|
||||
@@ -116,12 +117,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
'hl': 'en_US',
|
||||
}
|
||||
|
||||
login_data = urlencode_postdata(login_form_strs)
|
||||
|
||||
req = sanitized_Request(self._LOGIN_URL, login_data)
|
||||
login_results = self._download_webpage(
|
||||
req, None,
|
||||
note='Logging in', errnote='unable to log in', fatal=False)
|
||||
self._PASSWORD_CHALLENGE_URL, None,
|
||||
note='Logging in', errnote='unable to log in', fatal=False,
|
||||
data=urlencode_postdata(login_form_strs))
|
||||
if login_results is False:
|
||||
return False
|
||||
|
||||
@@ -137,7 +136,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
# Two-Factor
|
||||
# TODO add SMS and phone call support - these require making a request and then prompting the user
|
||||
|
||||
if re.search(r'(?i)<form[^>]* id="challenge"', login_results) is not None:
|
||||
if re.search(r'(?i)<form[^>]+id="challenge"', login_results) is not None:
|
||||
tfa_code = self._get_tfa_info('2-step verification code')
|
||||
|
||||
if not tfa_code:
|
||||
@@ -165,17 +164,17 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
if tfa_results is False:
|
||||
return False
|
||||
|
||||
if re.search(r'(?i)<form[^>]* id="challenge"', tfa_results) is not None:
|
||||
if re.search(r'(?i)<form[^>]+id="challenge"', tfa_results) is not None:
|
||||
self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
|
||||
return False
|
||||
if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
|
||||
if re.search(r'(?i)<form[^>]+id="gaia_loginform"', tfa_results) is not None:
|
||||
self._downloader.report_warning('unable to log in - did the page structure change?')
|
||||
return False
|
||||
if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
|
||||
self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
|
||||
return False
|
||||
|
||||
if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
|
||||
if re.search(r'(?i)<form[^>]+id="gaia_loginform"', login_results) is not None:
|
||||
self._downloader.report_warning('unable to log in: bad username or password')
|
||||
return False
|
||||
return True
|
||||
@@ -858,6 +857,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
{
|
||||
'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059)
|
||||
'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
@@ -1731,7 +1735,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
|
||||
class YoutubeSharedVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:https?:)?//(?:www\.)?youtube\.com/shared\?ci=(?P<id>[0-9A-Za-z_-]{11})'
|
||||
_VALID_URL = r'(?:https?:)?//(?:www\.)?youtube\.com/shared\?.*\bci=(?P<id>[0-9A-Za-z_-]{11})'
|
||||
IE_NAME = 'youtube:shared'
|
||||
|
||||
_TEST = {
|
||||
@@ -1978,10 +1982,13 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
|
||||
return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
|
||||
else super(YoutubeChannelIE, cls).suitable(url))
|
||||
|
||||
def _build_template_url(self, url, channel_id):
|
||||
return self._TEMPLATE_URL % channel_id
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
|
||||
url = self._TEMPLATE_URL % channel_id
|
||||
url = self._build_template_url(url, channel_id)
|
||||
|
||||
# Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
|
||||
# Workaround by extracting as a playlist if managed to obtain channel playlist URL
|
||||
@@ -2038,8 +2045,8 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
|
||||
|
||||
class YoutubeUserIE(YoutubeChannelIE):
|
||||
IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
|
||||
_VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:user/|c/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
|
||||
_TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'
|
||||
_VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
|
||||
_TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
|
||||
IE_NAME = 'youtube:user'
|
||||
|
||||
_TESTS = [{
|
||||
@@ -2049,12 +2056,24 @@ class YoutubeUserIE(YoutubeChannelIE):
|
||||
'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
|
||||
'title': 'Uploads from The Linux Foundation',
|
||||
}
|
||||
}, {
|
||||
# Only available via https://www.youtube.com/c/12minuteathlete/videos
|
||||
# but not https://www.youtube.com/user/12minuteathlete/videos
|
||||
'url': 'https://www.youtube.com/c/12minuteathlete/videos',
|
||||
'playlist_mincount': 249,
|
||||
'info_dict': {
|
||||
'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
|
||||
'title': 'Uploads from 12 Minute Athlete',
|
||||
}
|
||||
}, {
|
||||
'url': 'ytuser:phihag',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/c/gametrailers',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/gametrailers',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# This channel is not available.
|
||||
'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
|
||||
@@ -2071,6 +2090,10 @@ class YoutubeUserIE(YoutubeChannelIE):
|
||||
else:
|
||||
return super(YoutubeUserIE, cls).suitable(url)
|
||||
|
||||
def _build_template_url(self, url, channel_id):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
|
||||
|
||||
|
||||
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
|
||||
IE_DESC = 'YouTube.com live streams'
|
||||
|
@@ -26,7 +26,11 @@ def parseOpts(overrideArguments=None):
|
||||
except IOError:
|
||||
return default # silently skip if file is not present
|
||||
try:
|
||||
res = compat_shlex_split(optionf.read(), comments=True)
|
||||
# FIXME: https://github.com/rg3/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
|
||||
contents = optionf.read()
|
||||
if sys.version_info < (3,):
|
||||
contents = contents.decode(preferredencoding())
|
||||
res = compat_shlex_split(contents, comments=True)
|
||||
finally:
|
||||
optionf.close()
|
||||
return res
|
||||
@@ -812,11 +816,11 @@ def parseOpts(overrideArguments=None):
|
||||
system_conf = []
|
||||
user_conf = []
|
||||
else:
|
||||
system_conf = compat_conf(_readOptions('/etc/youtube-dl.conf'))
|
||||
system_conf = _readOptions('/etc/youtube-dl.conf')
|
||||
if '--ignore-config' in system_conf:
|
||||
user_conf = []
|
||||
else:
|
||||
user_conf = compat_conf(_readUserConf())
|
||||
user_conf = _readUserConf()
|
||||
argv = system_conf + user_conf + command_line_conf
|
||||
|
||||
opts, args = parser.parse_args(argv)
|
||||
|
@@ -310,9 +310,17 @@ def get_element_by_id(id, html):
|
||||
return get_element_by_attribute('id', id, html)
|
||||
|
||||
|
||||
def get_element_by_attribute(attribute, value, html):
|
||||
def get_element_by_class(class_name, html):
|
||||
return get_element_by_attribute(
|
||||
'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
|
||||
html, escape_value=False)
|
||||
|
||||
|
||||
def get_element_by_attribute(attribute, value, html, escape_value=True):
|
||||
"""Return the content of the tag with the specified attribute in the passed HTML document"""
|
||||
|
||||
value = re.escape(value) if escape_value else value
|
||||
|
||||
m = re.search(r'''(?xs)
|
||||
<([a-zA-Z0-9:._-]+)
|
||||
(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'))*?
|
||||
@@ -321,7 +329,7 @@ def get_element_by_attribute(attribute, value, html):
|
||||
\s*>
|
||||
(?P<content>.*?)
|
||||
</\1>
|
||||
''' % (re.escape(attribute), re.escape(value)), html)
|
||||
''' % (re.escape(attribute), value), html)
|
||||
|
||||
if not m:
|
||||
return None
|
||||
@@ -2097,6 +2105,7 @@ def mimetype2ext(mt):
|
||||
return ext
|
||||
|
||||
_, _, res = mt.rpartition('/')
|
||||
res = res.lower()
|
||||
|
||||
return {
|
||||
'3gpp': '3gp',
|
||||
@@ -2108,9 +2117,53 @@ def mimetype2ext(mt):
|
||||
'x-flv': 'flv',
|
||||
'x-mp4-fragmented': 'mp4',
|
||||
'x-ms-wmv': 'wmv',
|
||||
'mpegurl': 'm3u8',
|
||||
'x-mpegurl': 'm3u8',
|
||||
'vnd.apple.mpegurl': 'm3u8',
|
||||
'dash+xml': 'mpd',
|
||||
'f4m': 'f4m',
|
||||
'f4m+xml': 'f4m',
|
||||
'hds+xml': 'f4m',
|
||||
'vnd.ms-sstr+xml': 'ism',
|
||||
}.get(res, res)
|
||||
|
||||
|
||||
def parse_codecs(codecs_str):
|
||||
# http://tools.ietf.org/html/rfc6381
|
||||
if not codecs_str:
|
||||
return {}
|
||||
splited_codecs = list(filter(None, map(
|
||||
lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
|
||||
vcodec, acodec = None, None
|
||||
for full_codec in splited_codecs:
|
||||
codec = full_codec.split('.')[0]
|
||||
if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v'):
|
||||
if not vcodec:
|
||||
vcodec = full_codec
|
||||
elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac'):
|
||||
if not acodec:
|
||||
acodec = full_codec
|
||||
else:
|
||||
write_string('WARNING: Unknown codec %s' % full_codec, sys.stderr)
|
||||
if not vcodec and not acodec:
|
||||
if len(splited_codecs) == 2:
|
||||
return {
|
||||
'vcodec': vcodec,
|
||||
'acodec': acodec,
|
||||
}
|
||||
elif len(splited_codecs) == 1:
|
||||
return {
|
||||
'vcodec': 'none',
|
||||
'acodec': vcodec,
|
||||
}
|
||||
else:
|
||||
return {
|
||||
'vcodec': vcodec or 'none',
|
||||
'acodec': acodec or 'none',
|
||||
}
|
||||
return {}
|
||||
|
||||
|
||||
def urlhandle_detect_ext(url_handle):
|
||||
getheader = url_handle.headers.get
|
||||
|
||||
|
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2016.07.06'
|
||||
__version__ = '2016.07.26.2'
|
||||
|
Reference in New Issue
Block a user