Compare commits
213 Commits
2015.08.16
...
2015.09.09
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d85187eb74 | ||
|
|
cc1ac11017 | ||
|
|
73f536439e | ||
|
|
b17e7d9a9b | ||
|
|
2f29b758e0 | ||
|
|
482aa3fecc | ||
|
|
d9c19db340 | ||
|
|
6c4d243de5 | ||
|
|
d1561ef777 | ||
|
|
1072336249 | ||
|
|
75bb5c7028 | ||
|
|
47004d9579 | ||
|
|
12810c9cd3 | ||
|
|
7a459170fa | ||
|
|
3cf0df568a | ||
|
|
b88ebd472e | ||
|
|
3ecc527209 | ||
|
|
b1b7d1ffba | ||
|
|
4003bd82b0 | ||
|
|
8801255d7d | ||
|
|
c67a055d16 | ||
|
|
bc973e06d0 | ||
|
|
aeb3c8a0e8 | ||
|
|
cf33a47df0 | ||
|
|
daeb0f04cd | ||
|
|
97243fe395 | ||
|
|
9dbdb65abe | ||
|
|
9af461de35 | ||
|
|
4d71e200c6 | ||
|
|
8e0bdabed2 | ||
|
|
bca553caac | ||
|
|
a2f42a3baf | ||
|
|
7465222a9c | ||
|
|
e28034c5ac | ||
|
|
12bbd32ad0 | ||
|
|
266e466ee4 | ||
|
|
cf83f532ae | ||
|
|
cd019668dc | ||
|
|
515fc8776f | ||
|
|
c7c0996d8c | ||
|
|
b3e64671cc | ||
|
|
4abe214499 | ||
|
|
e94cb5ae7e | ||
|
|
e213c98df1 | ||
|
|
1639282434 | ||
|
|
be0e5dbd83 | ||
|
|
ad72917274 | ||
|
|
6a3f4c3f82 | ||
|
|
a6420bf50c | ||
|
|
eb387896e9 | ||
|
|
8df5ae15d1 | ||
|
|
12439dd5ec | ||
|
|
3513d41436 | ||
|
|
cab792abe5 | ||
|
|
8870358b1b | ||
|
|
ee087c79ad | ||
|
|
51f579b635 | ||
|
|
4abf617b9c | ||
|
|
3026164b16 | ||
|
|
9dd73ef4a4 | ||
|
|
75c72a1e67 | ||
|
|
08354db47b | ||
|
|
027eb5a6b0 | ||
|
|
f71264490c | ||
|
|
6270239a6d | ||
|
|
1195a38f46 | ||
|
|
66e289bab4 | ||
|
|
52c6f26cab | ||
|
|
dc534b674f | ||
|
|
f30c2e8e98 | ||
|
|
c482b3c69a | ||
|
|
266b0ad676 | ||
|
|
87f70ab39d | ||
|
|
8e636da499 | ||
|
|
22889ab175 | ||
|
|
5d2354f177 | ||
|
|
a41fb80ce1 | ||
|
|
2e2575e213 | ||
|
|
26c61e0809 | ||
|
|
e7a8c3032d | ||
|
|
725d1c58aa | ||
|
|
bd6742137f | ||
|
|
e8dcfa3d69 | ||
|
|
88720ed09b | ||
|
|
1e804244d0 | ||
|
|
198492bbf0 | ||
|
|
8f9d522f62 | ||
|
|
cbae233aba | ||
|
|
b17ca9c945 | ||
|
|
ebf4ca39ba | ||
|
|
e5e78797e6 | ||
|
|
080997b808 | ||
|
|
77306e8b97 | ||
|
|
6917d2a2f0 | ||
|
|
36c15522c1 | ||
|
|
804c343a4f | ||
|
|
cd5d75427e | ||
|
|
5ddc127da6 | ||
|
|
f859695b49 | ||
|
|
cb3d2eb9e9 | ||
|
|
33eae08f04 | ||
|
|
aa3f98677d | ||
|
|
fffccaaf41 | ||
|
|
cdc8d0c373 | ||
|
|
d14f0c45fc | ||
|
|
39955b0451 | ||
|
|
52dfb7ffe2 | ||
|
|
93462856e1 | ||
|
|
615f155a3a | ||
|
|
fcd9e423ec | ||
|
|
db8f2bfd99 | ||
|
|
55801fc76e | ||
|
|
d3d89c3256 | ||
|
|
8875b3d572 | ||
|
|
aabc2be693 | ||
|
|
c9afb51cea | ||
|
|
c0a656876c | ||
|
|
17a647630b | ||
|
|
c88e118b3c | ||
|
|
ae6a802106 | ||
|
|
b184f94413 | ||
|
|
ee3ec091f4 | ||
|
|
ef49b59053 | ||
|
|
1f8125805e | ||
|
|
efd712c69b | ||
|
|
109a4156e1 | ||
|
|
678d33295b | ||
|
|
5e58956d0a | ||
|
|
e276fd2cb3 | ||
|
|
9b22cb10c4 | ||
|
|
8ca31a0e05 | ||
|
|
20149a5da1 | ||
|
|
054d43bb11 | ||
|
|
65488b820c | ||
|
|
c3c9f87954 | ||
|
|
56f447be9f | ||
|
|
79fa9db0da | ||
|
|
071c10137b | ||
|
|
a4962b80d6 | ||
|
|
5307c33232 | ||
|
|
1b660cce12 | ||
|
|
8df8c278b6 | ||
|
|
d7e8264517 | ||
|
|
f11c316347 | ||
|
|
f62e02c24f | ||
|
|
70113c38c9 | ||
|
|
3d8132f5e2 | ||
|
|
39affb5aa4 | ||
|
|
a882c5f474 | ||
|
|
61a7ff1622 | ||
|
|
42e7373bd3 | ||
|
|
e269d3ae7d | ||
|
|
e7ddaef5bd | ||
|
|
62984e4584 | ||
|
|
3c53455d15 | ||
|
|
bbb43a39fd | ||
|
|
43e7d3c945 | ||
|
|
2f72e83bbd | ||
|
|
57179b4ca1 | ||
|
|
4bc8eec4eb | ||
|
|
baf510bf8c | ||
|
|
6d53cdd6ce | ||
|
|
ebbf078c7d | ||
|
|
95e431e9ec | ||
|
|
eba470f2f2 | ||
|
|
11addc50ff | ||
|
|
e4df2f98cc | ||
|
|
e7c14660d3 | ||
|
|
90076b6172 | ||
|
|
28b83495d8 | ||
|
|
551c7837ac | ||
|
|
59e6acc757 | ||
|
|
9990c960f2 | ||
|
|
2006a06eff | ||
|
|
2b6bda1ed8 | ||
|
|
468083d2f5 | ||
|
|
483fc223bb | ||
|
|
66ce97024d | ||
|
|
8c97f81943 | ||
|
|
d7c1630570 | ||
|
|
5e1a5ac8de | ||
|
|
9eb4ab6ad9 | ||
|
|
4932a817a0 | ||
|
|
5d003e29b1 | ||
|
|
dc95bd503e | ||
|
|
f738dd7b7c | ||
|
|
f908b74fa3 | ||
|
|
c687ac745b | ||
|
|
912e0b7e46 | ||
|
|
03bc7237ad | ||
|
|
061f62da54 | ||
|
|
dd565ac1ad | ||
|
|
5cdefc4625 | ||
|
|
ce00af8767 | ||
|
|
51047444aa | ||
|
|
aa6cd05ed8 | ||
|
|
dac14bf311 | ||
|
|
05fe2594e4 | ||
|
|
26e1c3514f | ||
|
|
22c83245c5 | ||
|
|
7900aede14 | ||
|
|
f877c6ae5a | ||
|
|
ca681f7041 | ||
|
|
a01da8bbf8 | ||
|
|
f3a65d9636 | ||
|
|
559f4c550f | ||
|
|
03c635a4b5 | ||
|
|
34a4cd0a34 | ||
|
|
3b9b32f404 | ||
|
|
9c724a9802 | ||
|
|
7a6e8a1b17 | ||
|
|
369c12e038 | ||
|
|
233c1c0e76 |
@@ -5,9 +5,7 @@ python:
|
|||||||
- "3.2"
|
- "3.2"
|
||||||
- "3.3"
|
- "3.3"
|
||||||
- "3.4"
|
- "3.4"
|
||||||
before_install:
|
sudo: false
|
||||||
- sudo apt-get update -qq
|
|
||||||
- sudo apt-get install -yqq rtmpdump
|
|
||||||
script: nosetests test --verbose
|
script: nosetests test --verbose
|
||||||
notifications:
|
notifications:
|
||||||
email:
|
email:
|
||||||
|
|||||||
3
AUTHORS
3
AUTHORS
@@ -140,3 +140,6 @@ Behrouz Abbasi
|
|||||||
ngld
|
ngld
|
||||||
nyuszika7h
|
nyuszika7h
|
||||||
Shaun Walbridge
|
Shaun Walbridge
|
||||||
|
Lee Jenkins
|
||||||
|
Anssi Hannula
|
||||||
|
Lukáš Lalinský
|
||||||
|
|||||||
@@ -125,7 +125,7 @@ If you want to add support for a new site, you can follow this quick list (assum
|
|||||||
```
|
```
|
||||||
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
|
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
|
||||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will be then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will be then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
||||||
7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62-L200). Add tests and code for as many as you want.
|
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62-L200). Add tests and code for as many as you want.
|
||||||
8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8).
|
8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8).
|
||||||
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
||||||
|
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ You can also use pip:
|
|||||||
|
|
||||||
sudo pip install youtube-dl
|
sudo pip install youtube-dl
|
||||||
|
|
||||||
Alternatively, refer to the developer instructions below for how to check out and work with the git repository. For further options, including PGP signatures, see https://rg3.github.io/youtube-dl/download.html .
|
Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see https://rg3.github.io/youtube-dl/download.html .
|
||||||
|
|
||||||
# DESCRIPTION
|
# DESCRIPTION
|
||||||
**youtube-dl** is a small command-line program to download videos from
|
**youtube-dl** is a small command-line program to download videos from
|
||||||
@@ -207,7 +207,7 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
-p, --password PASSWORD Account password. If this option is left out, youtube-dl will ask interactively.
|
-p, --password PASSWORD Account password. If this option is left out, youtube-dl will ask interactively.
|
||||||
-2, --twofactor TWOFACTOR Two-factor auth code
|
-2, --twofactor TWOFACTOR Two-factor auth code
|
||||||
-n, --netrc Use .netrc authentication data
|
-n, --netrc Use .netrc authentication data
|
||||||
--video-password PASSWORD Video password (vimeo, smotri)
|
--video-password PASSWORD Video password (vimeo, smotri, youku)
|
||||||
|
|
||||||
## Post-processing Options:
|
## Post-processing Options:
|
||||||
-x, --extract-audio Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)
|
-x, --extract-audio Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)
|
||||||
@@ -552,7 +552,7 @@ If you want to add support for a new site, you can follow this quick list (assum
|
|||||||
```
|
```
|
||||||
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
|
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
|
||||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will be then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will be then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
||||||
7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62-L200). Add tests and code for as many as you want.
|
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62-L200). Add tests and code for as many as you want.
|
||||||
8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8).
|
8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8).
|
||||||
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
||||||
|
|
||||||
|
|||||||
@@ -166,7 +166,7 @@
|
|||||||
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
||||||
- **FootyRoom**
|
- **FootyRoom**
|
||||||
- **Foxgay**
|
- **Foxgay**
|
||||||
- **FoxNews**
|
- **FoxNews**: Fox News and Fox Business Video
|
||||||
- **FoxSports**
|
- **FoxSports**
|
||||||
- **france2.fr:generation-quoi**
|
- **france2.fr:generation-quoi**
|
||||||
- **FranceCulture**
|
- **FranceCulture**
|
||||||
@@ -195,7 +195,7 @@
|
|||||||
- **GodTube**
|
- **GodTube**
|
||||||
- **GoldenMoustache**
|
- **GoldenMoustache**
|
||||||
- **Golem**
|
- **Golem**
|
||||||
- **GorillaVid**: GorillaVid.in, daclips.in, movpod.in, fastvideo.in and realvid.net
|
- **GorillaVid**: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net and filehoot.com
|
||||||
- **Goshgay**
|
- **Goshgay**
|
||||||
- **Groupon**
|
- **Groupon**
|
||||||
- **Hark**
|
- **Hark**
|
||||||
@@ -220,6 +220,7 @@
|
|||||||
- **imdb**: Internet Movie Database trailers
|
- **imdb**: Internet Movie Database trailers
|
||||||
- **imdb:list**: Internet Movie Database lists
|
- **imdb:list**: Internet Movie Database lists
|
||||||
- **Imgur**
|
- **Imgur**
|
||||||
|
- **ImgurAlbum**
|
||||||
- **Ina**
|
- **Ina**
|
||||||
- **Indavideo**
|
- **Indavideo**
|
||||||
- **IndavideoEmbed**
|
- **IndavideoEmbed**
|
||||||
@@ -301,13 +302,15 @@
|
|||||||
- **Moviezine**
|
- **Moviezine**
|
||||||
- **movshare**: MovShare
|
- **movshare**: MovShare
|
||||||
- **MPORA**
|
- **MPORA**
|
||||||
|
- **MSNBC**
|
||||||
- **MTV**
|
- **MTV**
|
||||||
|
- **mtv.de**
|
||||||
- **mtviggy.com**
|
- **mtviggy.com**
|
||||||
- **mtvservices:embedded**
|
- **mtvservices:embedded**
|
||||||
- **MuenchenTV**: münchen.tv
|
- **MuenchenTV**: münchen.tv
|
||||||
- **MusicPlayOn**
|
- **MusicPlayOn**
|
||||||
- **MusicVault**
|
|
||||||
- **muzu.tv**
|
- **muzu.tv**
|
||||||
|
- **Mwave**
|
||||||
- **MySpace**
|
- **MySpace**
|
||||||
- **MySpace:album**
|
- **MySpace:album**
|
||||||
- **MySpass**
|
- **MySpass**
|
||||||
@@ -392,6 +395,8 @@
|
|||||||
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
|
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
|
||||||
- **Playvid**
|
- **Playvid**
|
||||||
- **Playwire**
|
- **Playwire**
|
||||||
|
- **pluralsight**
|
||||||
|
- **pluralsight:course**
|
||||||
- **plus.google**: Google Plus
|
- **plus.google**: Google Plus
|
||||||
- **pluzz.francetv.fr**
|
- **pluzz.francetv.fr**
|
||||||
- **podomatic**
|
- **podomatic**
|
||||||
@@ -461,7 +466,7 @@
|
|||||||
- **Sexu**
|
- **Sexu**
|
||||||
- **SexyKarma**: Sexy Karma and Watch Indian Porn
|
- **SexyKarma**: Sexy Karma and Watch Indian Porn
|
||||||
- **Shahid**
|
- **Shahid**
|
||||||
- **Shared**
|
- **Shared**: shared.sx and vivo.sx
|
||||||
- **ShareSix**
|
- **ShareSix**
|
||||||
- **Sina**
|
- **Sina**
|
||||||
- **Slideshare**
|
- **Slideshare**
|
||||||
@@ -534,6 +539,7 @@
|
|||||||
- **TF1**
|
- **TF1**
|
||||||
- **TheOnion**
|
- **TheOnion**
|
||||||
- **ThePlatform**
|
- **ThePlatform**
|
||||||
|
- **ThePlatformFeed**
|
||||||
- **TheSixtyOne**
|
- **TheSixtyOne**
|
||||||
- **ThisAmericanLife**
|
- **ThisAmericanLife**
|
||||||
- **ThisAV**
|
- **ThisAV**
|
||||||
@@ -599,7 +605,6 @@
|
|||||||
- **Viddler**
|
- **Viddler**
|
||||||
- **video.google:search**: Google Video search
|
- **video.google:search**: Google Video search
|
||||||
- **video.mit.edu**
|
- **video.mit.edu**
|
||||||
- **VideoBam**
|
|
||||||
- **VideoDetective**
|
- **VideoDetective**
|
||||||
- **videofy.me**
|
- **videofy.me**
|
||||||
- **videolectures.net**
|
- **videolectures.net**
|
||||||
@@ -627,6 +632,7 @@
|
|||||||
- **vine:user**
|
- **vine:user**
|
||||||
- **vk**: VK
|
- **vk**: VK
|
||||||
- **vk:uservideos**: VK - User's Videos
|
- **vk:uservideos**: VK - User's Videos
|
||||||
|
- **vlive**
|
||||||
- **Vodlocker**
|
- **Vodlocker**
|
||||||
- **VoiceRepublic**
|
- **VoiceRepublic**
|
||||||
- **Vporn**
|
- **Vporn**
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ from youtube_dl.utils import get_filesystem_encoding
|
|||||||
from youtube_dl.compat import (
|
from youtube_dl.compat import (
|
||||||
compat_getenv,
|
compat_getenv,
|
||||||
compat_expanduser,
|
compat_expanduser,
|
||||||
|
compat_shlex_split,
|
||||||
compat_urllib_parse_unquote,
|
compat_urllib_parse_unquote,
|
||||||
compat_urllib_parse_unquote_plus,
|
compat_urllib_parse_unquote_plus,
|
||||||
)
|
)
|
||||||
@@ -67,5 +68,8 @@ class TestCompat(unittest.TestCase):
|
|||||||
self.assertEqual(compat_urllib_parse_unquote_plus('abc%20def'), 'abc def')
|
self.assertEqual(compat_urllib_parse_unquote_plus('abc%20def'), 'abc def')
|
||||||
self.assertEqual(compat_urllib_parse_unquote_plus('%7e/abc+def'), '~/abc def')
|
self.assertEqual(compat_urllib_parse_unquote_plus('%7e/abc+def'), '~/abc def')
|
||||||
|
|
||||||
|
def test_compat_shlex_split(self):
|
||||||
|
self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two'])
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ from youtube_dl.extractor import (
|
|||||||
RaiIE,
|
RaiIE,
|
||||||
VikiIE,
|
VikiIE,
|
||||||
ThePlatformIE,
|
ThePlatformIE,
|
||||||
|
ThePlatformFeedIE,
|
||||||
RTVEALaCartaIE,
|
RTVEALaCartaIE,
|
||||||
FunnyOrDieIE,
|
FunnyOrDieIE,
|
||||||
)
|
)
|
||||||
@@ -307,6 +308,18 @@ class TestThePlatformSubtitles(BaseTestSubtitles):
|
|||||||
self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
|
self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
|
||||||
|
|
||||||
|
|
||||||
|
class TestThePlatformFeedSubtitles(BaseTestSubtitles):
|
||||||
|
url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207'
|
||||||
|
IE = ThePlatformFeedIE
|
||||||
|
|
||||||
|
def test_allsubtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(set(subtitles.keys()), set(['en']))
|
||||||
|
self.assertEqual(md5(subtitles['en']), '48649a22e82b2da21c9a67a395eedade')
|
||||||
|
|
||||||
|
|
||||||
class TestRtveSubtitles(BaseTestSubtitles):
|
class TestRtveSubtitles(BaseTestSubtitles):
|
||||||
url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
|
url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
|
||||||
IE = RTVEALaCartaIE
|
IE = RTVEALaCartaIE
|
||||||
|
|||||||
@@ -57,11 +57,16 @@ from youtube_dl.utils import (
|
|||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
version_tuple,
|
version_tuple,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
|
xpath_element,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
|
xpath_attr,
|
||||||
render_table,
|
render_table,
|
||||||
match_str,
|
match_str,
|
||||||
parse_dfxp_time_expr,
|
parse_dfxp_time_expr,
|
||||||
dfxp2srt,
|
dfxp2srt,
|
||||||
|
cli_option,
|
||||||
|
cli_valueless_option,
|
||||||
|
cli_bool_option,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -264,6 +269,16 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(find('media:song/media:author').text, 'The Author')
|
self.assertEqual(find('media:song/media:author').text, 'The Author')
|
||||||
self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3')
|
self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3')
|
||||||
|
|
||||||
|
def test_xpath_element(self):
|
||||||
|
doc = xml.etree.ElementTree.Element('root')
|
||||||
|
div = xml.etree.ElementTree.SubElement(doc, 'div')
|
||||||
|
p = xml.etree.ElementTree.SubElement(div, 'p')
|
||||||
|
p.text = 'Foo'
|
||||||
|
self.assertEqual(xpath_element(doc, 'div/p'), p)
|
||||||
|
self.assertEqual(xpath_element(doc, 'div/bar', default='default'), 'default')
|
||||||
|
self.assertTrue(xpath_element(doc, 'div/bar') is None)
|
||||||
|
self.assertRaises(ExtractorError, xpath_element, doc, 'div/bar', fatal=True)
|
||||||
|
|
||||||
def test_xpath_text(self):
|
def test_xpath_text(self):
|
||||||
testxml = '''<root>
|
testxml = '''<root>
|
||||||
<div>
|
<div>
|
||||||
@@ -272,9 +287,25 @@ class TestUtil(unittest.TestCase):
|
|||||||
</root>'''
|
</root>'''
|
||||||
doc = xml.etree.ElementTree.fromstring(testxml)
|
doc = xml.etree.ElementTree.fromstring(testxml)
|
||||||
self.assertEqual(xpath_text(doc, 'div/p'), 'Foo')
|
self.assertEqual(xpath_text(doc, 'div/p'), 'Foo')
|
||||||
|
self.assertEqual(xpath_text(doc, 'div/bar', default='default'), 'default')
|
||||||
self.assertTrue(xpath_text(doc, 'div/bar') is None)
|
self.assertTrue(xpath_text(doc, 'div/bar') is None)
|
||||||
self.assertRaises(ExtractorError, xpath_text, doc, 'div/bar', fatal=True)
|
self.assertRaises(ExtractorError, xpath_text, doc, 'div/bar', fatal=True)
|
||||||
|
|
||||||
|
def test_xpath_attr(self):
|
||||||
|
testxml = '''<root>
|
||||||
|
<div>
|
||||||
|
<p x="a">Foo</p>
|
||||||
|
</div>
|
||||||
|
</root>'''
|
||||||
|
doc = xml.etree.ElementTree.fromstring(testxml)
|
||||||
|
self.assertEqual(xpath_attr(doc, 'div/p', 'x'), 'a')
|
||||||
|
self.assertEqual(xpath_attr(doc, 'div/bar', 'x'), None)
|
||||||
|
self.assertEqual(xpath_attr(doc, 'div/p', 'y'), None)
|
||||||
|
self.assertEqual(xpath_attr(doc, 'div/bar', 'x', default='default'), 'default')
|
||||||
|
self.assertEqual(xpath_attr(doc, 'div/p', 'y', default='default'), 'default')
|
||||||
|
self.assertRaises(ExtractorError, xpath_attr, doc, 'div/bar', 'x', fatal=True)
|
||||||
|
self.assertRaises(ExtractorError, xpath_attr, doc, 'div/p', 'y', fatal=True)
|
||||||
|
|
||||||
def test_smuggle_url(self):
|
def test_smuggle_url(self):
|
||||||
data = {"ö": "ö", "abc": [3]}
|
data = {"ö": "ö", "abc": [3]}
|
||||||
url = 'https://foo.bar/baz?x=y#a'
|
url = 'https://foo.bar/baz?x=y#a'
|
||||||
@@ -646,6 +677,51 @@ The first line
|
|||||||
'''
|
'''
|
||||||
self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data)
|
self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data)
|
||||||
|
|
||||||
|
def test_cli_option(self):
|
||||||
|
self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128'])
|
||||||
|
self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), [])
|
||||||
|
self.assertEqual(cli_option({}, '--proxy', 'proxy'), [])
|
||||||
|
|
||||||
|
def test_cli_valueless_option(self):
|
||||||
|
self.assertEqual(cli_valueless_option(
|
||||||
|
{'downloader': 'external'}, '--external-downloader', 'downloader', 'external'), ['--external-downloader'])
|
||||||
|
self.assertEqual(cli_valueless_option(
|
||||||
|
{'downloader': 'internal'}, '--external-downloader', 'downloader', 'external'), [])
|
||||||
|
self.assertEqual(cli_valueless_option(
|
||||||
|
{'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate'), ['--no-check-certificate'])
|
||||||
|
self.assertEqual(cli_valueless_option(
|
||||||
|
{'nocheckcertificate': False}, '--no-check-certificate', 'nocheckcertificate'), [])
|
||||||
|
self.assertEqual(cli_valueless_option(
|
||||||
|
{'checkcertificate': True}, '--no-check-certificate', 'checkcertificate', False), [])
|
||||||
|
self.assertEqual(cli_valueless_option(
|
||||||
|
{'checkcertificate': False}, '--no-check-certificate', 'checkcertificate', False), ['--no-check-certificate'])
|
||||||
|
|
||||||
|
def test_cli_bool_option(self):
|
||||||
|
self.assertEqual(
|
||||||
|
cli_bool_option(
|
||||||
|
{'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate'),
|
||||||
|
['--no-check-certificate', 'true'])
|
||||||
|
self.assertEqual(
|
||||||
|
cli_bool_option(
|
||||||
|
{'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate', separator='='),
|
||||||
|
['--no-check-certificate=true'])
|
||||||
|
self.assertEqual(
|
||||||
|
cli_bool_option(
|
||||||
|
{'nocheckcertificate': True}, '--check-certificate', 'nocheckcertificate', 'false', 'true'),
|
||||||
|
['--check-certificate', 'false'])
|
||||||
|
self.assertEqual(
|
||||||
|
cli_bool_option(
|
||||||
|
{'nocheckcertificate': True}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
|
||||||
|
['--check-certificate=false'])
|
||||||
|
self.assertEqual(
|
||||||
|
cli_bool_option(
|
||||||
|
{'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true'),
|
||||||
|
['--check-certificate', 'true'])
|
||||||
|
self.assertEqual(
|
||||||
|
cli_bool_option(
|
||||||
|
{'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
|
||||||
|
['--check-certificate=true'])
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ params = get_params({
|
|||||||
|
|
||||||
|
|
||||||
TEST_ID = 'gr51aVj-mLg'
|
TEST_ID = 'gr51aVj-mLg'
|
||||||
ANNOTATIONS_FILE = TEST_ID + '.flv.annotations.xml'
|
ANNOTATIONS_FILE = TEST_ID + '.annotations.xml'
|
||||||
EXPECTED_ANNOTATIONS = ['Speech bubble', 'Note', 'Title', 'Spotlight', 'Label']
|
EXPECTED_ANNOTATIONS = ['Speech bubble', 'Note', 'Title', 'Spotlight', 'Label']
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -69,6 +69,7 @@ from .utils import (
|
|||||||
version_tuple,
|
version_tuple,
|
||||||
write_json_file,
|
write_json_file,
|
||||||
write_string,
|
write_string,
|
||||||
|
YoutubeDLCookieProcessor,
|
||||||
YoutubeDLHandler,
|
YoutubeDLHandler,
|
||||||
prepend_extension,
|
prepend_extension,
|
||||||
replace_extension,
|
replace_extension,
|
||||||
@@ -284,7 +285,11 @@ class YoutubeDL(object):
|
|||||||
self._num_downloads = 0
|
self._num_downloads = 0
|
||||||
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
|
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
|
||||||
self._err_file = sys.stderr
|
self._err_file = sys.stderr
|
||||||
self.params = params
|
self.params = {
|
||||||
|
# Default parameters
|
||||||
|
'nocheckcertificate': False,
|
||||||
|
}
|
||||||
|
self.params.update(params)
|
||||||
self.cache = Cache(self)
|
self.cache = Cache(self)
|
||||||
|
|
||||||
if params.get('bidi_workaround', False):
|
if params.get('bidi_workaround', False):
|
||||||
@@ -1939,8 +1944,7 @@ class YoutubeDL(object):
|
|||||||
if os.access(opts_cookiefile, os.R_OK):
|
if os.access(opts_cookiefile, os.R_OK):
|
||||||
self.cookiejar.load()
|
self.cookiejar.load()
|
||||||
|
|
||||||
cookie_processor = compat_urllib_request.HTTPCookieProcessor(
|
cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
|
||||||
self.cookiejar)
|
|
||||||
if opts_proxy is not None:
|
if opts_proxy is not None:
|
||||||
if opts_proxy == '':
|
if opts_proxy == '':
|
||||||
proxies = {}
|
proxies = {}
|
||||||
@@ -2009,7 +2013,7 @@ class YoutubeDL(object):
|
|||||||
(info_dict['extractor'], info_dict['id'], thumb_display_id))
|
(info_dict['extractor'], info_dict['id'], thumb_display_id))
|
||||||
try:
|
try:
|
||||||
uf = self.urlopen(t['url'])
|
uf = self.urlopen(t['url'])
|
||||||
with open(thumb_filename, 'wb') as thumbf:
|
with open(encodeFilename(thumb_filename), 'wb') as thumbf:
|
||||||
shutil.copyfileobj(uf, thumbf)
|
shutil.copyfileobj(uf, thumbf)
|
||||||
self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
|
self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
|
||||||
(info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
|
(info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
|
||||||
|
|||||||
@@ -9,7 +9,6 @@ import codecs
|
|||||||
import io
|
import io
|
||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
import shlex
|
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
|
||||||
@@ -20,6 +19,7 @@ from .compat import (
|
|||||||
compat_expanduser,
|
compat_expanduser,
|
||||||
compat_getpass,
|
compat_getpass,
|
||||||
compat_print,
|
compat_print,
|
||||||
|
compat_shlex_split,
|
||||||
workaround_optparse_bug9161,
|
workaround_optparse_bug9161,
|
||||||
)
|
)
|
||||||
from .utils import (
|
from .utils import (
|
||||||
@@ -262,10 +262,10 @@ def _real_main(argv=None):
|
|||||||
parser.error('setting filesize xattr requested but python-xattr is not available')
|
parser.error('setting filesize xattr requested but python-xattr is not available')
|
||||||
external_downloader_args = None
|
external_downloader_args = None
|
||||||
if opts.external_downloader_args:
|
if opts.external_downloader_args:
|
||||||
external_downloader_args = shlex.split(opts.external_downloader_args)
|
external_downloader_args = compat_shlex_split(opts.external_downloader_args)
|
||||||
postprocessor_args = None
|
postprocessor_args = None
|
||||||
if opts.postprocessor_args:
|
if opts.postprocessor_args:
|
||||||
postprocessor_args = shlex.split(opts.postprocessor_args)
|
postprocessor_args = compat_shlex_split(opts.postprocessor_args)
|
||||||
match_filter = (
|
match_filter = (
|
||||||
None if opts.match_filter is None
|
None if opts.match_filter is None
|
||||||
else match_filter_func(opts.match_filter))
|
else match_filter_func(opts.match_filter))
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import getpass
|
|||||||
import optparse
|
import optparse
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import shlex
|
||||||
import shutil
|
import shutil
|
||||||
import socket
|
import socket
|
||||||
import subprocess
|
import subprocess
|
||||||
@@ -227,6 +228,17 @@ except ImportError: # Python < 3.3
|
|||||||
return "'" + s.replace("'", "'\"'\"'") + "'"
|
return "'" + s.replace("'", "'\"'\"'") + "'"
|
||||||
|
|
||||||
|
|
||||||
|
if sys.version_info >= (2, 7, 3):
|
||||||
|
compat_shlex_split = shlex.split
|
||||||
|
else:
|
||||||
|
# Working around shlex issue with unicode strings on some python 2
|
||||||
|
# versions (see http://bugs.python.org/issue1548891)
|
||||||
|
def compat_shlex_split(s, comments=False, posix=True):
|
||||||
|
if isinstance(s, unicode):
|
||||||
|
s = s.encode('utf-8')
|
||||||
|
return shlex.split(s, comments, posix)
|
||||||
|
|
||||||
|
|
||||||
def compat_ord(c):
|
def compat_ord(c):
|
||||||
if type(c) is int:
|
if type(c) is int:
|
||||||
return c
|
return c
|
||||||
@@ -459,6 +471,7 @@ __all__ = [
|
|||||||
'compat_ord',
|
'compat_ord',
|
||||||
'compat_parse_qs',
|
'compat_parse_qs',
|
||||||
'compat_print',
|
'compat_print',
|
||||||
|
'compat_shlex_split',
|
||||||
'compat_socket_create_connection',
|
'compat_socket_create_connection',
|
||||||
'compat_str',
|
'compat_str',
|
||||||
'compat_subprocess_get_DEVNULL',
|
'compat_subprocess_get_DEVNULL',
|
||||||
|
|||||||
@@ -5,6 +5,10 @@ import subprocess
|
|||||||
|
|
||||||
from .common import FileDownloader
|
from .common import FileDownloader
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
cli_option,
|
||||||
|
cli_valueless_option,
|
||||||
|
cli_bool_option,
|
||||||
|
cli_configuration_args,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
encodeArgument,
|
encodeArgument,
|
||||||
)
|
)
|
||||||
@@ -46,19 +50,16 @@ class ExternalFD(FileDownloader):
|
|||||||
return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')
|
return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')
|
||||||
|
|
||||||
def _option(self, command_option, param):
|
def _option(self, command_option, param):
|
||||||
param = self.params.get(param)
|
return cli_option(self.params, command_option, param)
|
||||||
if param is None:
|
|
||||||
return []
|
def _bool_option(self, command_option, param, true_value='true', false_value='false', separator=None):
|
||||||
if isinstance(param, bool):
|
return cli_bool_option(self.params, command_option, param, true_value, false_value, separator)
|
||||||
return [command_option]
|
|
||||||
return [command_option, param]
|
def _valueless_option(self, command_option, param, expected_value=True):
|
||||||
|
return cli_valueless_option(self.params, command_option, param, expected_value)
|
||||||
|
|
||||||
def _configuration_args(self, default=[]):
|
def _configuration_args(self, default=[]):
|
||||||
ex_args = self.params.get('external_downloader_args')
|
return cli_configuration_args(self.params, 'external_downloader_args', default)
|
||||||
if ex_args is None:
|
|
||||||
return default
|
|
||||||
assert isinstance(ex_args, list)
|
|
||||||
return ex_args
|
|
||||||
|
|
||||||
def _call_downloader(self, tmpfilename, info_dict):
|
def _call_downloader(self, tmpfilename, info_dict):
|
||||||
""" Either overwrite this or implement _make_cmd """
|
""" Either overwrite this or implement _make_cmd """
|
||||||
@@ -80,6 +81,8 @@ class CurlFD(ExternalFD):
|
|||||||
for key, val in info_dict['http_headers'].items():
|
for key, val in info_dict['http_headers'].items():
|
||||||
cmd += ['--header', '%s: %s' % (key, val)]
|
cmd += ['--header', '%s: %s' % (key, val)]
|
||||||
cmd += self._option('--interface', 'source_address')
|
cmd += self._option('--interface', 'source_address')
|
||||||
|
cmd += self._option('--proxy', 'proxy')
|
||||||
|
cmd += self._valueless_option('--insecure', 'nocheckcertificate')
|
||||||
cmd += self._configuration_args()
|
cmd += self._configuration_args()
|
||||||
cmd += ['--', info_dict['url']]
|
cmd += ['--', info_dict['url']]
|
||||||
return cmd
|
return cmd
|
||||||
@@ -102,7 +105,7 @@ class WgetFD(ExternalFD):
|
|||||||
cmd += ['--header', '%s: %s' % (key, val)]
|
cmd += ['--header', '%s: %s' % (key, val)]
|
||||||
cmd += self._option('--bind-address', 'source_address')
|
cmd += self._option('--bind-address', 'source_address')
|
||||||
cmd += self._option('--proxy', 'proxy')
|
cmd += self._option('--proxy', 'proxy')
|
||||||
cmd += self._option('--no-check-certificate', 'nocheckcertificate')
|
cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
|
||||||
cmd += self._configuration_args()
|
cmd += self._configuration_args()
|
||||||
cmd += ['--', info_dict['url']]
|
cmd += ['--', info_dict['url']]
|
||||||
return cmd
|
return cmd
|
||||||
@@ -121,6 +124,7 @@ class Aria2cFD(ExternalFD):
|
|||||||
cmd += ['--header', '%s: %s' % (key, val)]
|
cmd += ['--header', '%s: %s' % (key, val)]
|
||||||
cmd += self._option('--interface', 'source_address')
|
cmd += self._option('--interface', 'source_address')
|
||||||
cmd += self._option('--all-proxy', 'proxy')
|
cmd += self._option('--all-proxy', 'proxy')
|
||||||
|
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
|
||||||
cmd += ['--', info_dict['url']]
|
cmd += ['--', info_dict['url']]
|
||||||
return cmd
|
return cmd
|
||||||
|
|
||||||
|
|||||||
@@ -13,6 +13,8 @@ from ..compat import (
|
|||||||
compat_urllib_error,
|
compat_urllib_error,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
encodeFilename,
|
||||||
|
sanitize_open,
|
||||||
struct_pack,
|
struct_pack,
|
||||||
struct_unpack,
|
struct_unpack,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
@@ -343,18 +345,19 @@ class F4mFD(FragmentFD):
|
|||||||
success = ctx['dl'].download(frag_filename, {'url': url})
|
success = ctx['dl'].download(frag_filename, {'url': url})
|
||||||
if not success:
|
if not success:
|
||||||
return False
|
return False
|
||||||
with open(frag_filename, 'rb') as down:
|
(down, frag_sanitized) = sanitize_open(frag_filename, 'rb')
|
||||||
down_data = down.read()
|
down_data = down.read()
|
||||||
reader = FlvReader(down_data)
|
down.close()
|
||||||
while True:
|
reader = FlvReader(down_data)
|
||||||
_, box_type, box_data = reader.read_box_info()
|
while True:
|
||||||
if box_type == b'mdat':
|
_, box_type, box_data = reader.read_box_info()
|
||||||
dest_stream.write(box_data)
|
if box_type == b'mdat':
|
||||||
break
|
dest_stream.write(box_data)
|
||||||
|
break
|
||||||
if live:
|
if live:
|
||||||
os.remove(frag_filename)
|
os.remove(encodeFilename(frag_sanitized))
|
||||||
else:
|
else:
|
||||||
frags_filenames.append(frag_filename)
|
frags_filenames.append(frag_sanitized)
|
||||||
except (compat_urllib_error.HTTPError, ) as err:
|
except (compat_urllib_error.HTTPError, ) as err:
|
||||||
if live and (err.code == 404 or err.code == 410):
|
if live and (err.code == 404 or err.code == 410):
|
||||||
# We didn't keep up with the live window. Continue
|
# We didn't keep up with the live window. Continue
|
||||||
@@ -375,6 +378,6 @@ class F4mFD(FragmentFD):
|
|||||||
self._finish_frag_download(ctx)
|
self._finish_frag_download(ctx)
|
||||||
|
|
||||||
for frag_file in frags_filenames:
|
for frag_file in frags_filenames:
|
||||||
os.remove(frag_file)
|
os.remove(encodeFilename(frag_file))
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ from ..postprocessor.ffmpeg import FFmpegPostProcessor
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
encodeArgument,
|
encodeArgument,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
|
sanitize_open,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -89,13 +90,13 @@ class NativeHlsFD(FragmentFD):
|
|||||||
success = ctx['dl'].download(frag_filename, {'url': frag_url})
|
success = ctx['dl'].download(frag_filename, {'url': frag_url})
|
||||||
if not success:
|
if not success:
|
||||||
return False
|
return False
|
||||||
with open(frag_filename, 'rb') as down:
|
down, frag_sanitized = sanitize_open(frag_filename, 'rb')
|
||||||
ctx['dest_stream'].write(down.read())
|
ctx['dest_stream'].write(down.read())
|
||||||
frags_filenames.append(frag_filename)
|
frags_filenames.append(frag_sanitized)
|
||||||
|
|
||||||
self._finish_frag_download(ctx)
|
self._finish_frag_download(ctx)
|
||||||
|
|
||||||
for frag_file in frags_filenames:
|
for frag_file in frags_filenames:
|
||||||
os.remove(frag_file)
|
os.remove(encodeFilename(frag_file))
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|||||||
@@ -241,7 +241,10 @@ from .imdb import (
|
|||||||
ImdbIE,
|
ImdbIE,
|
||||||
ImdbListIE
|
ImdbListIE
|
||||||
)
|
)
|
||||||
from .imgur import ImgurIE
|
from .imgur import (
|
||||||
|
ImgurIE,
|
||||||
|
ImgurAlbumIE,
|
||||||
|
)
|
||||||
from .ina import InaIE
|
from .ina import InaIE
|
||||||
from .indavideo import (
|
from .indavideo import (
|
||||||
IndavideoIE,
|
IndavideoIE,
|
||||||
@@ -340,11 +343,12 @@ from .mtv import (
|
|||||||
MTVIE,
|
MTVIE,
|
||||||
MTVServicesEmbeddedIE,
|
MTVServicesEmbeddedIE,
|
||||||
MTVIggyIE,
|
MTVIggyIE,
|
||||||
|
MTVDEIE,
|
||||||
)
|
)
|
||||||
from .muenchentv import MuenchenTVIE
|
from .muenchentv import MuenchenTVIE
|
||||||
from .musicplayon import MusicPlayOnIE
|
from .musicplayon import MusicPlayOnIE
|
||||||
from .musicvault import MusicVaultIE
|
|
||||||
from .muzu import MuzuTVIE
|
from .muzu import MuzuTVIE
|
||||||
|
from .mwave import MwaveIE
|
||||||
from .myspace import MySpaceIE, MySpaceAlbumIE
|
from .myspace import MySpaceIE, MySpaceAlbumIE
|
||||||
from .myspass import MySpassIE
|
from .myspass import MySpassIE
|
||||||
from .myvi import MyviIE
|
from .myvi import MyviIE
|
||||||
@@ -358,6 +362,7 @@ from .nbc import (
|
|||||||
NBCNewsIE,
|
NBCNewsIE,
|
||||||
NBCSportsIE,
|
NBCSportsIE,
|
||||||
NBCSportsVPlayerIE,
|
NBCSportsVPlayerIE,
|
||||||
|
MSNBCIE,
|
||||||
)
|
)
|
||||||
from .ndr import (
|
from .ndr import (
|
||||||
NDRIE,
|
NDRIE,
|
||||||
@@ -452,6 +457,10 @@ from .playfm import PlayFMIE
|
|||||||
from .playtvak import PlaytvakIE
|
from .playtvak import PlaytvakIE
|
||||||
from .playvid import PlayvidIE
|
from .playvid import PlayvidIE
|
||||||
from .playwire import PlaywireIE
|
from .playwire import PlaywireIE
|
||||||
|
from .pluralsight import (
|
||||||
|
PluralsightIE,
|
||||||
|
PluralsightCourseIE,
|
||||||
|
)
|
||||||
from .podomatic import PodomaticIE
|
from .podomatic import PodomaticIE
|
||||||
from .porn91 import Porn91IE
|
from .porn91 import Porn91IE
|
||||||
from .pornhd import PornHdIE
|
from .pornhd import PornHdIE
|
||||||
@@ -612,7 +621,10 @@ from .testurl import TestURLIE
|
|||||||
from .testtube import TestTubeIE
|
from .testtube import TestTubeIE
|
||||||
from .tf1 import TF1IE
|
from .tf1 import TF1IE
|
||||||
from .theonion import TheOnionIE
|
from .theonion import TheOnionIE
|
||||||
from .theplatform import ThePlatformIE
|
from .theplatform import (
|
||||||
|
ThePlatformIE,
|
||||||
|
ThePlatformFeedIE,
|
||||||
|
)
|
||||||
from .thesixtyone import TheSixtyOneIE
|
from .thesixtyone import TheSixtyOneIE
|
||||||
from .thisamericanlife import ThisAmericanLifeIE
|
from .thisamericanlife import ThisAmericanLifeIE
|
||||||
from .thisav import ThisAVIE
|
from .thisav import ThisAVIE
|
||||||
@@ -696,7 +708,6 @@ from .vgtv import (
|
|||||||
from .vh1 import VH1IE
|
from .vh1 import VH1IE
|
||||||
from .vice import ViceIE
|
from .vice import ViceIE
|
||||||
from .viddler import ViddlerIE
|
from .viddler import ViddlerIE
|
||||||
from .videobam import VideoBamIE
|
|
||||||
from .videodetective import VideoDetectiveIE
|
from .videodetective import VideoDetectiveIE
|
||||||
from .videolecturesnet import VideoLecturesNetIE
|
from .videolecturesnet import VideoLecturesNetIE
|
||||||
from .videofyme import VideofyMeIE
|
from .videofyme import VideofyMeIE
|
||||||
@@ -731,6 +742,7 @@ from .vk import (
|
|||||||
VKIE,
|
VKIE,
|
||||||
VKUserVideosIE,
|
VKUserVideosIE,
|
||||||
)
|
)
|
||||||
|
from .vlive import VLiveIE
|
||||||
from .vodlocker import VodlockerIE
|
from .vodlocker import VodlockerIE
|
||||||
from .voicerepublic import VoiceRepublicIE
|
from .voicerepublic import VoiceRepublicIE
|
||||||
from .vporn import VpornIE
|
from .vporn import VpornIE
|
||||||
|
|||||||
@@ -1,16 +1,20 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
js_to_json,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class ABCIE(InfoExtractor):
|
class ABCIE(InfoExtractor):
|
||||||
IE_NAME = 'abc.net.au'
|
IE_NAME = 'abc.net.au'
|
||||||
_VALID_URL = r'http://www\.abc\.net\.au/news/[^/]+/[^/]+/(?P<id>\d+)'
|
_VALID_URL = r'http://www\.abc\.net\.au/news/[^/]+/[^/]+/(?P<id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334',
|
'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334',
|
||||||
'md5': 'cb3dd03b18455a661071ee1e28344d9f',
|
'md5': 'cb3dd03b18455a661071ee1e28344d9f',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -19,22 +23,47 @@ class ABCIE(InfoExtractor):
|
|||||||
'title': 'Australia to help staff Ebola treatment centre in Sierra Leone',
|
'title': 'Australia to help staff Ebola treatment centre in Sierra Leone',
|
||||||
'description': 'md5:809ad29c67a05f54eb41f2a105693a67',
|
'description': 'md5:809ad29c67a05f54eb41f2a105693a67',
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.abc.net.au/news/2015-08-17/warren-entsch-introduces-same-sex-marriage-bill/6702326',
|
||||||
|
'md5': 'db2a5369238b51f9811ad815b69dc086',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'NvqvPeNZsHU',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'upload_date': '20150816',
|
||||||
|
'uploader': 'ABC News (Australia)',
|
||||||
|
'description': 'Government backbencher Warren Entsch introduces a cross-party sponsored bill to legalise same-sex marriage, saying the bill is designed to promote "an inclusive Australia, not a divided one.". Read more here: http://ab.co/1Mwc6ef',
|
||||||
|
'uploader_id': 'NewsOnABC',
|
||||||
|
'title': 'Marriage Equality: Warren Entsch introduces same sex marriage bill',
|
||||||
|
},
|
||||||
|
'add_ie': ['Youtube'],
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
urls_info_json = self._search_regex(
|
mobj = re.search(
|
||||||
r'inlineVideoData\.push\((.*?)\);', webpage, 'video urls',
|
r'inline(?P<type>Video|YouTube)Data\.push\((?P<json_data>[^)]+)\);',
|
||||||
flags=re.DOTALL)
|
webpage)
|
||||||
urls_info = json.loads(urls_info_json.replace('\'', '"'))
|
if mobj is None:
|
||||||
|
raise ExtractorError('Unable to extract video urls')
|
||||||
|
|
||||||
|
urls_info = self._parse_json(
|
||||||
|
mobj.group('json_data'), video_id, transform_source=js_to_json)
|
||||||
|
|
||||||
|
if not isinstance(urls_info, list):
|
||||||
|
urls_info = [urls_info]
|
||||||
|
|
||||||
|
if mobj.group('type') == 'YouTube':
|
||||||
|
return self.playlist_result([
|
||||||
|
self.url_result(url_info['url']) for url_info in urls_info])
|
||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'url': url_info['url'],
|
'url': url_info['url'],
|
||||||
'width': int(url_info['width']),
|
'width': int_or_none(url_info.get('width')),
|
||||||
'height': int(url_info['height']),
|
'height': int_or_none(url_info.get('height')),
|
||||||
'tbr': int(url_info['bitrate']),
|
'tbr': int_or_none(url_info.get('bitrate')),
|
||||||
'filesize': int(url_info['filesize']),
|
'filesize': int_or_none(url_info.get('filesize')),
|
||||||
} for url_info in urls_info]
|
} for url_info in urls_info]
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ class AcademicEarthCourseIE(InfoExtractor):
|
|||||||
'title': 'Laws of Nature',
|
'title': 'Laws of Nature',
|
||||||
'description': 'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.',
|
'description': 'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.',
|
||||||
},
|
},
|
||||||
'playlist_count': 4,
|
'playlist_count': 3,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|||||||
@@ -156,7 +156,7 @@ class AdultSwimIE(InfoExtractor):
|
|||||||
xpath_text(idoc, './/trt', 'segment duration').strip())
|
xpath_text(idoc, './/trt', 'segment duration').strip())
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
file_els = idoc.findall('.//files/file')
|
file_els = idoc.findall('.//files/file') or idoc.findall('./files/file')
|
||||||
|
|
||||||
for file_el in file_els:
|
for file_el in file_els:
|
||||||
bitrate = file_el.attrib.get('bitrate')
|
bitrate = file_el.attrib.get('bitrate')
|
||||||
|
|||||||
@@ -20,14 +20,14 @@ class AirMozillaIE(InfoExtractor):
|
|||||||
'id': '6x4q2w',
|
'id': '6x4q2w',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
|
'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
|
||||||
'thumbnail': 're:https://\w+\.cloudfront\.net/6x4q2w/poster\.jpg\?t=\d+',
|
'thumbnail': 're:https?://vid\.ly/(?P<id>[0-9a-z-]+)/poster',
|
||||||
'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
|
'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
|
||||||
'timestamp': 1422487800,
|
'timestamp': 1422487800,
|
||||||
'upload_date': '20150128',
|
'upload_date': '20150128',
|
||||||
'location': 'SFO Commons',
|
'location': 'SFO Commons',
|
||||||
'duration': 3780,
|
'duration': 3780,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'categories': ['Main'],
|
'categories': ['Main', 'Privacy'],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ class AlJazeeraIE(InfoExtractor):
|
|||||||
'uploader': 'Al Jazeera English',
|
'uploader': 'Al Jazeera English',
|
||||||
},
|
},
|
||||||
'add_ie': ['Brightcove'],
|
'add_ie': ['Brightcove'],
|
||||||
|
'skip': 'Not accessible from Travis CI server',
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|||||||
@@ -17,55 +17,81 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class CeskaTelevizeIE(InfoExtractor):
|
class CeskaTelevizeIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'
|
_VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P<id>[^/#?]+)/*(?:[#?].*)?$'
|
||||||
|
_TESTS = [{
|
||||||
_TESTS = [
|
'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
|
||||||
{
|
'info_dict': {
|
||||||
'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
|
'id': '61924494876951776',
|
||||||
'info_dict': {
|
'ext': 'mp4',
|
||||||
'id': '214411058091220',
|
'title': 'Hyde Park Civilizace',
|
||||||
'ext': 'mp4',
|
'description': 'md5:fe93f6eda372d150759d11644ebbfb4a',
|
||||||
'title': 'Hyde Park Civilizace',
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
'description': 'Věda a současná civilizace. Interaktivní pořad - prostor pro vaše otázky a komentáře',
|
'duration': 3350,
|
||||||
'thumbnail': 're:^https?://.*\.jpg',
|
|
||||||
'duration': 3350,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
# m3u8 download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
{
|
'params': {
|
||||||
'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina',
|
# m3u8 download
|
||||||
'info_dict': {
|
'skip_download': True,
|
||||||
'id': '14716',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'První republika: Zpěvačka z Dupárny Bobina',
|
|
||||||
'description': 'Sága mapující atmosféru první republiky od r. 1918 do r. 1945.',
|
|
||||||
'thumbnail': 're:^https?://.*\.jpg',
|
|
||||||
'duration': 88.4,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
# m3u8 download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
]
|
}, {
|
||||||
|
'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '61924494876844374',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'První republika: Zpěvačka z Dupárny Bobina',
|
||||||
|
'description': 'Sága mapující atmosféru první republiky od r. 1918 do r. 1945.',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
|
'duration': 88.4,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# video with 18+ caution trailer
|
||||||
|
'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '215562210900007-bogotart',
|
||||||
|
'title': 'Queer: Bogotart',
|
||||||
|
'description': 'Alternativní průvodce současným queer světem',
|
||||||
|
},
|
||||||
|
'playlist': [{
|
||||||
|
'info_dict': {
|
||||||
|
'id': '61924494876844842',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Queer: Bogotart (Varování 18+)',
|
||||||
|
'duration': 10.2,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'info_dict': {
|
||||||
|
'id': '61924494877068022',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Queer: Bogotart (Queer)',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
|
'duration': 1558.3,
|
||||||
|
},
|
||||||
|
}],
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
url = url.replace('/porady/', '/ivysilani/').replace('/video/', '')
|
url = url.replace('/porady/', '/ivysilani/').replace('/video/', '')
|
||||||
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
playlist_id = mobj.group('id')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
|
NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
|
||||||
if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
|
if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
|
||||||
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
|
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
|
||||||
|
|
||||||
typ = self._html_search_regex(r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage, 'type')
|
typ = self._html_search_regex(
|
||||||
episode_id = self._html_search_regex(r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage, 'episode_id')
|
r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage, 'type')
|
||||||
|
episode_id = self._html_search_regex(
|
||||||
|
r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage, 'episode_id')
|
||||||
|
|
||||||
data = {
|
data = {
|
||||||
'playlist[0][type]': typ,
|
'playlist[0][type]': typ,
|
||||||
@@ -83,7 +109,7 @@ class CeskaTelevizeIE(InfoExtractor):
|
|||||||
req.add_header('X-Requested-With', 'XMLHttpRequest')
|
req.add_header('X-Requested-With', 'XMLHttpRequest')
|
||||||
req.add_header('Referer', url)
|
req.add_header('Referer', url)
|
||||||
|
|
||||||
playlistpage = self._download_json(req, video_id)
|
playlistpage = self._download_json(req, playlist_id)
|
||||||
|
|
||||||
playlist_url = playlistpage['url']
|
playlist_url = playlistpage['url']
|
||||||
if playlist_url == 'error_region':
|
if playlist_url == 'error_region':
|
||||||
@@ -92,33 +118,43 @@ class CeskaTelevizeIE(InfoExtractor):
|
|||||||
req = compat_urllib_request.Request(compat_urllib_parse_unquote(playlist_url))
|
req = compat_urllib_request.Request(compat_urllib_parse_unquote(playlist_url))
|
||||||
req.add_header('Referer', url)
|
req.add_header('Referer', url)
|
||||||
|
|
||||||
playlist = self._download_json(req, video_id)
|
playlist_title = self._og_search_title(webpage)
|
||||||
|
playlist_description = self._og_search_description(webpage)
|
||||||
|
|
||||||
item = playlist['playlist'][0]
|
playlist = self._download_json(req, playlist_id)['playlist']
|
||||||
formats = []
|
playlist_len = len(playlist)
|
||||||
for format_id, stream_url in item['streamUrls'].items():
|
|
||||||
formats.extend(self._extract_m3u8_formats(stream_url, video_id, 'mp4'))
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
title = self._og_search_title(webpage)
|
entries = []
|
||||||
description = self._og_search_description(webpage)
|
for item in playlist:
|
||||||
duration = float_or_none(item.get('duration'))
|
formats = []
|
||||||
thumbnail = item.get('previewImageUrl')
|
for format_id, stream_url in item['streamUrls'].items():
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
stream_url, playlist_id, 'mp4', entry_protocol='m3u8_native'))
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
subtitles = {}
|
item_id = item.get('id') or item['assetId']
|
||||||
subs = item.get('subtitles')
|
title = item['title']
|
||||||
if subs:
|
|
||||||
subtitles = self.extract_subtitles(episode_id, subs)
|
|
||||||
|
|
||||||
return {
|
duration = float_or_none(item.get('duration'))
|
||||||
'id': episode_id,
|
thumbnail = item.get('previewImageUrl')
|
||||||
'title': title,
|
|
||||||
'description': description,
|
subtitles = {}
|
||||||
'thumbnail': thumbnail,
|
if item.get('type') == 'VOD':
|
||||||
'duration': duration,
|
subs = item.get('subtitles')
|
||||||
'formats': formats,
|
if subs:
|
||||||
'subtitles': subtitles,
|
subtitles = self.extract_subtitles(episode_id, subs)
|
||||||
}
|
|
||||||
|
entries.append({
|
||||||
|
'id': item_id,
|
||||||
|
'title': playlist_title if playlist_len == 1 else '%s (%s)' % (playlist_title, title),
|
||||||
|
'description': playlist_description if playlist_len == 1 else None,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'duration': duration,
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
})
|
||||||
|
|
||||||
|
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
def _get_subtitles(self, episode_id, subs):
|
def _get_subtitles(self, episode_id, subs):
|
||||||
original_subtitles = self._download_webpage(
|
original_subtitles = self._download_webpage(
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import re
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import ExtractorError
|
from ..utils import ExtractorError
|
||||||
from .bliptv import BlipTVIE
|
from .bliptv import BlipTVIE
|
||||||
|
from .screenwavemedia import ScreenwaveMediaIE
|
||||||
|
|
||||||
|
|
||||||
class CinemassacreIE(InfoExtractor):
|
class CinemassacreIE(InfoExtractor):
|
||||||
@@ -83,10 +84,10 @@ class CinemassacreIE(InfoExtractor):
|
|||||||
|
|
||||||
playerdata_url = self._search_regex(
|
playerdata_url = self._search_regex(
|
||||||
[
|
[
|
||||||
r'src="(http://(?:player2\.screenwavemedia\.com|player\.screenwavemedia\.com/play)/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
|
ScreenwaveMediaIE.EMBED_PATTERN,
|
||||||
r'<iframe[^>]+src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
|
r'<iframe[^>]+src="(?P<url>(?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
|
||||||
],
|
],
|
||||||
webpage, 'player data URL', default=None)
|
webpage, 'player data URL', default=None, group='url')
|
||||||
if not playerdata_url:
|
if not playerdata_url:
|
||||||
playerdata_url = BlipTVIE._extract_url(webpage)
|
playerdata_url = BlipTVIE._extract_url(webpage)
|
||||||
if not playerdata_url:
|
if not playerdata_url:
|
||||||
|
|||||||
@@ -510,6 +510,12 @@ class InfoExtractor(object):
|
|||||||
"""Report attempt to log in."""
|
"""Report attempt to log in."""
|
||||||
self.to_screen('Logging in')
|
self.to_screen('Logging in')
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def raise_login_required(msg='This video is only available for registered users'):
|
||||||
|
raise ExtractorError(
|
||||||
|
'%s. Use --username and --password or --netrc to provide account credentials.' % msg,
|
||||||
|
expected=True)
|
||||||
|
|
||||||
# Methods for following #608
|
# Methods for following #608
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def url_result(url, ie=None, video_id=None, video_title=None):
|
def url_result(url, ie=None, video_id=None, video_title=None):
|
||||||
@@ -727,7 +733,7 @@ class InfoExtractor(object):
|
|||||||
def _hidden_inputs(html):
|
def _hidden_inputs(html):
|
||||||
hidden_inputs = {}
|
hidden_inputs = {}
|
||||||
for input in re.findall(r'<input([^>]+)>', html):
|
for input in re.findall(r'<input([^>]+)>', html):
|
||||||
if not re.search(r'type=(["\'])hidden\1', input):
|
if not re.search(r'type=(["\'])(?:hidden|submit)\1', input):
|
||||||
continue
|
continue
|
||||||
name = re.search(r'name=(["\'])(?P<value>.+?)\1', input)
|
name = re.search(r'name=(["\'])(?P<value>.+?)\1', input)
|
||||||
if not name:
|
if not name:
|
||||||
@@ -1052,7 +1058,7 @@ class InfoExtractor(object):
|
|||||||
return self._search_regex(
|
return self._search_regex(
|
||||||
r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None)
|
r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None)
|
||||||
|
|
||||||
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None):
|
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
|
||||||
base = smil_url
|
base = smil_url
|
||||||
for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
|
for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
|
||||||
b = meta.get('base') or meta.get('httpBase')
|
b = meta.get('base') or meta.get('httpBase')
|
||||||
@@ -1091,6 +1097,12 @@ class InfoExtractor(object):
|
|||||||
'width': width,
|
'width': width,
|
||||||
'height': height,
|
'height': height,
|
||||||
})
|
})
|
||||||
|
if transform_rtmp_url:
|
||||||
|
streamer, src = transform_rtmp_url(streamer, src)
|
||||||
|
formats[-1].update({
|
||||||
|
'url': streamer,
|
||||||
|
'play_path': src,
|
||||||
|
})
|
||||||
continue
|
continue
|
||||||
|
|
||||||
src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
|
src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
|
||||||
@@ -1129,7 +1141,7 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
def _parse_smil_subtitles(self, smil, namespace=None):
|
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))):
|
for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))):
|
||||||
src = textstream.get('src')
|
src = textstream.get('src')
|
||||||
@@ -1138,9 +1150,14 @@ class InfoExtractor(object):
|
|||||||
ext = textstream.get('ext') or determine_ext(src)
|
ext = textstream.get('ext') or determine_ext(src)
|
||||||
if not ext:
|
if not ext:
|
||||||
type_ = textstream.get('type')
|
type_ = textstream.get('type')
|
||||||
if type_ == 'text/srt':
|
SUBTITLES_TYPES = {
|
||||||
ext = 'srt'
|
'text/vtt': 'vtt',
|
||||||
lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName')
|
'text/srt': 'srt',
|
||||||
|
'application/smptett+xml': 'tt',
|
||||||
|
}
|
||||||
|
if type_ in SUBTITLES_TYPES:
|
||||||
|
ext = SUBTITLES_TYPES[type_]
|
||||||
|
lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') or textstream.get('lang') or subtitles_lang
|
||||||
subtitles.setdefault(lang, []).append({
|
subtitles.setdefault(lang, []).append({
|
||||||
'url': src,
|
'url': src,
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
@@ -1268,6 +1285,23 @@ class InfoExtractor(object):
|
|||||||
def _get_subtitles(self, *args, **kwargs):
|
def _get_subtitles(self, *args, **kwargs):
|
||||||
raise NotImplementedError("This method must be implemented by subclasses")
|
raise NotImplementedError("This method must be implemented by subclasses")
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _merge_subtitle_items(subtitle_list1, subtitle_list2):
|
||||||
|
""" Merge subtitle items for one language. Items with duplicated URLs
|
||||||
|
will be dropped. """
|
||||||
|
list1_urls = set([item['url'] for item in subtitle_list1])
|
||||||
|
ret = list(subtitle_list1)
|
||||||
|
ret.extend([item for item in subtitle_list2 if item['url'] not in list1_urls])
|
||||||
|
return ret
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _merge_subtitles(cls, subtitle_dict1, subtitle_dict2):
|
||||||
|
""" Merge two subtitle dictionaries, language by language. """
|
||||||
|
ret = dict(subtitle_dict1)
|
||||||
|
for lang in subtitle_dict2:
|
||||||
|
ret[lang] = cls._merge_subtitle_items(subtitle_dict1.get(lang, []), subtitle_dict2[lang])
|
||||||
|
return ret
|
||||||
|
|
||||||
def extract_automatic_captions(self, *args, **kwargs):
|
def extract_automatic_captions(self, *args, **kwargs):
|
||||||
if (self._downloader.params.get('writeautomaticsub', False) or
|
if (self._downloader.params.get('writeautomaticsub', False) or
|
||||||
self._downloader.params.get('listsubtitles')):
|
self._downloader.params.get('listsubtitles')):
|
||||||
|
|||||||
@@ -14,20 +14,40 @@ from ..compat import (
|
|||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_parse_unquote,
|
compat_urllib_parse_unquote,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
bytes_to_intlist,
|
bytes_to_intlist,
|
||||||
intlist_to_bytes,
|
intlist_to_bytes,
|
||||||
|
int_or_none,
|
||||||
|
remove_end,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
|
xpath_text,
|
||||||
)
|
)
|
||||||
from ..aes import (
|
from ..aes import (
|
||||||
aes_cbc_decrypt,
|
aes_cbc_decrypt,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class CrunchyrollIE(InfoExtractor):
|
class CrunchyrollBaseIE(InfoExtractor):
|
||||||
|
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None):
|
||||||
|
request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
|
||||||
|
else compat_urllib_request.Request(url_or_request))
|
||||||
|
# Accept-Language must be set explicitly to accept any language to avoid issues
|
||||||
|
# similar to https://github.com/rg3/youtube-dl/issues/6797.
|
||||||
|
# Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction
|
||||||
|
# should be imposed or not (from what I can see it just takes the first language
|
||||||
|
# ignoring the priority and requires it to correspond the IP). By the way this causes
|
||||||
|
# Crunchyroll to not work in georestriction cases in some browsers that don't place
|
||||||
|
# the locale lang first in header. However allowing any language seems to workaround the issue.
|
||||||
|
request.add_header('Accept-Language', '*')
|
||||||
|
return super(CrunchyrollBaseIE, self)._download_webpage(
|
||||||
|
request, video_id, note, errnote, fatal, tries, timeout, encoding)
|
||||||
|
|
||||||
|
|
||||||
|
class CrunchyrollIE(CrunchyrollBaseIE):
|
||||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||||
_NETRC_MACHINE = 'crunchyroll'
|
_NETRC_MACHINE = 'crunchyroll'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@@ -235,7 +255,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
webpage_url = 'http://www.' + mobj.group('url')
|
webpage_url = 'http://www.' + mobj.group('url')
|
||||||
|
|
||||||
webpage = self._download_webpage(webpage_url, video_id, 'Downloading webpage')
|
webpage = self._download_webpage(webpage_url, video_id, 'Downloading webpage')
|
||||||
note_m = self._html_search_regex(r'<div class="showmedia-trailer-notice">(.+?)</div>', webpage, 'trailer-notice', default='')
|
note_m = self._html_search_regex(
|
||||||
|
r'<div class="showmedia-trailer-notice">(.+?)</div>',
|
||||||
|
webpage, 'trailer-notice', default='')
|
||||||
if note_m:
|
if note_m:
|
||||||
raise ExtractorError(note_m)
|
raise ExtractorError(note_m)
|
||||||
|
|
||||||
@@ -245,15 +267,22 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
if msg.get('type') == 'error':
|
if msg.get('type') == 'error':
|
||||||
raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True)
|
raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True)
|
||||||
|
|
||||||
|
if 'To view this, please log in to verify you are 18 or older.' in webpage:
|
||||||
|
self.raise_login_required()
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, 'video_title', flags=re.DOTALL)
|
video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, 'video_title', flags=re.DOTALL)
|
||||||
video_title = re.sub(r' {2,}', ' ', video_title)
|
video_title = re.sub(r' {2,}', ' ', video_title)
|
||||||
video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='')
|
video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='')
|
||||||
if not video_description:
|
if not video_description:
|
||||||
video_description = None
|
video_description = None
|
||||||
video_upload_date = self._html_search_regex(r'<div>Availability for free users:(.+?)</div>', webpage, 'video_upload_date', fatal=False, flags=re.DOTALL)
|
video_upload_date = self._html_search_regex(
|
||||||
|
[r'<div>Availability for free users:(.+?)</div>', r'<div>[^<>]+<span>\s*(.+?\d{4})\s*</span></div>'],
|
||||||
|
webpage, 'video_upload_date', fatal=False, flags=re.DOTALL)
|
||||||
if video_upload_date:
|
if video_upload_date:
|
||||||
video_upload_date = unified_strdate(video_upload_date)
|
video_upload_date = unified_strdate(video_upload_date)
|
||||||
video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, 'video_uploader', fatal=False, flags=re.DOTALL)
|
video_uploader = self._html_search_regex(
|
||||||
|
r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', webpage,
|
||||||
|
'video_uploader', fatal=False)
|
||||||
|
|
||||||
playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
|
playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
|
||||||
playerdata_req = compat_urllib_request.Request(playerdata_url)
|
playerdata_req = compat_urllib_request.Request(playerdata_url)
|
||||||
@@ -279,13 +308,33 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
stream_info = streamdata.find('./{default}preload/stream_info')
|
stream_info = streamdata.find('./{default}preload/stream_info')
|
||||||
video_url = stream_info.find('./host').text
|
video_url = stream_info.find('./host').text
|
||||||
video_play_path = stream_info.find('./file').text
|
video_play_path = stream_info.find('./file').text
|
||||||
formats.append({
|
metadata = stream_info.find('./metadata')
|
||||||
|
format_info = {
|
||||||
|
'format': video_format,
|
||||||
|
'format_id': video_format,
|
||||||
|
'height': int_or_none(xpath_text(metadata, './height')),
|
||||||
|
'width': int_or_none(xpath_text(metadata, './width')),
|
||||||
|
}
|
||||||
|
|
||||||
|
if '.fplive.net/' in video_url:
|
||||||
|
video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
|
||||||
|
parsed_video_url = compat_urlparse.urlparse(video_url)
|
||||||
|
direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
|
||||||
|
netloc='v.lvlt.crcdn.net',
|
||||||
|
path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_play_path.split(':')[-1])))
|
||||||
|
if self._is_valid_url(direct_video_url, video_id, video_format):
|
||||||
|
format_info.update({
|
||||||
|
'url': direct_video_url,
|
||||||
|
})
|
||||||
|
formats.append(format_info)
|
||||||
|
continue
|
||||||
|
|
||||||
|
format_info.update({
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'play_path': video_play_path,
|
'play_path': video_play_path,
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'format': video_format,
|
|
||||||
'format_id': video_format,
|
|
||||||
})
|
})
|
||||||
|
formats.append(format_info)
|
||||||
|
|
||||||
subtitles = self.extract_subtitles(video_id, webpage)
|
subtitles = self.extract_subtitles(video_id, webpage)
|
||||||
|
|
||||||
@@ -301,7 +350,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class CrunchyrollShowPlaylistIE(InfoExtractor):
|
class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
|
||||||
IE_NAME = "crunchyroll:playlist"
|
IE_NAME = "crunchyroll:playlist"
|
||||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?$'
|
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?$'
|
||||||
|
|
||||||
|
|||||||
@@ -44,8 +44,8 @@ class DCNIE(InfoExtractor):
|
|||||||
title = video.get('title_en') or video['title_ar']
|
title = video.get('title_en') or video['title_ar']
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?'
|
'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' +
|
||||||
+ compat_urllib_parse.urlencode({
|
compat_urllib_parse.urlencode({
|
||||||
'id': video['id'],
|
'id': video['id'],
|
||||||
'user_id': video['user_id'],
|
'user_id': video['user_id'],
|
||||||
'signature': video['signature'],
|
'signature': video['signature'],
|
||||||
|
|||||||
@@ -9,8 +9,8 @@ from ..utils import qualities
|
|||||||
|
|
||||||
|
|
||||||
class DumpertIE(InfoExtractor):
|
class DumpertIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?dumpert\.nl/mediabase/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
|
_VALID_URL = r'https?://(?:www\.)?dumpert\.nl/(?:mediabase|embed)/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/',
|
'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/',
|
||||||
'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
|
'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -20,11 +20,15 @@ class DumpertIE(InfoExtractor):
|
|||||||
'description': 'Niet schrikken hoor',
|
'description': 'Niet schrikken hoor',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.dumpert.nl/embed/6675421/dc440fe7/',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
url = 'https://www.dumpert.nl/mediabase/' + video_id
|
||||||
req = compat_urllib_request.Request(url)
|
req = compat_urllib_request.Request(url)
|
||||||
req.add_header('Cookie', 'nsfw=1; cpc=10')
|
req.add_header('Cookie', 'nsfw=1; cpc=10')
|
||||||
webpage = self._download_webpage(req, video_id)
|
webpage = self._download_webpage(req, video_id)
|
||||||
|
|||||||
@@ -79,7 +79,7 @@ class EaglePlatformIE(InfoExtractor):
|
|||||||
age_limit = 0 if age_restriction == 'allow_all' else 18
|
age_limit = 0 if age_restriction == 'allow_all' else 18
|
||||||
|
|
||||||
m3u8_data = self._download_json(
|
m3u8_data = self._download_json(
|
||||||
media['sources']['secure_m3u8']['auto'],
|
self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:'),
|
||||||
video_id, 'Downloading m3u8 JSON')
|
video_id, 'Downloading m3u8 JSON')
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(
|
formats = self._extract_m3u8_formats(
|
||||||
|
|||||||
@@ -71,8 +71,7 @@ class EroProfileIE(InfoExtractor):
|
|||||||
|
|
||||||
m = re.search(r'You must be logged in to view this video\.', webpage)
|
m = re.search(r'You must be logged in to view this video\.', webpage)
|
||||||
if m:
|
if m:
|
||||||
raise ExtractorError(
|
self.raise_login_required('This video requires login')
|
||||||
'This video requires login. Please specify a username and password and try again.', expected=True)
|
|
||||||
|
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
[r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
|
[r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
|
||||||
|
|||||||
@@ -10,12 +10,13 @@ from ..compat import (
|
|||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
encode_dict,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class FC2IE(InfoExtractor):
|
class FC2IE(InfoExtractor):
|
||||||
_VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)?content/(?P<id>[^/]+)'
|
_VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)*content/(?P<id>[^/]+)'
|
||||||
IE_NAME = 'fc2'
|
IE_NAME = 'fc2'
|
||||||
_NETRC_MACHINE = 'fc2'
|
_NETRC_MACHINE = 'fc2'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@@ -37,6 +38,9 @@ class FC2IE(InfoExtractor):
|
|||||||
'password': '(snip)',
|
'password': '(snip)',
|
||||||
'skip': 'requires actual password'
|
'skip': 'requires actual password'
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://video.fc2.com/en/a/content/20130926eZpARwsF',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
@@ -52,10 +56,7 @@ class FC2IE(InfoExtractor):
|
|||||||
'Submit': ' Login ',
|
'Submit': ' Login ',
|
||||||
}
|
}
|
||||||
|
|
||||||
# Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
|
login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('utf-8')
|
||||||
# chokes on unicode
|
|
||||||
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
|
|
||||||
login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
|
|
||||||
request = compat_urllib_request.Request(
|
request = compat_urllib_request.Request(
|
||||||
'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data)
|
'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data)
|
||||||
|
|
||||||
@@ -80,7 +81,7 @@ class FC2IE(InfoExtractor):
|
|||||||
|
|
||||||
title = self._og_search_title(webpage)
|
title = self._og_search_title(webpage)
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
refer = url.replace('/content/', '/a/content/')
|
refer = url.replace('/content/', '/a/content/') if '/a/content/' not in url else url
|
||||||
|
|
||||||
mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest()
|
mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest()
|
||||||
|
|
||||||
|
|||||||
@@ -30,6 +30,10 @@ class FolketingetIE(InfoExtractor):
|
|||||||
'upload_date': '20141120',
|
'upload_date': '20141120',
|
||||||
'duration': 3960,
|
'duration': 3960,
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
@@ -8,7 +10,8 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class FoxNewsIE(InfoExtractor):
|
class FoxNewsIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://video\.foxnews\.com/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
|
IE_DESC = 'Fox News and Fox Business Video'
|
||||||
|
_VALID_URL = r'https?://(?P<host>video\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips',
|
'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips',
|
||||||
@@ -42,13 +45,19 @@ class FoxNewsIE(InfoExtractor):
|
|||||||
'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com',
|
'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://video.foxbusiness.com/v/4442309889001',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
host = mobj.group('host')
|
||||||
|
|
||||||
video = self._download_json(
|
video = self._download_json(
|
||||||
'http://video.foxnews.com/v/feed/video/%s.js?template=fox' % video_id, video_id)
|
'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id), video_id)
|
||||||
|
|
||||||
item = video['channel']['item']
|
item = video['channel']['item']
|
||||||
title = item['title']
|
title = item['title']
|
||||||
|
|||||||
@@ -78,9 +78,14 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
|||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
title = info['titre']
|
||||||
|
subtitle = info.get('sous_titre')
|
||||||
|
if subtitle:
|
||||||
|
title += ' - %s' % subtitle
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': info['titre'],
|
'title': title,
|
||||||
'description': clean_html(info['synopsis']),
|
'description': clean_html(info['synopsis']),
|
||||||
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
|
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
|
||||||
'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']),
|
'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']),
|
||||||
@@ -214,15 +219,15 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
|
|||||||
},
|
},
|
||||||
# france5
|
# france5
|
||||||
{
|
{
|
||||||
'url': 'http://www.france5.fr/emissions/c-a-dire/videos/92837968',
|
'url': 'http://www.france5.fr/emissions/c-a-dire/videos/quels_sont_les_enjeux_de_cette_rentree_politique__31-08-2015_908948?onglet=tous&page=1',
|
||||||
'md5': '78f0f4064f9074438e660785bbf2c5d9',
|
'md5': 'f6c577df3806e26471b3d21631241fd0',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '108961659',
|
'id': '123327454',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'C à dire ?!',
|
'title': 'C à dire ?! - Quels sont les enjeux de cette rentrée politique ?',
|
||||||
'description': 'md5:1a4aeab476eb657bf57c4ff122129f81',
|
'description': 'md5:4a0d5cb5dce89d353522a84462bae5a4',
|
||||||
'upload_date': '20140915',
|
'upload_date': '20150831',
|
||||||
'timestamp': 1410795000,
|
'timestamp': 1441035120,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# franceo
|
# franceo
|
||||||
|
|||||||
@@ -48,6 +48,7 @@ from .vimeo import VimeoIE
|
|||||||
from .dailymotion import DailymotionCloudIE
|
from .dailymotion import DailymotionCloudIE
|
||||||
from .onionstudios import OnionStudiosIE
|
from .onionstudios import OnionStudiosIE
|
||||||
from .snagfilms import SnagFilmsEmbedIE
|
from .snagfilms import SnagFilmsEmbedIE
|
||||||
|
from .screenwavemedia import ScreenwaveMediaIE
|
||||||
|
|
||||||
|
|
||||||
class GenericIE(InfoExtractor):
|
class GenericIE(InfoExtractor):
|
||||||
@@ -1001,6 +1002,16 @@ class GenericIE(InfoExtractor):
|
|||||||
'description': 'New experience with Acrobat DC',
|
'description': 'New experience with Acrobat DC',
|
||||||
'duration': 248.667,
|
'duration': 248.667,
|
||||||
},
|
},
|
||||||
|
},
|
||||||
|
# ScreenwaveMedia embed
|
||||||
|
{
|
||||||
|
'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
|
||||||
|
'md5': '24ace5baba0d35d55c6810b51f34e9e0',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'cinemasnob-55d26273809dd',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'cinemasnob',
|
||||||
|
},
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -1718,6 +1729,11 @@ class GenericIE(InfoExtractor):
|
|||||||
if snagfilms_url:
|
if snagfilms_url:
|
||||||
return self.url_result(snagfilms_url)
|
return self.url_result(snagfilms_url)
|
||||||
|
|
||||||
|
# Look for ScreenwaveMedia embeds
|
||||||
|
mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
|
||||||
|
|
||||||
# Look for AdobeTVVideo embeds
|
# Look for AdobeTVVideo embeds
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
|
r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
|
||||||
@@ -1781,7 +1797,7 @@ class GenericIE(InfoExtractor):
|
|||||||
found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
|
found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
|
||||||
if not found:
|
if not found:
|
||||||
# HTML5 video
|
# HTML5 video
|
||||||
found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
|
found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
|
||||||
if not found:
|
if not found:
|
||||||
REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
|
REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
|
||||||
found = re.search(
|
found = re.search(
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ from ..compat import (
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -359,13 +360,8 @@ class GloboIE(InfoExtractor):
|
|||||||
self._API_URL_TEMPLATE % video_id, video_id)['videos'][0]
|
self._API_URL_TEMPLATE % video_id, video_id)['videos'][0]
|
||||||
|
|
||||||
title = video['title']
|
title = video['title']
|
||||||
duration = float_or_none(video['duration'], 1000)
|
|
||||||
like_count = video['likes']
|
|
||||||
uploader = video['channel']
|
|
||||||
uploader_id = video['channel_id']
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
for resource in video['resources']:
|
for resource in video['resources']:
|
||||||
resource_id = resource.get('_id')
|
resource_id = resource.get('_id')
|
||||||
if not resource_id:
|
if not resource_id:
|
||||||
@@ -407,6 +403,11 @@ class GloboIE(InfoExtractor):
|
|||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
duration = float_or_none(video.get('duration'), 1000)
|
||||||
|
like_count = int_or_none(video.get('likes'))
|
||||||
|
uploader = video.get('channel')
|
||||||
|
uploader_id = video.get('channel_id')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
|||||||
@@ -10,15 +10,16 @@ from ..compat import (
|
|||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
encode_dict,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class GorillaVidIE(InfoExtractor):
|
class GorillaVidIE(InfoExtractor):
|
||||||
IE_DESC = 'GorillaVid.in, daclips.in, movpod.in, fastvideo.in and realvid.net'
|
IE_DESC = 'GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net and filehoot.com'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://(?P<host>(?:www\.)?
|
https?://(?P<host>(?:www\.)?
|
||||||
(?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in|realvid\.net))/
|
(?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in|realvid\.net|filehoot\.com))/
|
||||||
(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?
|
(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?
|
||||||
'''
|
'''
|
||||||
|
|
||||||
@@ -67,13 +68,22 @@ class GorillaVidIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://movpod.in/0wguyyxi1yca',
|
'url': 'http://movpod.in/0wguyyxi1yca',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://filehoot.com/3ivfabn7573c.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3ivfabn7573c',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'youtube-dl test video \'äBaW_jenozKc.mp4.mp4',
|
||||||
|
'thumbnail': 're:http://.*\.jpg',
|
||||||
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
webpage = self._download_webpage('http://%s/%s' % (mobj.group('host'), video_id), video_id)
|
url = 'http://%s/%s' % (mobj.group('host'), video_id)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None:
|
if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None:
|
||||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||||
@@ -87,7 +97,7 @@ class GorillaVidIE(InfoExtractor):
|
|||||||
if countdown:
|
if countdown:
|
||||||
self._sleep(countdown, video_id)
|
self._sleep(countdown, video_id)
|
||||||
|
|
||||||
post = compat_urllib_parse.urlencode(fields)
|
post = compat_urllib_parse.urlencode(encode_dict(fields))
|
||||||
|
|
||||||
req = compat_urllib_request.Request(url, post)
|
req = compat_urllib_request.Request(url, post)
|
||||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||||
@@ -95,7 +105,7 @@ class GorillaVidIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(req, video_id, 'Downloading video page')
|
webpage = self._download_webpage(req, video_id, 'Downloading video page')
|
||||||
|
|
||||||
title = self._search_regex(
|
title = self._search_regex(
|
||||||
[r'style="z-index: [0-9]+;">([^<]+)</span>', r'>Watch (.+) '],
|
[r'style="z-index: [0-9]+;">([^<]+)</span>', r'<td nowrap>([^<]+)</td>', r'>Watch (.+) '],
|
||||||
webpage, 'title', default=None) or self._og_search_title(webpage)
|
webpage, 'title', default=None) or self._og_search_title(webpage)
|
||||||
video_url = self._search_regex(
|
video_url = self._search_regex(
|
||||||
r'file\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'file url')
|
r'file\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'file url')
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class ImgurIE(InfoExtractor):
|
class ImgurIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)'
|
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!gallery)(?P<id>[a-zA-Z0-9]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://i.imgur.com/A61SaA1.gifv',
|
'url': 'https://i.imgur.com/A61SaA1.gifv',
|
||||||
@@ -97,3 +97,28 @@ class ImgurIE(InfoExtractor):
|
|||||||
'description': self._og_search_description(webpage),
|
'description': self._og_search_description(webpage),
|
||||||
'title': self._og_search_title(webpage),
|
'title': self._og_search_title(webpage),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class ImgurAlbumIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:i\.)?imgur\.com/gallery/(?P<id>[a-zA-Z0-9]+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://imgur.com/gallery/Q95ko',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'Q95ko',
|
||||||
|
},
|
||||||
|
'playlist_count': 25,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
album_id = self._match_id(url)
|
||||||
|
|
||||||
|
album_images = self._download_json(
|
||||||
|
'http://imgur.com/gallery/%s/album_images/hit.json?all=true' % album_id,
|
||||||
|
album_id)['data']['images']
|
||||||
|
|
||||||
|
entries = [
|
||||||
|
self.url_result('http://imgur.com/%s' % image['hash'])
|
||||||
|
for image in album_images if image.get('hash')]
|
||||||
|
|
||||||
|
return self.playlist_result(entries, album_id)
|
||||||
|
|||||||
@@ -13,12 +13,24 @@ from ..utils import (
|
|||||||
|
|
||||||
class KalturaIE(InfoExtractor):
|
class KalturaIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
(?:kaltura:|
|
(?:
|
||||||
https?://(:?(?:www|cdnapisec)\.)?kaltura\.com/index\.php/kwidget/(?:[^/]+/)*?wid/_
|
kaltura:(?P<partner_id_s>\d+):(?P<id_s>[0-9a-z_]+)|
|
||||||
)(?P<partner_id>\d+)
|
https?://
|
||||||
(?::|
|
(:?(?:www|cdnapisec)\.)?kaltura\.com/
|
||||||
/(?:[^/]+/)*?entry_id/
|
(?:
|
||||||
)(?P<id>[0-9a-z_]+)'''
|
(?:
|
||||||
|
# flash player
|
||||||
|
index\.php/kwidget/
|
||||||
|
(?:[^/]+/)*?wid/_(?P<partner_id>\d+)/
|
||||||
|
(?:[^/]+/)*?entry_id/(?P<id>[0-9a-z_]+)|
|
||||||
|
# html5 player
|
||||||
|
html5/html5lib/
|
||||||
|
(?:[^/]+/)*?entry_id/(?P<id_html5>[0-9a-z_]+)
|
||||||
|
.*\?.*\bwid=_(?P<partner_id_html5>\d+)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
'''
|
||||||
_API_BASE = 'http://cdnapi.kaltura.com/api_v3/index.php?'
|
_API_BASE = 'http://cdnapi.kaltura.com/api_v3/index.php?'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
@@ -43,6 +55,10 @@ class KalturaIE(InfoExtractor):
|
|||||||
'url': 'https://cdnapisec.kaltura.com/index.php/kwidget/wid/_557781/uiconf_id/22845202/entry_id/1_plr1syf3',
|
'url': 'https://cdnapisec.kaltura.com/index.php/kwidget/wid/_557781/uiconf_id/22845202/entry_id/1_plr1syf3',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://cdnapisec.kaltura.com/html5/html5lib/v2.30.2/mwEmbedFrame.php/p/1337/uiconf_id/20540612/entry_id/1_sf5ovm7u?wid=_243342',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def _kaltura_api_call(self, video_id, actions, *args, **kwargs):
|
def _kaltura_api_call(self, video_id, actions, *args, **kwargs):
|
||||||
@@ -105,9 +121,9 @@ class KalturaIE(InfoExtractor):
|
|||||||
video_id, actions, note='Downloading video info JSON')
|
video_id, actions, note='Downloading video info JSON')
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
partner_id, entry_id = mobj.group('partner_id'), mobj.group('id')
|
partner_id = mobj.group('partner_id_s') or mobj.group('partner_id') or mobj.group('partner_id_html5')
|
||||||
|
entry_id = mobj.group('id_s') or mobj.group('id') or mobj.group('id_html5')
|
||||||
|
|
||||||
info, source_data = self._get_video_info(entry_id, partner_id)
|
info, source_data = self._get_video_info(entry_id, partner_id)
|
||||||
|
|
||||||
@@ -126,7 +142,7 @@ class KalturaIE(InfoExtractor):
|
|||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': entry_id,
|
||||||
'title': info['name'],
|
'title': info['name'],
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'description': info.get('description'),
|
'description': info.get('description'),
|
||||||
|
|||||||
@@ -4,7 +4,10 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import int_or_none
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_duration,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class KontrTubeIE(InfoExtractor):
|
class KontrTubeIE(InfoExtractor):
|
||||||
@@ -34,33 +37,28 @@ class KontrTubeIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
url, display_id, 'Downloading page')
|
url, display_id, 'Downloading page')
|
||||||
|
|
||||||
video_url = self._html_search_regex(
|
video_url = self._search_regex(
|
||||||
r"video_url\s*:\s*'(.+?)/?',", webpage, 'video URL')
|
r"video_url\s*:\s*'(.+?)/?',", webpage, 'video URL')
|
||||||
thumbnail = self._html_search_regex(
|
thumbnail = self._search_regex(
|
||||||
r"preview_url\s*:\s*'(.+?)/?',", webpage, 'video thumbnail', fatal=False)
|
r"preview_url\s*:\s*'(.+?)/?',", webpage, 'thumbnail', fatal=False)
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<title>(.+?)</title>', webpage, 'video title')
|
r'(?s)<h2>(.+?)</h2>', webpage, 'title')
|
||||||
description = self._html_search_meta(
|
description = self._html_search_meta(
|
||||||
'description', webpage, 'video description')
|
'description', webpage, 'description')
|
||||||
|
|
||||||
mobj = re.search(
|
duration = self._search_regex(
|
||||||
r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>',
|
r'Длительность: <em>([^<]+)</em>', webpage, 'duration', fatal=False)
|
||||||
webpage)
|
if duration:
|
||||||
duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
|
duration = parse_duration(duration.replace('мин', 'min').replace('сек', 'sec'))
|
||||||
|
|
||||||
view_count = self._html_search_regex(
|
view_count = self._search_regex(
|
||||||
r'<div class="col_2">Просмотров: <span>(\d+)</span></div>',
|
r'Просмотров: <em>([^<]+)</em>',
|
||||||
webpage, 'view count', fatal=False)
|
webpage, 'view count', fatal=False)
|
||||||
|
if view_count:
|
||||||
|
view_count = int_or_none(view_count.replace(' ', ''))
|
||||||
|
|
||||||
comment_count = None
|
comment_count = int_or_none(self._search_regex(
|
||||||
comment_str = self._html_search_regex(
|
r'Комментарии \((\d+)\)<', webpage, ' comment count', fatal=False))
|
||||||
r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count', fatal=False)
|
|
||||||
if comment_str.startswith('комментариев нет'):
|
|
||||||
comment_count = 0
|
|
||||||
else:
|
|
||||||
mobj = re.search(r'\d+ из (?P<total>\d+) комментариев', comment_str)
|
|
||||||
if mobj:
|
|
||||||
comment_count = mobj.group('total')
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
|||||||
@@ -25,6 +25,9 @@ class KrasViewIE(InfoExtractor):
|
|||||||
'duration': 27,
|
'duration': 27,
|
||||||
'thumbnail': 're:^https?://.*\.jpg',
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': 'Not accessible from Travis CI server',
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|||||||
@@ -202,6 +202,7 @@ class KuwoSingerIE(InfoExtractor):
|
|||||||
'title': 'Ali',
|
'title': 'Ali',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 95,
|
'playlist_mincount': 95,
|
||||||
|
'skip': 'Regularly stalls travis build', # See https://travis-ci.org/rg3/youtube-dl/jobs/78878540
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|||||||
@@ -8,9 +8,9 @@ from ..utils import unified_strdate
|
|||||||
|
|
||||||
|
|
||||||
class LibsynIE(InfoExtractor):
|
class LibsynIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+)'
|
_VALID_URL = r'(?P<mainurl>https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+))'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://html5-player.libsyn.com/embed/episode/id/3377616/',
|
'url': 'http://html5-player.libsyn.com/embed/episode/id/3377616/',
|
||||||
'md5': '443360ee1b58007bc3dcf09b41d093bb',
|
'md5': '443360ee1b58007bc3dcf09b41d093bb',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -19,12 +19,24 @@ class LibsynIE(InfoExtractor):
|
|||||||
'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
|
'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
|
||||||
'description': 'md5:601cb790edd05908957dae8aaa866465',
|
'description': 'md5:601cb790edd05908957dae8aaa866465',
|
||||||
'upload_date': '20150220',
|
'upload_date': '20150220',
|
||||||
|
'thumbnail': 're:^https?://.*',
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://html5-player.libsyn.com/embed/episode/id/3727166/height/75/width/200/theme/standard/direction/no/autoplay/no/autonext/no/thumbnail/no/preload/no/no_addthis/no/',
|
||||||
|
'md5': '6c5cb21acd622d754d3b1a92b582ce42',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3727166',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'Clients From Hell Podcast - How a Sex Toy Company Kickstarted my Freelance Career',
|
||||||
|
'upload_date': '20150818',
|
||||||
|
'thumbnail': 're:^https?://.*',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
m = re.match(self._VALID_URL, url)
|
||||||
|
video_id = m.group('id')
|
||||||
|
url = m.group('mainurl')
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
@@ -32,20 +44,18 @@ class LibsynIE(InfoExtractor):
|
|||||||
} for media_url in set(re.findall('var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage))]
|
} for media_url in set(re.findall('var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage))]
|
||||||
|
|
||||||
podcast_title = self._search_regex(
|
podcast_title = self._search_regex(
|
||||||
r'<h2>([^<]+)</h2>', webpage, 'title')
|
r'<h2>([^<]+)</h2>', webpage, 'podcast title', default=None)
|
||||||
episode_title = self._search_regex(
|
episode_title = self._search_regex(
|
||||||
r'<h3>([^<]+)</h3>', webpage, 'title', default=None)
|
r'(?:<div class="episode-title">|<h3>)([^<]+)</', webpage, 'episode title')
|
||||||
|
|
||||||
title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title
|
title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title
|
||||||
|
|
||||||
description = self._html_search_regex(
|
description = self._html_search_regex(
|
||||||
r'<div id="info_text_body">(.+?)</div>', webpage,
|
r'<div id="info_text_body">(.+?)</div>', webpage,
|
||||||
'description', fatal=False)
|
'description', default=None)
|
||||||
|
|
||||||
thumbnail = self._search_regex(
|
thumbnail = self._search_regex(
|
||||||
r'<img[^>]+class="info-show-icon"[^>]+src="([^"]+)"',
|
r'<img[^>]+class="info-show-icon"[^>]+src="([^"]+)"',
|
||||||
webpage, 'thumbnail', fatal=False)
|
webpage, 'thumbnail', fatal=False)
|
||||||
|
|
||||||
release_date = unified_strdate(self._search_regex(
|
release_date = unified_strdate(self._search_regex(
|
||||||
r'<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False))
|
r'<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False))
|
||||||
|
|
||||||
|
|||||||
@@ -118,9 +118,7 @@ class LyndaIE(LyndaBaseIE):
|
|||||||
'lynda returned error: %s' % video_json['Message'], expected=True)
|
'lynda returned error: %s' % video_json['Message'], expected=True)
|
||||||
|
|
||||||
if video_json['HasAccess'] is False:
|
if video_json['HasAccess'] is False:
|
||||||
raise ExtractorError(
|
self.raise_login_required('Video %s is only available for members' % video_id)
|
||||||
'Video %s is only available for members. '
|
|
||||||
% video_id + self._ACCOUNT_CREDENTIALS_HINT, expected=True)
|
|
||||||
|
|
||||||
video_id = compat_str(video_json['ID'])
|
video_id = compat_str(video_json['ID'])
|
||||||
duration = video_json['DurationInSeconds']
|
duration = video_json['DurationInSeconds']
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ class MailRuIE(InfoExtractor):
|
|||||||
'uploader_id': 'sonypicturesrus@mail.ru',
|
'uploader_id': 'sonypicturesrus@mail.ru',
|
||||||
'duration': 184,
|
'duration': 184,
|
||||||
},
|
},
|
||||||
|
'skip': 'Not accessible from Travis CI server',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://my.mail.ru/corp/hitech/video/news_hi-tech_mail_ru/1263.html',
|
'url': 'http://my.mail.ru/corp/hitech/video/news_hi-tech_mail_ru/1263.html',
|
||||||
@@ -39,6 +40,7 @@ class MailRuIE(InfoExtractor):
|
|||||||
'uploader_id': 'hitech@corp.mail.ru',
|
'uploader_id': 'hitech@corp.mail.ru',
|
||||||
'duration': 245,
|
'duration': 245,
|
||||||
},
|
},
|
||||||
|
'skip': 'Not accessible from Travis CI server',
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
@@ -18,12 +18,12 @@ class TechTVMITIE(InfoExtractor):
|
|||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
|
'url': 'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
|
||||||
'md5': '1f8cb3e170d41fd74add04d3c9330e5f',
|
'md5': '00a3a27ee20d44bcaa0933ccec4a2cf7',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '25418',
|
'id': '25418',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'MIT DNA Learning Center Set',
|
'title': 'MIT DNA and Protein Sets',
|
||||||
'description': 'md5:82313335e8a8a3f243351ba55bc1b474',
|
'description': 'md5:46f5c69ce434f0a97e7c628cc142802d',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -33,8 +33,8 @@ class TechTVMITIE(InfoExtractor):
|
|||||||
'http://techtv.mit.edu/videos/%s' % video_id, video_id)
|
'http://techtv.mit.edu/videos/%s' % video_id, video_id)
|
||||||
clean_page = re.compile(r'<!--.*?-->', re.S).sub('', raw_page)
|
clean_page = re.compile(r'<!--.*?-->', re.S).sub('', raw_page)
|
||||||
|
|
||||||
base_url = self._search_regex(
|
base_url = self._proto_relative_url(self._search_regex(
|
||||||
r'ipadUrl: \'(.+?cloudfront.net/)', raw_page, 'base url')
|
r'ipadUrl: \'(.+?cloudfront.net/)', raw_page, 'base url'), 'http:')
|
||||||
formats_json = self._search_regex(
|
formats_json = self._search_regex(
|
||||||
r'bitrates: (\[.+?\])', raw_page, 'video formats')
|
r'bitrates: (\[.+?\])', raw_page, 'video formats')
|
||||||
formats_mit = json.loads(formats_json)
|
formats_mit = json.loads(formats_json)
|
||||||
|
|||||||
@@ -67,7 +67,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||||||
return [{'url': url, 'ext': 'mp4'}]
|
return [{'url': url, 'ext': 'mp4'}]
|
||||||
|
|
||||||
def _extract_video_formats(self, mdoc, mtvn_id):
|
def _extract_video_formats(self, mdoc, mtvn_id):
|
||||||
if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4)$', mdoc.find('.//src').text) is not None:
|
if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4|copyright_error\.flv(?:\?geo\b.+?)?)$', mdoc.find('.//src').text) is not None:
|
||||||
if mtvn_id is not None and self._MOBILE_TEMPLATE is not None:
|
if mtvn_id is not None and self._MOBILE_TEMPLATE is not None:
|
||||||
self.to_screen('The normal version is not available from your '
|
self.to_screen('The normal version is not available from your '
|
||||||
'country, trying with the mobile version')
|
'country, trying with the mobile version')
|
||||||
@@ -114,7 +114,8 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||||||
# Remove the templates, like &device={device}
|
# Remove the templates, like &device={device}
|
||||||
mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', mediagen_url)
|
mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', mediagen_url)
|
||||||
if 'acceptMethods' not in mediagen_url:
|
if 'acceptMethods' not in mediagen_url:
|
||||||
mediagen_url += '&acceptMethods=fms'
|
mediagen_url += '&' if '?' in mediagen_url else '?'
|
||||||
|
mediagen_url += 'acceptMethods=fms'
|
||||||
|
|
||||||
mediagen_doc = self._download_xml(mediagen_url, video_id,
|
mediagen_doc = self._download_xml(mediagen_url, video_id,
|
||||||
'Downloading video urls')
|
'Downloading video urls')
|
||||||
@@ -141,7 +142,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||||||
if title_el is None:
|
if title_el is None:
|
||||||
title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title')
|
title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title')
|
||||||
if title_el is None:
|
if title_el is None:
|
||||||
title_el = itemdoc.find('.//title')
|
title_el = itemdoc.find('.//title') or itemdoc.find('./title')
|
||||||
if title_el.text is None:
|
if title_el.text is None:
|
||||||
title_el = None
|
title_el = None
|
||||||
|
|
||||||
@@ -174,8 +175,11 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||||||
if self._LANG:
|
if self._LANG:
|
||||||
info_url += 'lang=%s&' % self._LANG
|
info_url += 'lang=%s&' % self._LANG
|
||||||
info_url += data
|
info_url += data
|
||||||
|
return self._get_videos_info_from_url(info_url, video_id)
|
||||||
|
|
||||||
|
def _get_videos_info_from_url(self, url, video_id):
|
||||||
idoc = self._download_xml(
|
idoc = self._download_xml(
|
||||||
info_url, video_id,
|
url, video_id,
|
||||||
'Downloading info', transform_source=fix_xml_ampersands)
|
'Downloading info', transform_source=fix_xml_ampersands)
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
[self._get_video_info(item) for item in idoc.findall('.//item')])
|
[self._get_video_info(item) for item in idoc.findall('.//item')])
|
||||||
@@ -288,3 +292,65 @@ class MTVIggyIE(MTVServicesInfoExtractor):
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
_FEED_URL = 'http://all.mtvworldverticals.com/feed-xml/'
|
_FEED_URL = 'http://all.mtvworldverticals.com/feed-xml/'
|
||||||
|
|
||||||
|
|
||||||
|
class MTVDEIE(MTVServicesInfoExtractor):
|
||||||
|
IE_NAME = 'mtv.de'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:artists|shows|news)/(?:[^/]+/)*(?P<id>\d+)-[^/#?]+/*(?:[#?].*)?$'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.mtv.de/artists/10571-cro/videos/61131-traum',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'music_video-a50bc5f0b3aa4b3190aa',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'MusicVideo_cro-traum',
|
||||||
|
'description': 'Cro - Traum',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# mediagen URL without query (e.g. http://videos.mtvnn.com/mediagen/e865da714c166d18d6f80893195fcb97)
|
||||||
|
'url': 'http://www.mtv.de/shows/933-teen-mom-2/staffeln/5353/folgen/63565-enthullungen',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'local_playlist-f5ae778b9832cc837189',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Episode_teen-mom-2_shows_season-5_episode-1_full-episode_part1',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# single video in pagePlaylist with different id
|
||||||
|
'url': 'http://www.mtv.de/news/77491-mtv-movies-spotlight-pixels-teil-3',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'local_playlist-4e760566473c4c8c5344',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Article_mtv-movies-spotlight-pixels-teil-3_short-clips_part1',
|
||||||
|
'description': 'MTV Movies Supercut',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
playlist = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'window\.pagePlaylist\s*=\s*(\[.+?\]);\n', webpage, 'page playlist'),
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
# news pages contain single video in playlist with different id
|
||||||
|
if len(playlist) == 1:
|
||||||
|
return self._get_videos_info_from_url(playlist[0]['mrss'], video_id)
|
||||||
|
|
||||||
|
for item in playlist:
|
||||||
|
item_id = item.get('id')
|
||||||
|
if item_id and compat_str(item_id) == video_id:
|
||||||
|
return self._get_videos_info_from_url(item['mrss'], video_id)
|
||||||
|
|||||||
@@ -1,63 +0,0 @@
|
|||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
|
|
||||||
|
|
||||||
class MusicVaultIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://www\.musicvault\.com/(?P<uploader_id>[^/?#]*)/video/(?P<display_id>[^/?#]*)_(?P<id>[0-9]+)\.html'
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://www.musicvault.com/the-allman-brothers-band/video/straight-from-the-heart_1010863.html',
|
|
||||||
'md5': '3adcbdb3dcc02d647539e53f284ba171',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '1010863',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'uploader_id': 'the-allman-brothers-band',
|
|
||||||
'title': 'Straight from the Heart',
|
|
||||||
'duration': 244,
|
|
||||||
'uploader': 'The Allman Brothers Band',
|
|
||||||
'thumbnail': 're:^https?://.*/thumbnail/.*',
|
|
||||||
'upload_date': '20131219',
|
|
||||||
'location': 'Capitol Theatre (Passaic, NJ)',
|
|
||||||
'description': 'Listen to The Allman Brothers Band perform Straight from the Heart at Capitol Theatre (Passaic, NJ) on Dec 16, 1981',
|
|
||||||
'timestamp': int,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
display_id = mobj.group('display_id')
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
|
|
||||||
thumbnail = self._search_regex(
|
|
||||||
r'<meta itemprop="thumbnail" content="([^"]+)"',
|
|
||||||
webpage, 'thumbnail', fatal=False)
|
|
||||||
|
|
||||||
data_div = self._search_regex(
|
|
||||||
r'(?s)<div class="data">(.*?)</div>', webpage, 'data fields')
|
|
||||||
uploader = self._html_search_regex(
|
|
||||||
r'<h1.*?>(.*?)</h1>', data_div, 'uploader', fatal=False)
|
|
||||||
title = self._html_search_regex(
|
|
||||||
r'<h2.*?>(.*?)</h2>', data_div, 'title')
|
|
||||||
location = self._html_search_regex(
|
|
||||||
r'<h4.*?>(.*?)</h4>', data_div, 'location', fatal=False)
|
|
||||||
|
|
||||||
kaltura_id = self._search_regex(
|
|
||||||
r'<div id="video-detail-player" data-kaltura-id="([^"]+)"',
|
|
||||||
webpage, 'kaltura ID')
|
|
||||||
wid = self._search_regex(r'/wid/_([0-9]+)/', webpage, 'wid')
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': mobj.group('id'),
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'url': 'kaltura:%s:%s' % (wid, kaltura_id),
|
|
||||||
'ie_key': 'Kaltura',
|
|
||||||
'display_id': display_id,
|
|
||||||
'uploader_id': mobj.group('uploader_id'),
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'description': self._html_search_meta('description', webpage),
|
|
||||||
'location': location,
|
|
||||||
'title': title,
|
|
||||||
'uploader': uploader,
|
|
||||||
}
|
|
||||||
58
youtube_dl/extractor/mwave.py
Normal file
58
youtube_dl/extractor/mwave.py
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_duration,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MwaveIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://mwave\.interest\.me/mnettv/videodetail\.m\?searchVideoDetailVO\.clip_id=(?P<id>[0-9]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=168859',
|
||||||
|
'md5': 'c930e27b7720aaa3c9d0018dfc8ff6cc',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '168859',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': '[M COUNTDOWN] SISTAR - SHAKE IT',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'uploader': 'M COUNTDOWN',
|
||||||
|
'duration': 206,
|
||||||
|
'view_count': int,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
vod_info = self._download_json(
|
||||||
|
'http://mwave.interest.me/onair/vod_info.m?vodtype=CL&sectorid=&endinfo=Y&id=%s' % video_id,
|
||||||
|
video_id, 'Download vod JSON')
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for num, cdn_info in enumerate(vod_info['cdn']):
|
||||||
|
stream_url = cdn_info.get('url')
|
||||||
|
if not stream_url:
|
||||||
|
continue
|
||||||
|
stream_name = cdn_info.get('name') or compat_str(num)
|
||||||
|
f4m_stream = self._download_json(
|
||||||
|
stream_url, video_id,
|
||||||
|
'Download %s stream JSON' % stream_name)
|
||||||
|
f4m_url = f4m_stream.get('fileurl')
|
||||||
|
if not f4m_url:
|
||||||
|
continue
|
||||||
|
formats.extend(
|
||||||
|
self._extract_f4m_formats(f4m_url + '&hdcore=3.0.3', video_id, f4m_id=stream_name))
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': vod_info['title'],
|
||||||
|
'thumbnail': vod_info.get('cover'),
|
||||||
|
'uploader': vod_info.get('program_title'),
|
||||||
|
'duration': parse_duration(vod_info.get('time')),
|
||||||
|
'view_count': int_or_none(vod_info.get('hit')),
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
@@ -236,3 +236,28 @@ class NBCNewsIE(InfoExtractor):
|
|||||||
'url': info['videoAssets'][-1]['publicUrl'],
|
'url': info['videoAssets'][-1]['publicUrl'],
|
||||||
'ie_key': 'ThePlatform',
|
'ie_key': 'ThePlatform',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class MSNBCIE(InfoExtractor):
|
||||||
|
# https URLs redirect to corresponding http ones
|
||||||
|
_VALID_URL = r'http://www\.msnbc\.com/[^/]+/watch/(?P<id>[^/]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
|
||||||
|
'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'n_hayes_Aimm_140801_272214',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'The chaotic GOP immigration vote',
|
||||||
|
'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'timestamp': 1406937606,
|
||||||
|
'upload_date': '20140802',
|
||||||
|
'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'],
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
embed_url = self._html_search_meta('embedURL', webpage)
|
||||||
|
return self.url_result(embed_url)
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ from ..compat import (
|
|||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
encode_dict,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
@@ -100,10 +101,7 @@ class NiconicoIE(InfoExtractor):
|
|||||||
'mail': username,
|
'mail': username,
|
||||||
'password': password,
|
'password': password,
|
||||||
}
|
}
|
||||||
# Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
|
login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('utf-8')
|
||||||
# chokes on unicode
|
|
||||||
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
|
|
||||||
login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
|
|
||||||
request = compat_urllib_request.Request(
|
request = compat_urllib_request.Request(
|
||||||
'https://secure.nicovideo.jp/secure/login', login_data)
|
'https://secure.nicovideo.jp/secure/login', login_data)
|
||||||
login_results = self._download_webpage(
|
login_results = self._download_webpage(
|
||||||
|
|||||||
@@ -130,10 +130,16 @@ class NowTVIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.nowtv.at/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/preview?return=/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit',
|
'url': 'http://www.nowtv.at/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/preview?return=/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.nowtv.de/rtl2/echtzeit/list/aktuell/schnelles-geld-am-ende-der-welt/player',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
|
display_id_split = display_id.split('/')
|
||||||
|
if len(display_id) > 2:
|
||||||
|
display_id = '/'.join((display_id_split[0], display_id_split[-1]))
|
||||||
|
|
||||||
info = self._download_json(
|
info = self._download_json(
|
||||||
'https://api.nowtv.de/v3/movies/%s?fields=id,title,free,geoblocked,articleLong,articleShort,broadcastStartDate,seoUrl,duration,format,files' % display_id,
|
'https://api.nowtv.de/v3/movies/%s?fields=id,title,free,geoblocked,articleLong,articleShort,broadcastStartDate,seoUrl,duration,format,files' % display_id,
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ class NowVideoIE(NovaMovIE):
|
|||||||
IE_NAME = 'nowvideo'
|
IE_NAME = 'nowvideo'
|
||||||
IE_DESC = 'NowVideo'
|
IE_DESC = 'NowVideo'
|
||||||
|
|
||||||
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:ch|sx|eu|at|ag|co|li)'}
|
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:ch|ec|sx|eu|at|ag|co|li)'}
|
||||||
|
|
||||||
_HOST = 'www.nowvideo.ch'
|
_HOST = 'www.nowvideo.ch'
|
||||||
|
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class OdnoklassnikiIE(InfoExtractor):
|
class OdnoklassnikiIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:odnoklassniki|ok)\.ru/(?:video|web-api/video/moviePlayer)/(?P<id>[\d-]+)'
|
_VALID_URL = r'https?://(?:www\.)?(?:odnoklassniki|ok)\.ru/(?:video|web-api/video/moviePlayer)/(?P<id>[\d-]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# metadata in JSON
|
# metadata in JSON
|
||||||
'url': 'http://ok.ru/video/20079905452',
|
'url': 'http://ok.ru/video/20079905452',
|
||||||
@@ -43,9 +43,27 @@ class OdnoklassnikiIE(InfoExtractor):
|
|||||||
'like_count': int,
|
'like_count': int,
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# YouTube embed (metadataUrl, provider == USER_YOUTUBE)
|
||||||
|
'url': 'http://ok.ru/video/64211978996595-1',
|
||||||
|
'md5': '5d7475d428845cd2e13bae6f1a992278',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '64211978996595-1',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Космическая среда от 26 августа 2015',
|
||||||
|
'description': 'md5:848eb8b85e5e3471a3a803dae1343ed0',
|
||||||
|
'duration': 440,
|
||||||
|
'upload_date': '20150826',
|
||||||
|
'uploader_id': '750099571',
|
||||||
|
'uploader': 'Алина П',
|
||||||
|
'age_limit': 0,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
|
'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.ok.ru/video/20648036891',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -56,7 +74,8 @@ class OdnoklassnikiIE(InfoExtractor):
|
|||||||
|
|
||||||
player = self._parse_json(
|
player = self._parse_json(
|
||||||
unescapeHTML(self._search_regex(
|
unescapeHTML(self._search_regex(
|
||||||
r'data-attributes="([^"]+)"', webpage, 'player')),
|
r'data-options=(?P<quote>["\'])(?P<player>{.+?%s.+?})(?P=quote)' % video_id,
|
||||||
|
webpage, 'player', group='player')),
|
||||||
video_id)
|
video_id)
|
||||||
|
|
||||||
flashvars = player['flashvars']
|
flashvars = player['flashvars']
|
||||||
@@ -89,16 +108,7 @@ class OdnoklassnikiIE(InfoExtractor):
|
|||||||
|
|
||||||
like_count = int_or_none(metadata.get('likeCount'))
|
like_count = int_or_none(metadata.get('likeCount'))
|
||||||
|
|
||||||
quality = qualities(('mobile', 'lowest', 'low', 'sd', 'hd'))
|
info = {
|
||||||
|
|
||||||
formats = [{
|
|
||||||
'url': f['url'],
|
|
||||||
'ext': 'mp4',
|
|
||||||
'format_id': f['name'],
|
|
||||||
'quality': quality(f['name']),
|
|
||||||
} for f in metadata['videos']]
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
@@ -108,5 +118,24 @@ class OdnoklassnikiIE(InfoExtractor):
|
|||||||
'uploader_id': uploader_id,
|
'uploader_id': uploader_id,
|
||||||
'like_count': like_count,
|
'like_count': like_count,
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
'formats': formats,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if metadata.get('provider') == 'USER_YOUTUBE':
|
||||||
|
info.update({
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': movie['contentId'],
|
||||||
|
})
|
||||||
|
return info
|
||||||
|
|
||||||
|
quality = qualities(('mobile', 'lowest', 'low', 'sd', 'hd'))
|
||||||
|
|
||||||
|
formats = [{
|
||||||
|
'url': f['url'],
|
||||||
|
'ext': 'mp4',
|
||||||
|
'format_id': f['name'],
|
||||||
|
'quality': quality(f['name']),
|
||||||
|
} for f in metadata['videos']]
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
info['formats'] = formats
|
||||||
|
return info
|
||||||
|
|||||||
@@ -106,7 +106,7 @@ class PlaytvakIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
|
|
||||||
info_url = compat_urlparse.urlunparse(
|
info_url = compat_urlparse.urlunparse(
|
||||||
parsed_url._replace(query = compat_urllib_parse.urlencode(qs, True)))
|
parsed_url._replace(query=compat_urllib_parse.urlencode(qs, True)))
|
||||||
|
|
||||||
json_info = self._download_json(
|
json_info = self._download_json(
|
||||||
info_url, video_id,
|
info_url, video_id,
|
||||||
|
|||||||
207
youtube_dl/extractor/pluralsight.py
Normal file
207
youtube_dl/extractor/pluralsight.py
Normal file
@@ -0,0 +1,207 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_str,
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_urllib_request,
|
||||||
|
compat_urlparse,
|
||||||
|
)
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
parse_duration,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class PluralsightIE(InfoExtractor):
|
||||||
|
IE_NAME = 'pluralsight'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?pluralsight\.com/training/player\?author=(?P<author>[^&]+)&name=(?P<name>[^&]+)(?:&mode=live)?&clip=(?P<clip>\d+)&course=(?P<course>[^&]+)'
|
||||||
|
_LOGIN_URL = 'https://www.pluralsight.com/id/'
|
||||||
|
_NETRC_MACHINE = 'pluralsight'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.pluralsight.com/training/player?author=mike-mckeown&name=hosting-sql-server-windows-azure-iaas-m7-mgmt&mode=live&clip=3&course=hosting-sql-server-windows-azure-iaas',
|
||||||
|
'md5': '4d458cf5cf4c593788672419a8dd4cf8',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'hosting-sql-server-windows-azure-iaas-m7-mgmt-04',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Management of SQL Server - Demo Monitoring',
|
||||||
|
'duration': 338,
|
||||||
|
},
|
||||||
|
'skip': 'Requires pluralsight account credentials',
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_initialize(self):
|
||||||
|
self._login()
|
||||||
|
|
||||||
|
def _login(self):
|
||||||
|
(username, password) = self._get_login_info()
|
||||||
|
if username is None:
|
||||||
|
self.raise_login_required('Pluralsight account is required')
|
||||||
|
|
||||||
|
login_page = self._download_webpage(
|
||||||
|
self._LOGIN_URL, None, 'Downloading login page')
|
||||||
|
|
||||||
|
login_form = self._hidden_inputs(login_page)
|
||||||
|
|
||||||
|
login_form.update({
|
||||||
|
'Username': username.encode('utf-8'),
|
||||||
|
'Password': password.encode('utf-8'),
|
||||||
|
})
|
||||||
|
|
||||||
|
post_url = self._search_regex(
|
||||||
|
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
|
||||||
|
'post url', default=self._LOGIN_URL, group='url')
|
||||||
|
|
||||||
|
if not post_url.startswith('http'):
|
||||||
|
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
|
||||||
|
|
||||||
|
request = compat_urllib_request.Request(
|
||||||
|
post_url, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
|
||||||
|
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||||
|
|
||||||
|
response = self._download_webpage(
|
||||||
|
request, None, 'Logging in as %s' % username)
|
||||||
|
|
||||||
|
error = self._search_regex(
|
||||||
|
r'<span[^>]+class="field-validation-error"[^>]*>([^<]+)</span>',
|
||||||
|
response, 'error message', default=None)
|
||||||
|
if error:
|
||||||
|
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
author = mobj.group('author')
|
||||||
|
name = mobj.group('name')
|
||||||
|
clip_id = mobj.group('clip')
|
||||||
|
course = mobj.group('course')
|
||||||
|
|
||||||
|
display_id = '%s-%s' % (name, clip_id)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
collection = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'moduleCollection\s*:\s*new\s+ModuleCollection\((\[.+?\])\s*,\s*\$rootScope\)',
|
||||||
|
webpage, 'modules'),
|
||||||
|
display_id)
|
||||||
|
|
||||||
|
module, clip = None, None
|
||||||
|
|
||||||
|
for module_ in collection:
|
||||||
|
if module_.get('moduleName') == name:
|
||||||
|
module = module_
|
||||||
|
for clip_ in module_.get('clips', []):
|
||||||
|
clip_index = clip_.get('clipIndex')
|
||||||
|
if clip_index is None:
|
||||||
|
continue
|
||||||
|
if compat_str(clip_index) == clip_id:
|
||||||
|
clip = clip_
|
||||||
|
break
|
||||||
|
|
||||||
|
if not clip:
|
||||||
|
raise ExtractorError('Unable to resolve clip')
|
||||||
|
|
||||||
|
QUALITIES = {
|
||||||
|
'low': {'width': 640, 'height': 480},
|
||||||
|
'medium': {'width': 848, 'height': 640},
|
||||||
|
'high': {'width': 1024, 'height': 768},
|
||||||
|
}
|
||||||
|
|
||||||
|
ALLOWED_QUALITIES = (
|
||||||
|
('webm', ('high',)),
|
||||||
|
('mp4', ('low', 'medium', 'high',)),
|
||||||
|
)
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for ext, qualities in ALLOWED_QUALITIES:
|
||||||
|
for quality in qualities:
|
||||||
|
f = QUALITIES[quality].copy()
|
||||||
|
clip_post = {
|
||||||
|
'a': author,
|
||||||
|
'cap': 'false',
|
||||||
|
'cn': clip_id,
|
||||||
|
'course': course,
|
||||||
|
'lc': 'en',
|
||||||
|
'm': name,
|
||||||
|
'mt': ext,
|
||||||
|
'q': '%dx%d' % (f['width'], f['height']),
|
||||||
|
}
|
||||||
|
request = compat_urllib_request.Request(
|
||||||
|
'http://www.pluralsight.com/training/Player/ViewClip',
|
||||||
|
json.dumps(clip_post).encode('utf-8'))
|
||||||
|
request.add_header('Content-Type', 'application/json;charset=utf-8')
|
||||||
|
format_id = '%s-%s' % (ext, quality)
|
||||||
|
clip_url = self._download_webpage(
|
||||||
|
request, display_id, 'Downloading %s URL' % format_id, fatal=False)
|
||||||
|
if not clip_url:
|
||||||
|
continue
|
||||||
|
f.update({
|
||||||
|
'url': clip_url,
|
||||||
|
'ext': ext,
|
||||||
|
'format_id': format_id,
|
||||||
|
})
|
||||||
|
formats.append(f)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
# TODO: captions
|
||||||
|
# http://www.pluralsight.com/training/Player/ViewClip + cap = true
|
||||||
|
# or
|
||||||
|
# http://www.pluralsight.com/training/Player/Captions
|
||||||
|
# { a = author, cn = clip_id, lc = end, m = name }
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': clip['clipName'],
|
||||||
|
'title': '%s - %s' % (module['title'], clip['title']),
|
||||||
|
'duration': int_or_none(clip.get('duration')) or parse_duration(clip.get('formattedDuration')),
|
||||||
|
'creator': author,
|
||||||
|
'formats': formats
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class PluralsightCourseIE(InfoExtractor):
|
||||||
|
IE_NAME = 'pluralsight:course'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?pluralsight\.com/courses/(?P<id>[^/]+)'
|
||||||
|
_TEST = {
|
||||||
|
# Free course from Pluralsight Starter Subscription for Microsoft TechNet
|
||||||
|
# https://offers.pluralsight.com/technet?loc=zTS3z&prod=zOTprodz&tech=zOttechz&prog=zOTprogz&type=zSOz&media=zOTmediaz&country=zUSz
|
||||||
|
'url': 'http://www.pluralsight.com/courses/hosting-sql-server-windows-azure-iaas',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'hosting-sql-server-windows-azure-iaas',
|
||||||
|
'title': 'Hosting SQL Server in Microsoft Azure IaaS Fundamentals',
|
||||||
|
'description': 'md5:61b37e60f21c4b2f91dc621a977d0986',
|
||||||
|
},
|
||||||
|
'playlist_count': 31,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
course_id = self._match_id(url)
|
||||||
|
|
||||||
|
# TODO: PSM cookie
|
||||||
|
|
||||||
|
course = self._download_json(
|
||||||
|
'http://www.pluralsight.com/data/course/%s' % course_id,
|
||||||
|
course_id, 'Downloading course JSON')
|
||||||
|
|
||||||
|
title = course['title']
|
||||||
|
description = course.get('description') or course.get('shortDescription')
|
||||||
|
|
||||||
|
course_data = self._download_json(
|
||||||
|
'http://www.pluralsight.com/data/course/content/%s' % course_id,
|
||||||
|
course_id, 'Downloading course data JSON')
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
for module in course_data:
|
||||||
|
for clip in module.get('clips', []):
|
||||||
|
player_parameters = clip.get('playerParameters')
|
||||||
|
if not player_parameters:
|
||||||
|
continue
|
||||||
|
entries.append(self.url_result(
|
||||||
|
'http://www.pluralsight.com/training/player?%s' % player_parameters,
|
||||||
|
'Pluralsight'))
|
||||||
|
|
||||||
|
return self.playlist_result(entries, course_id, title, description)
|
||||||
@@ -1,6 +1,7 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
@@ -8,22 +9,28 @@ class RTL2IE(InfoExtractor):
|
|||||||
_VALID_URL = r'http?://(?:www\.)?rtl2\.de/[^?#]*?/(?P<id>[^?#/]*?)(?:$|/(?:$|[?#]))'
|
_VALID_URL = r'http?://(?:www\.)?rtl2\.de/[^?#]*?/(?P<id>[^?#/]*?)(?:$|/(?:$|[?#]))'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0',
|
'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0',
|
||||||
'md5': 'bfcc179030535b08dc2b36b469b5adc7',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'folge-203-0',
|
'id': 'folge-203-0',
|
||||||
'ext': 'f4v',
|
'ext': 'f4v',
|
||||||
'title': 'GRIP sucht den Sommerkönig',
|
'title': 'GRIP sucht den Sommerkönig',
|
||||||
'description': 'Matthias, Det und Helge treten gegeneinander an.'
|
'description': 'Matthias, Det und Helge treten gegeneinander an.'
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.rtl2.de/sendung/koeln-50667/video/5512-anna/21040-anna-erwischt-alex/',
|
'url': 'http://www.rtl2.de/sendung/koeln-50667/video/5512-anna/21040-anna-erwischt-alex/',
|
||||||
'md5': 'ffcd517d2805b57ce11a58a2980c2b02',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '21040-anna-erwischt-alex',
|
'id': '21040-anna-erwischt-alex',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Anna erwischt Alex!',
|
'title': 'Anna erwischt Alex!',
|
||||||
'description': 'Anna ist Alex\' Tochter bei Köln 50667.'
|
'description': 'Anna ist Alex\' Tochter bei Köln 50667.'
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -34,12 +41,18 @@ class RTL2IE(InfoExtractor):
|
|||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
vico_id = self._html_search_regex(
|
mobj = re.search(
|
||||||
r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id')
|
r'<div[^>]+data-collection="(?P<vico_id>\d+)"[^>]+data-video="(?P<vivi_id>\d+)"',
|
||||||
vivi_id = self._html_search_regex(
|
webpage)
|
||||||
r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id')
|
if mobj:
|
||||||
|
vico_id = mobj.group('vico_id')
|
||||||
|
vivi_id = mobj.group('vivi_id')
|
||||||
|
else:
|
||||||
|
vico_id = self._html_search_regex(
|
||||||
|
r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id')
|
||||||
|
vivi_id = self._html_search_regex(
|
||||||
|
r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id')
|
||||||
info_url = 'http://www.rtl2.de/video/php/get_video.php?vico_id=' + vico_id + '&vivi_id=' + vivi_id
|
info_url = 'http://www.rtl2.de/video/php/get_video.php?vico_id=' + vico_id + '&vivi_id=' + vivi_id
|
||||||
webpage = self._download_webpage(info_url, '')
|
|
||||||
|
|
||||||
info = self._download_json(info_url, video_id)
|
info = self._download_json(info_url, video_id)
|
||||||
video_info = info['video']
|
video_info = info['video']
|
||||||
|
|||||||
@@ -18,6 +18,10 @@ class RTPIE(InfoExtractor):
|
|||||||
'description': 'As paixões musicais de António Cartaxo e António Macedo',
|
'description': 'As paixões musicais de António Cartaxo e António Macedo',
|
||||||
'thumbnail': 're:^https?://.*\.jpg',
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
|
'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ import re
|
|||||||
import time
|
import time
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urlparse
|
from ..compat import compat_urllib_request, compat_urlparse
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
@@ -102,7 +102,9 @@ class RTVEALaCartaIE(InfoExtractor):
|
|||||||
if info['state'] == 'DESPU':
|
if info['state'] == 'DESPU':
|
||||||
raise ExtractorError('The video is no longer available', expected=True)
|
raise ExtractorError('The video is no longer available', expected=True)
|
||||||
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id)
|
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id)
|
||||||
png = self._download_webpage(png_url, video_id, 'Downloading url information')
|
png_request = compat_urllib_request.Request(png_url)
|
||||||
|
png_request.add_header('Referer', url)
|
||||||
|
png = self._download_webpage(png_request, video_id, 'Downloading url information')
|
||||||
video_url = _decrypt_url(png)
|
video_url = _decrypt_url(png)
|
||||||
if not video_url.endswith('.f4m'):
|
if not video_url.endswith('.f4m'):
|
||||||
auth_url = video_url.replace(
|
auth_url = video_url.replace(
|
||||||
|
|||||||
@@ -6,19 +6,19 @@ from ..compat import compat_urllib_parse_urlparse
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
xpath_attr,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class RuutuIE(InfoExtractor):
|
class RuutuIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://(?:www\.)?ruutu\.fi/ohjelmat/(?:[^/?#]+/)*(?P<id>[^/?#]+)'
|
_VALID_URL = r'https?://(?:www\.)?ruutu\.fi/video/(?P<id>\d+)'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.ruutu.fi/ohjelmat/oletko-aina-halunnut-tietaa-mita-tapahtuu-vain-hetki-ennen-lahetysta-nyt-se-selvisi',
|
'url': 'http://www.ruutu.fi/video/2058907',
|
||||||
'md5': 'ab2093f39be1ca8581963451b3c0234f',
|
'md5': 'ab2093f39be1ca8581963451b3c0234f',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2058907',
|
'id': '2058907',
|
||||||
'display_id': 'oletko-aina-halunnut-tietaa-mita-tapahtuu-vain-hetki-ennen-lahetysta-nyt-se-selvisi',
|
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Oletko aina halunnut tietää mitä tapahtuu vain hetki ennen lähetystä? - Nyt se selvisi!',
|
'title': 'Oletko aina halunnut tietää mitä tapahtuu vain hetki ennen lähetystä? - Nyt se selvisi!',
|
||||||
'description': 'md5:cfc6ccf0e57a814360df464a91ff67d6',
|
'description': 'md5:cfc6ccf0e57a814360df464a91ff67d6',
|
||||||
@@ -28,14 +28,13 @@ class RuutuIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.ruutu.fi/ohjelmat/superpesis/superpesis-katso-koko-kausi-ruudussa',
|
'url': 'http://www.ruutu.fi/video/2057306',
|
||||||
'md5': '065a10ae4d5b8cfd9d0c3d332465e3d9',
|
'md5': '065a10ae4d5b8cfd9d0c3d332465e3d9',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2057306',
|
'id': '2057306',
|
||||||
'display_id': 'superpesis-katso-koko-kausi-ruudussa',
|
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Superpesis: katso koko kausi Ruudussa',
|
'title': 'Superpesis: katso koko kausi Ruudussa',
|
||||||
'description': 'md5:44c44a99fdbe5b380ab74ebd75f0af77',
|
'description': 'md5:da2736052fef3b2bd5e0005e63c25eac',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
'duration': 40,
|
'duration': 40,
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
@@ -44,29 +43,10 @@ class RuutuIE(InfoExtractor):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
video_xml = self._download_xml(
|
||||||
|
'http://gatling.ruutu.fi/media-xml-cache?id=%s' % video_id, video_id)
|
||||||
video_id = self._search_regex(
|
|
||||||
r'data-media-id="(\d+)"', webpage, 'media id')
|
|
||||||
|
|
||||||
video_xml_url = None
|
|
||||||
|
|
||||||
media_data = self._search_regex(
|
|
||||||
r'jQuery\.extend\([^,]+,\s*(.+?)\);', webpage,
|
|
||||||
'media data', default=None)
|
|
||||||
if media_data:
|
|
||||||
media_json = self._parse_json(media_data, display_id, fatal=False)
|
|
||||||
if media_json:
|
|
||||||
xml_url = media_json.get('ruutuplayer', {}).get('xmlUrl')
|
|
||||||
if xml_url:
|
|
||||||
video_xml_url = xml_url.replace('{ID}', video_id)
|
|
||||||
|
|
||||||
if not video_xml_url:
|
|
||||||
video_xml_url = 'http://gatling.ruutu.fi/media-xml-cache?id=%s' % video_id
|
|
||||||
|
|
||||||
video_xml = self._download_xml(video_xml_url, video_id)
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
processed_urls = []
|
processed_urls = []
|
||||||
@@ -109,10 +89,9 @@ class RuutuIE(InfoExtractor):
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'title': xpath_attr(video_xml, './/Behavior/Program', 'program_name', 'title', fatal=True),
|
||||||
'title': self._og_search_title(webpage),
|
'description': xpath_attr(video_xml, './/Behavior/Program', 'description', 'description'),
|
||||||
'description': self._og_search_description(webpage),
|
'thumbnail': xpath_attr(video_xml, './/Behavior/Startpicture', 'href', 'thumbnail'),
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
|
||||||
'duration': int_or_none(xpath_text(video_xml, './/Runtime', 'duration')),
|
'duration': int_or_none(xpath_text(video_xml, './/Runtime', 'duration')),
|
||||||
'age_limit': int_or_none(xpath_text(video_xml, './/AgeLimit', 'age limit')),
|
'age_limit': int_or_none(xpath_text(video_xml, './/AgeLimit', 'age limit')),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
|||||||
@@ -20,7 +20,6 @@ from ..utils import (
|
|||||||
class SafariBaseIE(InfoExtractor):
|
class SafariBaseIE(InfoExtractor):
|
||||||
_LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/'
|
_LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/'
|
||||||
_SUCCESSFUL_LOGIN_REGEX = r'<a href="/accounts/logout/"[^>]*>Sign Out</a>'
|
_SUCCESSFUL_LOGIN_REGEX = r'<a href="/accounts/logout/"[^>]*>Sign Out</a>'
|
||||||
_ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to supply credentials for safaribooksonline.com'
|
|
||||||
_NETRC_MACHINE = 'safari'
|
_NETRC_MACHINE = 'safari'
|
||||||
|
|
||||||
_API_BASE = 'https://www.safaribooksonline.com/api/v1/book'
|
_API_BASE = 'https://www.safaribooksonline.com/api/v1/book'
|
||||||
@@ -37,9 +36,7 @@ class SafariBaseIE(InfoExtractor):
|
|||||||
def _login(self):
|
def _login(self):
|
||||||
(username, password) = self._get_login_info()
|
(username, password) = self._get_login_info()
|
||||||
if username is None:
|
if username is None:
|
||||||
raise ExtractorError(
|
self.raise_login_required('safaribooksonline.com account is required')
|
||||||
self._ACCOUNT_CREDENTIALS_HINT,
|
|
||||||
expected=True)
|
|
||||||
|
|
||||||
headers = std_headers
|
headers = std_headers
|
||||||
if 'Referer' not in headers:
|
if 'Referer' not in headers:
|
||||||
|
|||||||
@@ -12,8 +12,8 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class ScreenwaveMediaIE(InfoExtractor):
|
class ScreenwaveMediaIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?[^"]*\bid=(?P<id>.+)'
|
_VALID_URL = r'https?://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=(?P<id>[A-Za-z0-9-]+)'
|
||||||
|
EMBED_PATTERN = r'src=(["\'])(?P<url>(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=.+?)\1'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911',
|
'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@@ -33,7 +33,7 @@ class ScreenwaveMediaIE(InfoExtractor):
|
|||||||
'http://player.screenwavemedia.com/player.js',
|
'http://player.screenwavemedia.com/player.js',
|
||||||
video_id, 'Downloading playerconfig webpage')
|
video_id, 'Downloading playerconfig webpage')
|
||||||
|
|
||||||
videoserver = self._search_regex(r"\[ipaddress\]\s*=>\s*([\d\.]+)", playerdata, 'videoserver')
|
videoserver = self._search_regex(r'SWMServer\s*=\s*"([\d\.]+)"', playerdata, 'videoserver')
|
||||||
|
|
||||||
sources = self._parse_json(
|
sources = self._parse_json(
|
||||||
js_to_json(
|
js_to_json(
|
||||||
@@ -56,6 +56,7 @@ class ScreenwaveMediaIE(InfoExtractor):
|
|||||||
|
|
||||||
# Fallback to hardcoded sources if JS changes again
|
# Fallback to hardcoded sources if JS changes again
|
||||||
if not sources:
|
if not sources:
|
||||||
|
self.report_warning('Falling back to a hardcoded list of streams')
|
||||||
sources = [{
|
sources = [{
|
||||||
'file': 'http://%s/vod/%s_%s.mp4' % (videoserver, video_id, format_id),
|
'file': 'http://%s/vod/%s_%s.mp4' % (videoserver, video_id, format_id),
|
||||||
'type': 'mp4',
|
'type': 'mp4',
|
||||||
|
|||||||
@@ -14,17 +14,28 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class SharedIE(InfoExtractor):
|
class SharedIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://shared\.sx/(?P<id>[\da-z]{10})'
|
IE_DESC = 'shared.sx and vivo.sx'
|
||||||
|
_VALID_URL = r'http://(?:shared|vivo)\.sx/(?P<id>[\da-z]{10})'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://shared.sx/0060718775',
|
'url': 'http://shared.sx/0060718775',
|
||||||
'md5': '106fefed92a8a2adb8c98e6a0652f49b',
|
'md5': '106fefed92a8a2adb8c98e6a0652f49b',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '0060718775',
|
'id': '0060718775',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Bmp4',
|
'title': 'Bmp4',
|
||||||
|
'filesize': 1720110,
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://vivo.sx/d7ddda0e78',
|
||||||
|
'md5': '15b3af41be0b4fe01f4df075c2678b2c',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'd7ddda0e78',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Chicken',
|
||||||
|
'filesize': 528031,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|||||||
@@ -330,10 +330,7 @@ class SmotriBroadcastIE(InfoExtractor):
|
|||||||
|
|
||||||
(username, password) = self._get_login_info()
|
(username, password) = self._get_login_info()
|
||||||
if username is None:
|
if username is None:
|
||||||
raise ExtractorError(
|
self.raise_login_required('Erotic broadcasts allowed only for registered users')
|
||||||
'Erotic broadcasts allowed only for registered users, '
|
|
||||||
'use --username and --password options to provide account credentials.',
|
|
||||||
expected=True)
|
|
||||||
|
|
||||||
login_form = {
|
login_form = {
|
||||||
'login-hint53': '1',
|
'login-hint53': '1',
|
||||||
|
|||||||
@@ -309,7 +309,7 @@ class SoundcloudUserIE(SoundcloudIE):
|
|||||||
'id': '114582580',
|
'id': '114582580',
|
||||||
'title': 'The Akashic Chronicler (All)',
|
'title': 'The Akashic Chronicler (All)',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 112,
|
'playlist_mincount': 111,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://soundcloud.com/the-akashic-chronicler/tracks',
|
'url': 'https://soundcloud.com/the-akashic-chronicler/tracks',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -330,14 +330,14 @@ class SoundcloudUserIE(SoundcloudIE):
|
|||||||
'id': '114582580',
|
'id': '114582580',
|
||||||
'title': 'The Akashic Chronicler (Reposts)',
|
'title': 'The Akashic Chronicler (Reposts)',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 9,
|
'playlist_mincount': 7,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://soundcloud.com/the-akashic-chronicler/likes',
|
'url': 'https://soundcloud.com/the-akashic-chronicler/likes',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '114582580',
|
'id': '114582580',
|
||||||
'title': 'The Akashic Chronicler (Likes)',
|
'title': 'The Akashic Chronicler (Likes)',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 333,
|
'playlist_mincount': 321,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://soundcloud.com/grynpyret/spotlight',
|
'url': 'https://soundcloud.com/grynpyret/spotlight',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
|||||||
@@ -16,8 +16,9 @@ from ..aes import aes_decrypt_text
|
|||||||
|
|
||||||
|
|
||||||
class SpankwireIE(InfoExtractor):
|
class SpankwireIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
|
_VALID_URL = r'https?://(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<id>[0-9]+)/?)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
|
# download URL pattern: */<height>P_<tbr>K_<video_id>.mp4
|
||||||
'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
|
'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
|
||||||
'md5': '8bbfde12b101204b39e4b9fe7eb67095',
|
'md5': '8bbfde12b101204b39e4b9fe7eb67095',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -30,14 +31,27 @@ class SpankwireIE(InfoExtractor):
|
|||||||
'upload_date': '20070507',
|
'upload_date': '20070507',
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
# download URL pattern: */mp4_<format_id>_<video_id>.mp4
|
||||||
|
'url': 'http://www.spankwire.com/Titcums-Compiloation-I/video1921551/',
|
||||||
|
'md5': '09b3c20833308b736ae8902db2f8d7e6',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1921551',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Titcums Compiloation I',
|
||||||
|
'description': 'cum on tits',
|
||||||
|
'uploader': 'dannyh78999',
|
||||||
|
'uploader_id': '3056053',
|
||||||
|
'upload_date': '20150822',
|
||||||
|
'age_limit': 18,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('videoid')
|
video_id = mobj.group('id')
|
||||||
url = 'http://www.' + mobj.group('url')
|
|
||||||
|
|
||||||
req = compat_urllib_request.Request(url)
|
req = compat_urllib_request.Request('http://www.' + mobj.group('url'))
|
||||||
req.add_header('Cookie', 'age_verified=1')
|
req.add_header('Cookie', 'age_verified=1')
|
||||||
webpage = self._download_webpage(req, video_id)
|
webpage = self._download_webpage(req, video_id)
|
||||||
|
|
||||||
@@ -54,7 +68,7 @@ class SpankwireIE(InfoExtractor):
|
|||||||
r'by:\s*<a [^>]*>(.+?)</a>',
|
r'by:\s*<a [^>]*>(.+?)</a>',
|
||||||
webpage, 'uploader', fatal=False)
|
webpage, 'uploader', fatal=False)
|
||||||
uploader_id = self._html_search_regex(
|
uploader_id = self._html_search_regex(
|
||||||
r'by:\s*<a href="/Profile\.aspx\?.*?UserId=(\d+).*?"',
|
r'by:\s*<a href="/(?:user/viewProfile|Profile\.aspx)\?.*?UserId=(\d+).*?"',
|
||||||
webpage, 'uploader id', fatal=False)
|
webpage, 'uploader id', fatal=False)
|
||||||
upload_date = unified_strdate(self._html_search_regex(
|
upload_date = unified_strdate(self._html_search_regex(
|
||||||
r'</a> on (.+?) at \d+:\d+',
|
r'</a> on (.+?) at \d+:\d+',
|
||||||
@@ -67,9 +81,10 @@ class SpankwireIE(InfoExtractor):
|
|||||||
r'<span\s+id="spCommentCount"[^>]*>([\d,\.]+)</span>',
|
r'<span\s+id="spCommentCount"[^>]*>([\d,\.]+)</span>',
|
||||||
webpage, 'comment count', fatal=False))
|
webpage, 'comment count', fatal=False))
|
||||||
|
|
||||||
video_urls = list(map(
|
videos = re.findall(
|
||||||
compat_urllib_parse_unquote,
|
r'playerData\.cdnPath([0-9]{3,})\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage)
|
||||||
re.findall(r'playerData\.cdnPath[0-9]{3,}\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage)))
|
heights = [int(video[0]) for video in videos]
|
||||||
|
video_urls = list(map(compat_urllib_parse_unquote, [video[1] for video in videos]))
|
||||||
if webpage.find('flashvars\.encrypted = "true"') != -1:
|
if webpage.find('flashvars\.encrypted = "true"') != -1:
|
||||||
password = self._search_regex(
|
password = self._search_regex(
|
||||||
r'flashvars\.video_title = "([^"]+)',
|
r'flashvars\.video_title = "([^"]+)',
|
||||||
@@ -79,21 +94,22 @@ class SpankwireIE(InfoExtractor):
|
|||||||
video_urls))
|
video_urls))
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for video_url in video_urls:
|
for height, video_url in zip(heights, video_urls):
|
||||||
path = compat_urllib_parse_urlparse(video_url).path
|
path = compat_urllib_parse_urlparse(video_url).path
|
||||||
format = path.split('/')[4].split('_')[:2]
|
_, quality = path.split('/')[4].split('_')[:2]
|
||||||
resolution, bitrate_str = format
|
f = {
|
||||||
format = "-".join(format)
|
|
||||||
height = int(resolution.rstrip('Pp'))
|
|
||||||
tbr = int(bitrate_str.rstrip('Kk'))
|
|
||||||
formats.append({
|
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'resolution': resolution,
|
|
||||||
'format': format,
|
|
||||||
'tbr': tbr,
|
|
||||||
'height': height,
|
'height': height,
|
||||||
'format_id': format,
|
}
|
||||||
})
|
tbr = self._search_regex(r'^(\d+)[Kk]$', quality, 'tbr', default=None)
|
||||||
|
if tbr:
|
||||||
|
f.update({
|
||||||
|
'tbr': int(tbr),
|
||||||
|
'format_id': '%dp' % height,
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
f['format_id'] = quality
|
||||||
|
formats.append(f)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
age_limit = self._rta_search(webpage)
|
age_limit = self._rta_search(webpage)
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ from .mitele import MiTeleIE
|
|||||||
|
|
||||||
class TelecincoIE(MiTeleIE):
|
class TelecincoIE(MiTeleIE):
|
||||||
IE_NAME = 'telecinco.es'
|
IE_NAME = 'telecinco.es'
|
||||||
_VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/(?:[^/]+/)?(?P<id>.*?)\.html'
|
_VALID_URL = r'https?://www\.telecinco\.es/(?:[^/]+/)+(?P<id>.+?)\.html'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
|
'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
|
||||||
@@ -23,4 +23,7 @@ class TelecincoIE(MiTeleIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
|
'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.telecinco.es/espanasinirmaslejos/Espana-gran-destino-turistico_2_1240605043.html',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
|
||||||
import time
|
import time
|
||||||
import hmac
|
import hmac
|
||||||
import binascii
|
import binascii
|
||||||
@@ -10,7 +10,8 @@ import hashlib
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_str,
|
compat_parse_qs,
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
@@ -18,12 +19,69 @@ from ..utils import (
|
|||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
url_basename,
|
||||||
|
float_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
_x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
|
default_ns = 'http://www.w3.org/2005/SMIL21/Language'
|
||||||
|
_x = lambda p: xpath_with_ns(p, {'smil': default_ns})
|
||||||
|
|
||||||
|
|
||||||
class ThePlatformIE(InfoExtractor):
|
class ThePlatformBaseIE(InfoExtractor):
|
||||||
|
def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
|
||||||
|
meta = self._download_xml(smil_url, video_id, note=note)
|
||||||
|
try:
|
||||||
|
error_msg = next(
|
||||||
|
n.attrib['abstract']
|
||||||
|
for n in meta.findall(_x('.//smil:ref'))
|
||||||
|
if n.attrib.get('title') == 'Geographic Restriction' or n.attrib.get('title') == 'Expired')
|
||||||
|
except StopIteration:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
raise ExtractorError(error_msg, expected=True)
|
||||||
|
|
||||||
|
formats = self._parse_smil_formats(
|
||||||
|
meta, smil_url, video_id, namespace=default_ns,
|
||||||
|
# the parameters are from syfy.com, other sites may use others,
|
||||||
|
# they also work for nbc.com
|
||||||
|
f4m_params={'g': 'UXWGVKRWHFSP', 'hdcore': '3.0.3'},
|
||||||
|
transform_rtmp_url=lambda streamer, src: (streamer, 'mp4:' + src))
|
||||||
|
|
||||||
|
for _format in formats:
|
||||||
|
ext = determine_ext(_format['url'])
|
||||||
|
if ext == 'once':
|
||||||
|
_format['ext'] = 'mp4'
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
subtitles = self._parse_smil_subtitles(meta, default_ns)
|
||||||
|
|
||||||
|
return formats, subtitles
|
||||||
|
|
||||||
|
def get_metadata(self, path, video_id):
|
||||||
|
info_url = 'http://link.theplatform.com/s/%s?format=preview' % path
|
||||||
|
info = self._download_json(info_url, video_id)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
captions = info.get('captions')
|
||||||
|
if isinstance(captions, list):
|
||||||
|
for caption in captions:
|
||||||
|
lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type')
|
||||||
|
subtitles[lang] = [{
|
||||||
|
'ext': 'srt' if mime == 'text/srt' else 'ttml',
|
||||||
|
'url': src,
|
||||||
|
}]
|
||||||
|
|
||||||
|
return {
|
||||||
|
'title': info['title'],
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'description': info['description'],
|
||||||
|
'thumbnail': info['defaultThumbnailUrl'],
|
||||||
|
'duration': int_or_none(info.get('duration'), 1000),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class ThePlatformIE(ThePlatformBaseIE):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
(?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
|
(?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
|
||||||
(?:(?P<media>(?:[^/]+/)+select/media/)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
|
(?:(?P<media>(?:[^/]+/)+select/media/)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
|
||||||
@@ -67,6 +125,20 @@ class ThePlatformIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://player.theplatform.com/p/NnzsPC/widget/select/media/4Y0TlYUr_ZT7',
|
'url': 'http://player.theplatform.com/p/NnzsPC/widget/select/media/4Y0TlYUr_ZT7',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://player.theplatform.com/p/2E2eJC/nbcNewsOffsite?guid=tdy_or_siri_150701',
|
||||||
|
'md5': '734f3790fb5fc4903da391beeebc4836',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'tdy_or_siri_150701',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'iPhone Siri’s sassy response to a math question has people talking',
|
||||||
|
'description': 'md5:a565d1deadd5086f3331d57298ec6333',
|
||||||
|
'duration': 83.0,
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'timestamp': 1435752600,
|
||||||
|
'upload_date': '20150701',
|
||||||
|
'categories': ['Today/Shows/Orange Room', 'Today/Sections/Money', 'Today/Topics/Tech', "Today/Topics/Editor's picks"],
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -101,6 +173,24 @@ class ThePlatformIE(InfoExtractor):
|
|||||||
path += '/media'
|
path += '/media'
|
||||||
path += '/' + video_id
|
path += '/' + video_id
|
||||||
|
|
||||||
|
qs_dict = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||||
|
if 'guid' in qs_dict:
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
scripts = re.findall(r'<script[^>]+src="([^"]+)"', webpage)
|
||||||
|
feed_id = None
|
||||||
|
# feed id usually locates in the last script.
|
||||||
|
# Seems there's no pattern for the interested script filename, so
|
||||||
|
# I try one by one
|
||||||
|
for script in reversed(scripts):
|
||||||
|
feed_script = self._download_webpage(script, video_id, 'Downloading feed script')
|
||||||
|
feed_id = self._search_regex(r'defaultFeedId\s*:\s*"([^"]+)"', feed_script, 'default feed id', default=None)
|
||||||
|
if feed_id is not None:
|
||||||
|
break
|
||||||
|
if feed_id is None:
|
||||||
|
raise ExtractorError('Unable to find feed id')
|
||||||
|
return self.url_result('http://feed.theplatform.com/f/%s/%s?byGuid=%s' % (
|
||||||
|
provider_id, feed_id, qs_dict['guid'][0]))
|
||||||
|
|
||||||
if smuggled_data.get('force_smil_url', False):
|
if smuggled_data.get('force_smil_url', False):
|
||||||
smil_url = url
|
smil_url = url
|
||||||
elif mobj.group('config'):
|
elif mobj.group('config'):
|
||||||
@@ -120,95 +210,85 @@ class ThePlatformIE(InfoExtractor):
|
|||||||
if sig:
|
if sig:
|
||||||
smil_url = self._sign_url(smil_url, sig['key'], sig['secret'])
|
smil_url = self._sign_url(smil_url, sig['key'], sig['secret'])
|
||||||
|
|
||||||
meta = self._download_xml(smil_url, video_id)
|
formats, subtitles = self._extract_theplatform_smil(smil_url, video_id)
|
||||||
try:
|
|
||||||
error_msg = next(
|
|
||||||
n.attrib['abstract']
|
|
||||||
for n in meta.findall(_x('.//smil:ref'))
|
|
||||||
if n.attrib.get('title') == 'Geographic Restriction' or n.attrib.get('title') == 'Expired')
|
|
||||||
except StopIteration:
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
raise ExtractorError(error_msg, expected=True)
|
|
||||||
|
|
||||||
info_url = 'http://link.theplatform.com/s/%s?format=preview' % path
|
ret = self.get_metadata(path, video_id)
|
||||||
info_json = self._download_webpage(info_url, video_id)
|
combined_subtitles = self._merge_subtitles(ret.get('subtitles', {}), subtitles)
|
||||||
info = json.loads(info_json)
|
ret.update({
|
||||||
|
|
||||||
subtitles = {}
|
|
||||||
captions = info.get('captions')
|
|
||||||
if isinstance(captions, list):
|
|
||||||
for caption in captions:
|
|
||||||
lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type')
|
|
||||||
subtitles[lang] = [{
|
|
||||||
'ext': 'srt' if mime == 'text/srt' else 'ttml',
|
|
||||||
'url': src,
|
|
||||||
}]
|
|
||||||
|
|
||||||
head = meta.find(_x('smil:head'))
|
|
||||||
body = meta.find(_x('smil:body'))
|
|
||||||
|
|
||||||
f4m_node = body.find(_x('smil:seq//smil:video'))
|
|
||||||
if f4m_node is None:
|
|
||||||
f4m_node = body.find(_x('smil:seq/smil:video'))
|
|
||||||
if f4m_node is not None and '.f4m' in f4m_node.attrib['src']:
|
|
||||||
f4m_url = f4m_node.attrib['src']
|
|
||||||
if 'manifest.f4m?' not in f4m_url:
|
|
||||||
f4m_url += '?'
|
|
||||||
# the parameters are from syfy.com, other sites may use others,
|
|
||||||
# they also work for nbc.com
|
|
||||||
f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3'
|
|
||||||
formats = self._extract_f4m_formats(f4m_url, video_id)
|
|
||||||
else:
|
|
||||||
formats = []
|
|
||||||
switch = body.find(_x('smil:switch'))
|
|
||||||
if switch is None:
|
|
||||||
switch = body.find(_x('smil:par//smil:switch'))
|
|
||||||
if switch is None:
|
|
||||||
switch = body.find(_x('smil:par/smil:switch'))
|
|
||||||
if switch is None:
|
|
||||||
switch = body.find(_x('smil:par'))
|
|
||||||
if switch is not None:
|
|
||||||
base_url = head.find(_x('smil:meta')).attrib['base']
|
|
||||||
for f in switch.findall(_x('smil:video')):
|
|
||||||
attr = f.attrib
|
|
||||||
width = int_or_none(attr.get('width'))
|
|
||||||
height = int_or_none(attr.get('height'))
|
|
||||||
vbr = int_or_none(attr.get('system-bitrate'), 1000)
|
|
||||||
format_id = '%dx%d_%dk' % (width, height, vbr)
|
|
||||||
formats.append({
|
|
||||||
'format_id': format_id,
|
|
||||||
'url': base_url,
|
|
||||||
'play_path': 'mp4:' + attr['src'],
|
|
||||||
'ext': 'flv',
|
|
||||||
'width': width,
|
|
||||||
'height': height,
|
|
||||||
'vbr': vbr,
|
|
||||||
})
|
|
||||||
else:
|
|
||||||
switch = body.find(_x('smil:seq//smil:switch'))
|
|
||||||
if switch is None:
|
|
||||||
switch = body.find(_x('smil:seq/smil:switch'))
|
|
||||||
for f in switch.findall(_x('smil:video')):
|
|
||||||
attr = f.attrib
|
|
||||||
vbr = int_or_none(attr.get('system-bitrate'), 1000)
|
|
||||||
ext = determine_ext(attr['src'])
|
|
||||||
if ext == 'once':
|
|
||||||
ext = 'mp4'
|
|
||||||
formats.append({
|
|
||||||
'format_id': compat_str(vbr),
|
|
||||||
'url': attr['src'],
|
|
||||||
'vbr': vbr,
|
|
||||||
'ext': ext,
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': info['title'],
|
|
||||||
'subtitles': subtitles,
|
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'description': info['description'],
|
'subtitles': combined_subtitles,
|
||||||
'thumbnail': info['defaultThumbnailUrl'],
|
})
|
||||||
'duration': int_or_none(info.get('duration'), 1000),
|
|
||||||
}
|
return ret
|
||||||
|
|
||||||
|
|
||||||
|
class ThePlatformFeedIE(ThePlatformBaseIE):
|
||||||
|
_URL_TEMPLATE = '%s//feed.theplatform.com/f/%s/%s?form=json&byGuid=%s'
|
||||||
|
_VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*byGuid=(?P<id>[a-zA-Z0-9_]+)'
|
||||||
|
_TEST = {
|
||||||
|
# From http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=n_hardball_5biden_140207
|
||||||
|
'url': 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207',
|
||||||
|
'md5': '22d2b84f058d3586efcd99e57d59d314',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'n_hardball_5biden_140207',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'The Biden factor: will Joe run in 2016?',
|
||||||
|
'description': 'Could Vice President Joe Biden be preparing a 2016 campaign? Mark Halperin and Sam Stein weigh in.',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'upload_date': '20140208',
|
||||||
|
'timestamp': 1391824260,
|
||||||
|
'duration': 467.0,
|
||||||
|
'categories': ['MSNBC/Issues/Democrats', 'MSNBC/Issues/Elections/Election 2016'],
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
provider_id = mobj.group('provider_id')
|
||||||
|
feed_id = mobj.group('feed_id')
|
||||||
|
|
||||||
|
real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, video_id)
|
||||||
|
feed = self._download_json(real_url, video_id)
|
||||||
|
entry = feed['entries'][0]
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
subtitles = {}
|
||||||
|
first_video_id = None
|
||||||
|
duration = None
|
||||||
|
for item in entry['media$content']:
|
||||||
|
smil_url = item['plfile$url'] + '&format=SMIL&Tracking=true&Embedded=true&formats=MPEG4,F4M'
|
||||||
|
cur_video_id = url_basename(smil_url)
|
||||||
|
if first_video_id is None:
|
||||||
|
first_video_id = cur_video_id
|
||||||
|
duration = float_or_none(item.get('plfile$duration'))
|
||||||
|
cur_formats, cur_subtitles = self._extract_theplatform_smil(smil_url, video_id, 'Downloading SMIL data for %s' % cur_video_id)
|
||||||
|
formats.extend(cur_formats)
|
||||||
|
subtitles = self._merge_subtitles(subtitles, cur_subtitles)
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
thumbnails = [{
|
||||||
|
'url': thumbnail['plfile$url'],
|
||||||
|
'width': int_or_none(thumbnail.get('plfile$width')),
|
||||||
|
'height': int_or_none(thumbnail.get('plfile$height')),
|
||||||
|
} for thumbnail in entry.get('media$thumbnails', [])]
|
||||||
|
|
||||||
|
timestamp = int_or_none(entry.get('media$availableDate'), scale=1000)
|
||||||
|
categories = [item['media$name'] for item in entry.get('media$categories', [])]
|
||||||
|
|
||||||
|
ret = self.get_metadata('%s/%s' % (provider_id, first_video_id), video_id)
|
||||||
|
subtitles = self._merge_subtitles(subtitles, ret['subtitles'])
|
||||||
|
ret.update({
|
||||||
|
'id': video_id,
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'duration': duration,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'categories': categories,
|
||||||
|
})
|
||||||
|
|
||||||
|
return ret
|
||||||
|
|||||||
@@ -60,9 +60,7 @@ class TubiTvIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
if re.search(r"<(?:DIV|div) class='login-required-screen'>", webpage):
|
if re.search(r"<(?:DIV|div) class='login-required-screen'>", webpage):
|
||||||
raise ExtractorError(
|
self.raise_login_required('This video requires login')
|
||||||
'This video requires login, use --username and --password '
|
|
||||||
'options to provide account credentials.', expected=True)
|
|
||||||
|
|
||||||
title = self._og_search_title(webpage)
|
title = self._og_search_title(webpage)
|
||||||
description = self._og_search_description(webpage)
|
description = self._og_search_description(webpage)
|
||||||
|
|||||||
@@ -12,9 +12,11 @@ from ..compat import (
|
|||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
)
|
)
|
||||||
@@ -26,7 +28,7 @@ class TwitchBaseIE(InfoExtractor):
|
|||||||
_API_BASE = 'https://api.twitch.tv'
|
_API_BASE = 'https://api.twitch.tv'
|
||||||
_USHER_BASE = 'http://usher.twitch.tv'
|
_USHER_BASE = 'http://usher.twitch.tv'
|
||||||
_LOGIN_URL = 'https://secure.twitch.tv/login'
|
_LOGIN_URL = 'https://secure.twitch.tv/login'
|
||||||
_LOGIN_POST_URL = 'https://passport.twitch.tv/authorize'
|
_LOGIN_POST_URL = 'https://passport.twitch.tv/authentications/new'
|
||||||
_NETRC_MACHINE = 'twitch'
|
_NETRC_MACHINE = 'twitch'
|
||||||
|
|
||||||
def _handle_error(self, response):
|
def _handle_error(self, response):
|
||||||
@@ -69,8 +71,15 @@ class TwitchBaseIE(InfoExtractor):
|
|||||||
'password': password.encode('utf-8'),
|
'password': password.encode('utf-8'),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
post_url = self._search_regex(
|
||||||
|
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
|
||||||
|
'post url', default=self._LOGIN_POST_URL, group='url')
|
||||||
|
|
||||||
|
if not post_url.startswith('http'):
|
||||||
|
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
|
||||||
|
|
||||||
request = compat_urllib_request.Request(
|
request = compat_urllib_request.Request(
|
||||||
self._LOGIN_POST_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
|
post_url, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
|
||||||
request.add_header('Referer', self._LOGIN_URL)
|
request.add_header('Referer', self._LOGIN_URL)
|
||||||
response = self._download_webpage(
|
response = self._download_webpage(
|
||||||
request, None, 'Logging in as %s' % username)
|
request, None, 'Logging in as %s' % username)
|
||||||
@@ -132,14 +141,14 @@ class TwitchItemBaseIE(TwitchBaseIE):
|
|||||||
def _extract_info(self, info):
|
def _extract_info(self, info):
|
||||||
return {
|
return {
|
||||||
'id': info['_id'],
|
'id': info['_id'],
|
||||||
'title': info['title'],
|
'title': info.get('title') or 'Untitled Broadcast',
|
||||||
'description': info['description'],
|
'description': info.get('description'),
|
||||||
'duration': info['length'],
|
'duration': int_or_none(info.get('length')),
|
||||||
'thumbnail': info['preview'],
|
'thumbnail': info.get('preview'),
|
||||||
'uploader': info['channel']['display_name'],
|
'uploader': info.get('channel', {}).get('display_name'),
|
||||||
'uploader_id': info['channel']['name'],
|
'uploader_id': info.get('channel', {}).get('name'),
|
||||||
'timestamp': parse_iso8601(info['recorded_at']),
|
'timestamp': parse_iso8601(info.get('recorded_at')),
|
||||||
'view_count': info['views'],
|
'view_count': int_or_none(info.get('views')),
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -187,7 +196,7 @@ class TwitchVodIE(TwitchItemBaseIE):
|
|||||||
_ITEM_TYPE = 'vod'
|
_ITEM_TYPE = 'vod'
|
||||||
_ITEM_SHORTCUT = 'v'
|
_ITEM_SHORTCUT = 'v'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s',
|
'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'v6528877',
|
'id': 'v6528877',
|
||||||
@@ -206,7 +215,26 @@ class TwitchVodIE(TwitchItemBaseIE):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
# Untitled broadcast (title is None)
|
||||||
|
'url': 'http://www.twitch.tv/belkao_o/v/11230755',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'v11230755',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Untitled Broadcast',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'duration': 1638,
|
||||||
|
'timestamp': 1439746708,
|
||||||
|
'upload_date': '20150816',
|
||||||
|
'uploader': 'BelkAO_o',
|
||||||
|
'uploader_id': 'belkao_o',
|
||||||
|
'view_count': int,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
item_id = self._match_id(url)
|
item_id = self._match_id(url)
|
||||||
|
|||||||
@@ -70,9 +70,7 @@ class UdemyIE(InfoExtractor):
|
|||||||
def _login(self):
|
def _login(self):
|
||||||
(username, password) = self._get_login_info()
|
(username, password) = self._get_login_info()
|
||||||
if username is None:
|
if username is None:
|
||||||
raise ExtractorError(
|
self.raise_login_required('Udemy account is required')
|
||||||
'Udemy account is required, use --username and --password options to provide account credentials.',
|
|
||||||
expected=True)
|
|
||||||
|
|
||||||
login_popup = self._download_webpage(
|
login_popup = self._download_webpage(
|
||||||
self._LOGIN_URL, None, 'Downloading login popup')
|
self._LOGIN_URL, None, 'Downloading login popup')
|
||||||
|
|||||||
@@ -1,81 +0,0 @@
|
|||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import re
|
|
||||||
import json
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import int_or_none
|
|
||||||
|
|
||||||
|
|
||||||
class VideoBamIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'http://(?:www\.)?videobam\.com/(?:videos/download/)?(?P<id>[a-zA-Z]+)'
|
|
||||||
|
|
||||||
_TESTS = [
|
|
||||||
{
|
|
||||||
'url': 'http://videobam.com/OiJQM',
|
|
||||||
'md5': 'db471f27763a531f10416a0c58b5a1e0',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'OiJQM',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Is Alcohol Worse Than Ecstasy?',
|
|
||||||
'description': 'md5:d25b96151515c91debc42bfbb3eb2683',
|
|
||||||
'uploader': 'frihetsvinge',
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'url': 'http://videobam.com/pqLvq',
|
|
||||||
'md5': 'd9a565b5379a99126ef94e1d7f9a383e',
|
|
||||||
'note': 'HD video',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'pqLvq',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': '_',
|
|
||||||
}
|
|
||||||
},
|
|
||||||
]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
page = self._download_webpage('http://videobam.com/%s' % video_id, video_id, 'Downloading page')
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
|
|
||||||
for preference, format_id in enumerate(['low', 'high']):
|
|
||||||
mobj = re.search(r"%s: '(?P<url>[^']+)'" % format_id, page)
|
|
||||||
if not mobj:
|
|
||||||
continue
|
|
||||||
formats.append({
|
|
||||||
'url': mobj.group('url'),
|
|
||||||
'ext': 'mp4',
|
|
||||||
'format_id': format_id,
|
|
||||||
'preference': preference,
|
|
||||||
})
|
|
||||||
|
|
||||||
if not formats:
|
|
||||||
player_config = json.loads(self._html_search_regex(r'var player_config = ({.+?});', page, 'player config'))
|
|
||||||
formats = [{
|
|
||||||
'url': item['url'],
|
|
||||||
'ext': 'mp4',
|
|
||||||
} for item in player_config['playlist'] if 'autoPlay' in item]
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
title = self._og_search_title(page, default='_', fatal=False)
|
|
||||||
description = self._og_search_description(page, default=None)
|
|
||||||
thumbnail = self._og_search_thumbnail(page)
|
|
||||||
uploader = self._html_search_regex(r'Upload by ([^<]+)</a>', page, 'uploader', fatal=False, default=None)
|
|
||||||
view_count = int_or_none(
|
|
||||||
self._html_search_regex(r'<strong>Views:</strong> (\d+) ', page, 'view count', fatal=False))
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'uploader': uploader,
|
|
||||||
'view_count': view_count,
|
|
||||||
'formats': formats,
|
|
||||||
'age_limit': 18,
|
|
||||||
}
|
|
||||||
@@ -1,10 +1,12 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
str_to_int,
|
parse_iso8601,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -12,18 +14,41 @@ class VidmeIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]+)'
|
_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://vid.me/QNB',
|
'url': 'https://vid.me/QNB',
|
||||||
'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
|
'md5': 'c62f1156138dc3323902188c5b5a8bd6',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'QNB',
|
'id': 'QNB',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Fishing for piranha - the easy way',
|
'title': 'Fishing for piranha - the easy way',
|
||||||
'description': 'source: https://www.facebook.com/photo.php?v=312276045600871',
|
'description': 'source: https://www.facebook.com/photo.php?v=312276045600871',
|
||||||
'duration': 119.92,
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
'timestamp': 1406313244,
|
'timestamp': 1406313244,
|
||||||
'upload_date': '20140725',
|
'upload_date': '20140725',
|
||||||
'thumbnail': 're:^https?://.*\.jpg',
|
'age_limit': 0,
|
||||||
|
'duration': 119.92,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://vid.me/Gc6M',
|
||||||
|
'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'Gc6M',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'O Mere Dil ke chain - Arnav and Khushi VM',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
|
'timestamp': 1441211642,
|
||||||
|
'upload_date': '20150902',
|
||||||
|
'uploader': 'SunshineM',
|
||||||
|
'uploader_id': '3552827',
|
||||||
|
'age_limit': 0,
|
||||||
|
'duration': 223.72,
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# tests uploader field
|
# tests uploader field
|
||||||
@@ -33,63 +58,94 @@ class VidmeIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'The Carver',
|
'title': 'The Carver',
|
||||||
'description': 'md5:e9c24870018ae8113be936645b93ba3c',
|
'description': 'md5:e9c24870018ae8113be936645b93ba3c',
|
||||||
'duration': 97.859999999999999,
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
'timestamp': 1433203629,
|
'timestamp': 1433203629,
|
||||||
'upload_date': '20150602',
|
'upload_date': '20150602',
|
||||||
'uploader': 'Thomas',
|
'uploader': 'Thomas',
|
||||||
'thumbnail': 're:^https?://.*\.jpg',
|
'uploader_id': '109747',
|
||||||
|
'age_limit': 0,
|
||||||
|
'duration': 97.859999999999999,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# From http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching
|
# nsfw test from http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching
|
||||||
'url': 'https://vid.me/e/Wmur',
|
'url': 'https://vid.me/e/Wmur',
|
||||||
'only_matching': True,
|
'info_dict': {
|
||||||
|
'id': 'Wmur',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'naked smoking & stretching',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
|
'timestamp': 1430931613,
|
||||||
|
'upload_date': '20150506',
|
||||||
|
'uploader': 'naked-yogi',
|
||||||
|
'uploader_id': '1638622',
|
||||||
|
'age_limit': 18,
|
||||||
|
'duration': 653.26999999999998,
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
url = url.replace('vid.me/e/', 'vid.me/')
|
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
video_url = self._html_search_regex(
|
try:
|
||||||
r'<source src="([^"]+)"', webpage, 'video URL')
|
response = self._download_json(
|
||||||
|
'https://api.vid.me/videoByUrl/%s' % video_id, video_id)
|
||||||
|
except ExtractorError as e:
|
||||||
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
|
||||||
|
response = self._parse_json(e.cause.read(), video_id)
|
||||||
|
else:
|
||||||
|
raise
|
||||||
|
|
||||||
title = self._og_search_title(webpage)
|
error = response.get('error')
|
||||||
description = self._og_search_description(webpage, default='')
|
if error:
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
raise ExtractorError(
|
||||||
timestamp = int_or_none(self._og_search_property(
|
'%s returned error: %s' % (self.IE_NAME, error), expected=True)
|
||||||
'updated_time', webpage, fatal=False))
|
|
||||||
width = int_or_none(self._og_search_property(
|
video = response['video']
|
||||||
'video:width', webpage, fatal=False))
|
|
||||||
height = int_or_none(self._og_search_property(
|
formats = [{
|
||||||
'video:height', webpage, fatal=False))
|
'format_id': f.get('type'),
|
||||||
duration = float_or_none(self._html_search_regex(
|
'url': f['uri'],
|
||||||
r'data-duration="([^"]+)"', webpage, 'duration', fatal=False))
|
'width': int_or_none(f.get('width')),
|
||||||
view_count = str_to_int(self._html_search_regex(
|
'height': int_or_none(f.get('height')),
|
||||||
r'<(?:li|span) class="video_views">\s*([\d,\.]+)\s*plays?',
|
} for f in video.get('formats', []) if f.get('uri')]
|
||||||
webpage, 'view count', fatal=False))
|
self._sort_formats(formats)
|
||||||
like_count = str_to_int(self._html_search_regex(
|
|
||||||
r'class="score js-video-vote-score"[^>]+data-score="([\d,\.\s]+)">',
|
title = video['title']
|
||||||
webpage, 'like count', fatal=False))
|
description = video.get('description')
|
||||||
uploader = self._html_search_regex(
|
thumbnail = video.get('thumbnail_url')
|
||||||
'class="video_author_username"[^>]*>([^<]+)',
|
timestamp = parse_iso8601(video.get('date_created'), ' ')
|
||||||
webpage, 'uploader', default=None)
|
uploader = video.get('user', {}).get('username')
|
||||||
|
uploader_id = video.get('user', {}).get('user_id')
|
||||||
|
age_limit = 18 if video.get('nsfw') is True else 0
|
||||||
|
duration = float_or_none(video.get('duration'))
|
||||||
|
view_count = int_or_none(video.get('view_count'))
|
||||||
|
like_count = int_or_none(video.get('likes_count'))
|
||||||
|
comment_count = int_or_none(video.get('comment_count'))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
|
'uploader': uploader,
|
||||||
|
'uploader_id': uploader_id,
|
||||||
|
'age_limit': age_limit,
|
||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
'width': width,
|
|
||||||
'height': height,
|
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
'like_count': like_count,
|
'like_count': like_count,
|
||||||
'uploader': uploader,
|
'comment_count': comment_count,
|
||||||
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
import itertools
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
@@ -91,31 +92,27 @@ class VierVideosIE(InfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
program = mobj.group('program')
|
program = mobj.group('program')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, program)
|
|
||||||
|
|
||||||
page_id = mobj.group('page')
|
page_id = mobj.group('page')
|
||||||
if page_id:
|
if page_id:
|
||||||
page_id = int(page_id)
|
page_id = int(page_id)
|
||||||
start_page = page_id
|
start_page = page_id
|
||||||
last_page = start_page + 1
|
|
||||||
playlist_id = '%s-page%d' % (program, page_id)
|
playlist_id = '%s-page%d' % (program, page_id)
|
||||||
else:
|
else:
|
||||||
start_page = 0
|
start_page = 0
|
||||||
last_page = int(self._search_regex(
|
|
||||||
r'videos\?page=(\d+)">laatste</a>',
|
|
||||||
webpage, 'last page', default=0)) + 1
|
|
||||||
playlist_id = program
|
playlist_id = program
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
for current_page_id in range(start_page, last_page):
|
for current_page_id in itertools.count(start_page):
|
||||||
current_page = self._download_webpage(
|
current_page = self._download_webpage(
|
||||||
'http://www.vier.be/%s/videos?page=%d' % (program, current_page_id),
|
'http://www.vier.be/%s/videos?page=%d' % (program, current_page_id),
|
||||||
program,
|
program,
|
||||||
'Downloading page %d' % (current_page_id + 1)) if current_page_id != page_id else webpage
|
'Downloading page %d' % (current_page_id + 1))
|
||||||
page_entries = [
|
page_entries = [
|
||||||
self.url_result('http://www.vier.be' + video_url, 'Vier')
|
self.url_result('http://www.vier.be' + video_url, 'Vier')
|
||||||
for video_url in re.findall(
|
for video_url in re.findall(
|
||||||
r'<h3><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page)]
|
r'<h3><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page)]
|
||||||
entries.extend(page_entries)
|
entries.extend(page_entries)
|
||||||
|
if page_id or '>Meer<' not in current_page:
|
||||||
|
break
|
||||||
|
|
||||||
return self.playlist_result(entries, playlist_id)
|
return self.playlist_result(entries, playlist_id)
|
||||||
|
|||||||
86
youtube_dl/extractor/vlive.py
Normal file
86
youtube_dl/extractor/vlive.py
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import hmac
|
||||||
|
from hashlib import sha1
|
||||||
|
from base64 import b64encode
|
||||||
|
from time import time
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
determine_ext
|
||||||
|
)
|
||||||
|
from ..compat import compat_urllib_parse
|
||||||
|
|
||||||
|
|
||||||
|
class VLiveIE(InfoExtractor):
|
||||||
|
IE_NAME = 'vlive'
|
||||||
|
# www.vlive.tv/video/ links redirect to m.vlive.tv/video/ for mobile devices
|
||||||
|
_VALID_URL = r'https?://(?:(www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://m.vlive.tv/video/1326',
|
||||||
|
'md5': 'cc7314812855ce56de70a06a27314983',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1326',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '[V] Girl\'s Day\'s Broadcast',
|
||||||
|
'creator': 'Girl\'s Day',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
_SECRET = 'rFkwZet6pqk1vQt6SxxUkAHX7YL3lmqzUMrU4IDusTo4jEBdtOhNfT4BYYAdArwH'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
'http://m.vlive.tv/video/%s' % video_id,
|
||||||
|
video_id, note='Download video page')
|
||||||
|
|
||||||
|
title = self._og_search_title(webpage)
|
||||||
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
|
creator = self._html_search_regex(
|
||||||
|
r'<span[^>]+class="name">([^<>]+)</span>', webpage, 'creator')
|
||||||
|
|
||||||
|
url = 'http://global.apis.naver.com/globalV/globalV/vod/%s/playinfo?' % video_id
|
||||||
|
msgpad = '%.0f' % (time() * 1000)
|
||||||
|
md = b64encode(
|
||||||
|
hmac.new(self._SECRET.encode('ascii'),
|
||||||
|
(url[:255] + msgpad).encode('ascii'), sha1).digest()
|
||||||
|
)
|
||||||
|
url += '&' + compat_urllib_parse.urlencode({'msgpad': msgpad, 'md': md})
|
||||||
|
playinfo = self._download_json(url, video_id, 'Downloading video json')
|
||||||
|
|
||||||
|
if playinfo.get('message', '') != 'success':
|
||||||
|
raise ExtractorError(playinfo.get('message', 'JSON request unsuccessful'))
|
||||||
|
|
||||||
|
if not playinfo.get('result'):
|
||||||
|
raise ExtractorError('No videos found.')
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for vid in playinfo['result'].get('videos', {}).get('list', []):
|
||||||
|
formats.append({
|
||||||
|
'url': vid['source'],
|
||||||
|
'ext': 'mp4',
|
||||||
|
'abr': vid.get('bitrate', {}).get('audio'),
|
||||||
|
'vbr': vid.get('bitrate', {}).get('video'),
|
||||||
|
'format_id': vid['encodingOption']['name'],
|
||||||
|
'height': vid.get('height'),
|
||||||
|
'width': vid.get('width'),
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
for caption in playinfo['result'].get('captions', {}).get('list', []):
|
||||||
|
subtitles[caption['language']] = [
|
||||||
|
{'ext': determine_ext(caption['source'], default_ext='vtt'),
|
||||||
|
'url': caption['source']}]
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'creator': creator,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
}
|
||||||
@@ -19,25 +19,25 @@ class WashingtonPostIE(InfoExtractor):
|
|||||||
'title': 'Sinkhole of bureaucracy',
|
'title': 'Sinkhole of bureaucracy',
|
||||||
},
|
},
|
||||||
'playlist': [{
|
'playlist': [{
|
||||||
'md5': '79132cc09ec5309fa590ae46e4cc31bc',
|
'md5': 'b9be794ceb56c7267d410a13f99d801a',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f',
|
'id': 'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Breaking Points: The Paper Mine',
|
'title': 'Breaking Points: The Paper Mine',
|
||||||
'duration': 1287,
|
'duration': 1290,
|
||||||
'description': 'Overly complicated paper pushing is nothing new to government bureaucracy. But the way federal retirement applications are filed may be the most outdated. David Fahrenthold explains.',
|
'description': 'Overly complicated paper pushing is nothing new to government bureaucracy. But the way federal retirement applications are filed may be the most outdated. David Fahrenthold explains.',
|
||||||
'uploader': 'The Washington Post',
|
'uploader': 'The Washington Post',
|
||||||
'timestamp': 1395527908,
|
'timestamp': 1395527908,
|
||||||
'upload_date': '20140322',
|
'upload_date': '20140322',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'md5': 'e1d5734c06865cc504ad99dc2de0d443',
|
'md5': '1fff6a689d8770966df78c8cb6c8c17c',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f',
|
'id': '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'The town bureaucracy sustains',
|
'title': 'The town bureaucracy sustains',
|
||||||
'description': 'Underneath the friendly town of Boyers is a sea of government paperwork. In a disused limestone mine, hundreds of locals now track, file and process retirement applications for the federal government. We set out to find out what it\'s like to do paperwork 230 feet underground.',
|
'description': 'Underneath the friendly town of Boyers is a sea of government paperwork. In a disused limestone mine, hundreds of locals now track, file and process retirement applications for the federal government. We set out to find out what it\'s like to do paperwork 230 feet underground.',
|
||||||
'duration': 2217,
|
'duration': 2220,
|
||||||
'timestamp': 1395528005,
|
'timestamp': 1395528005,
|
||||||
'upload_date': '20140322',
|
'upload_date': '20140322',
|
||||||
'uploader': 'The Washington Post',
|
'uploader': 'The Washington Post',
|
||||||
|
|||||||
@@ -1,40 +1,33 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .youtube import YoutubeIE
|
from .youtube import YoutubeIE
|
||||||
|
|
||||||
|
|
||||||
class WimpIE(InfoExtractor):
|
class WimpIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://(?:www\.)?wimp\.com/([^/]+)/'
|
_VALID_URL = r'http://(?:www\.)?wimp\.com/(?P<id>[^/]+)/'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.wimp.com/maruexhausted/',
|
'url': 'http://www.wimp.com/maruexhausted/',
|
||||||
'md5': 'f1acced123ecb28d9bb79f2479f2b6a1',
|
'md5': 'ee21217ffd66d058e8b16be340b74883',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'maruexhausted',
|
'id': 'maruexhausted',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Maru is exhausted.',
|
'title': 'Maru is exhausted.',
|
||||||
'description': 'md5:57e099e857c0a4ea312542b684a869b8',
|
'description': 'md5:57e099e857c0a4ea312542b684a869b8',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
# youtube video
|
|
||||||
'url': 'http://www.wimp.com/clowncar/',
|
'url': 'http://www.wimp.com/clowncar/',
|
||||||
|
'md5': '4e2986c793694b55b37cf92521d12bb4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'cG4CEr2aiSg',
|
'id': 'clowncar',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Basset hound clown car...incredible!',
|
'title': 'It\'s like a clown car.',
|
||||||
'description': 'md5:8d228485e0719898c017203f900b3a35',
|
'description': 'md5:0e56db1370a6e49c5c1d19124c0d2fb2',
|
||||||
'uploader': 'Gretchen Hoey',
|
|
||||||
'uploader_id': 'gretchenandjeff1',
|
|
||||||
'upload_date': '20140303',
|
|
||||||
},
|
},
|
||||||
'add_ie': ['Youtube'],
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group(1)
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
video_url = self._search_regex(
|
video_url = self._search_regex(
|
||||||
[r"[\"']file[\"']\s*[:,]\s*[\"'](.+?)[\"']", r"videoId\s*:\s*[\"']([^\"']+)[\"']"],
|
[r"[\"']file[\"']\s*[:,]\s*[\"'](.+?)[\"']", r"videoId\s*:\s*[\"']([^\"']+)[\"']"],
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ class XuiteIE(InfoExtractor):
|
|||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# Audio
|
# Audio
|
||||||
'url': 'http://vlog.xuite.net/play/RGkzc1ZULTM4NjA5MTQuZmx2',
|
'url': 'http://vlog.xuite.net/play/RGkzc1ZULTM4NjA5MTQuZmx2',
|
||||||
'md5': '63a42c705772aa53fd4c1a0027f86adf',
|
'md5': 'e79284c87b371424885448d11f6398c8',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3860914',
|
'id': '3860914',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
|
|||||||
@@ -101,7 +101,7 @@ class YahooIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
|
'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
|
||||||
'md5': '67010fdf3a08d290e060a4dd96baa07b',
|
'md5': '88e209b417f173d86186bef6e4d1f160',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'f885cf7f-43d4-3450-9fac-46ac30ece521',
|
'id': 'f885cf7f-43d4-3450-9fac-46ac30ece521',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
|||||||
@@ -1,18 +1,38 @@
|
|||||||
# coding=utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import hashlib
|
import hashlib
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
from ..compat import (
|
||||||
|
compat_str,
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_urllib_request,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class YandexMusicBaseIE(InfoExtractor):
|
class YandexMusicTrackIE(InfoExtractor):
|
||||||
|
IE_NAME = 'yandexmusic:track'
|
||||||
|
IE_DESC = 'Яндекс.Музыка - Трек'
|
||||||
|
_VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://music.yandex.ru/album/540508/track/4878838',
|
||||||
|
'md5': 'f496818aa2f60b6c0062980d2e00dc20',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '4878838',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'Carlo Ambrosio - Gypsy Eyes 1',
|
||||||
|
'filesize': 4628061,
|
||||||
|
'duration': 193.04,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
def _get_track_url(self, storage_dir, track_id):
|
def _get_track_url(self, storage_dir, track_id):
|
||||||
data = self._download_json(
|
data = self._download_json(
|
||||||
'http://music.yandex.ru/api/v1.5/handlers/api-jsonp.jsx?action=getTrackSrc&p=download-info/%s'
|
'http://music.yandex.ru/api/v1.5/handlers/api-jsonp.jsx?action=getTrackSrc&p=download-info/%s'
|
||||||
@@ -35,24 +55,6 @@ class YandexMusicBaseIE(InfoExtractor):
|
|||||||
'duration': float_or_none(track.get('durationMs'), 1000),
|
'duration': float_or_none(track.get('durationMs'), 1000),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class YandexMusicTrackIE(YandexMusicBaseIE):
|
|
||||||
IE_NAME = 'yandexmusic:track'
|
|
||||||
IE_DESC = 'Яндекс.Музыка - Трек'
|
|
||||||
_VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
|
|
||||||
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://music.yandex.ru/album/540508/track/4878838',
|
|
||||||
'md5': 'f496818aa2f60b6c0062980d2e00dc20',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '4878838',
|
|
||||||
'ext': 'mp3',
|
|
||||||
'title': 'Carlo Ambrosio - Gypsy Eyes 1',
|
|
||||||
'filesize': 4628061,
|
|
||||||
'duration': 193.04,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
album_id, track_id = mobj.group('album_id'), mobj.group('id')
|
album_id, track_id = mobj.group('album_id'), mobj.group('id')
|
||||||
@@ -64,7 +66,15 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
|
|||||||
return self._get_track_info(track)
|
return self._get_track_info(track)
|
||||||
|
|
||||||
|
|
||||||
class YandexMusicAlbumIE(YandexMusicBaseIE):
|
class YandexMusicPlaylistBaseIE(InfoExtractor):
|
||||||
|
def _build_playlist(self, tracks):
|
||||||
|
return [
|
||||||
|
self.url_result(
|
||||||
|
'http://music.yandex.ru/album/%s/track/%s' % (track['albums'][0]['id'], track['id']))
|
||||||
|
for track in tracks if track.get('albums') and isinstance(track.get('albums'), list)]
|
||||||
|
|
||||||
|
|
||||||
|
class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
|
||||||
IE_NAME = 'yandexmusic:album'
|
IE_NAME = 'yandexmusic:album'
|
||||||
IE_DESC = 'Яндекс.Музыка - Альбом'
|
IE_DESC = 'Яндекс.Музыка - Альбом'
|
||||||
_VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<id>\d+)/?(\?|$)'
|
_VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<id>\d+)/?(\?|$)'
|
||||||
@@ -85,7 +95,7 @@ class YandexMusicAlbumIE(YandexMusicBaseIE):
|
|||||||
'http://music.yandex.ru/handlers/album.jsx?album=%s' % album_id,
|
'http://music.yandex.ru/handlers/album.jsx?album=%s' % album_id,
|
||||||
album_id, 'Downloading album JSON')
|
album_id, 'Downloading album JSON')
|
||||||
|
|
||||||
entries = [self._get_track_info(track) for track in album['volumes'][0]]
|
entries = self._build_playlist(album['volumes'][0])
|
||||||
|
|
||||||
title = '%s - %s' % (album['artists'][0]['name'], album['title'])
|
title = '%s - %s' % (album['artists'][0]['name'], album['title'])
|
||||||
year = album.get('year')
|
year = album.get('year')
|
||||||
@@ -95,12 +105,12 @@ class YandexMusicAlbumIE(YandexMusicBaseIE):
|
|||||||
return self.playlist_result(entries, compat_str(album['id']), title)
|
return self.playlist_result(entries, compat_str(album['id']), title)
|
||||||
|
|
||||||
|
|
||||||
class YandexMusicPlaylistIE(YandexMusicBaseIE):
|
class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
|
||||||
IE_NAME = 'yandexmusic:playlist'
|
IE_NAME = 'yandexmusic:playlist'
|
||||||
IE_DESC = 'Яндекс.Музыка - Плейлист'
|
IE_DESC = 'Яндекс.Музыка - Плейлист'
|
||||||
_VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/users/[^/]+/playlists/(?P<id>\d+)'
|
_VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/users/[^/]+/playlists/(?P<id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
|
'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1245',
|
'id': '1245',
|
||||||
@@ -108,20 +118,54 @@ class YandexMusicPlaylistIE(YandexMusicBaseIE):
|
|||||||
'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
|
'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
|
||||||
},
|
},
|
||||||
'playlist_count': 6,
|
'playlist_count': 6,
|
||||||
}
|
}, {
|
||||||
|
# playlist exceeding the limit of 150 tracks shipped with webpage (see
|
||||||
|
# https://github.com/rg3/youtube-dl/issues/6666)
|
||||||
|
'url': 'https://music.yandex.ru/users/ya.playlist/playlists/1036',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1036',
|
||||||
|
'title': 'Музыка 90-х',
|
||||||
|
},
|
||||||
|
'playlist_count': 310,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
playlist_id = self._match_id(url)
|
playlist_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
playlist = self._parse_json(
|
mu = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'var\s+Mu\s*=\s*({.+?});\s*</script>', webpage, 'player'),
|
r'var\s+Mu\s*=\s*({.+?});\s*</script>', webpage, 'player'),
|
||||||
playlist_id)['pageData']['playlist']
|
playlist_id)
|
||||||
|
|
||||||
entries = [self._get_track_info(track) for track in playlist['tracks']]
|
playlist = mu['pageData']['playlist']
|
||||||
|
tracks, track_ids = playlist['tracks'], playlist['trackIds']
|
||||||
|
|
||||||
|
# tracks dictionary shipped with webpage is limited to 150 tracks,
|
||||||
|
# missing tracks should be retrieved manually.
|
||||||
|
if len(tracks) < len(track_ids):
|
||||||
|
present_track_ids = set([compat_str(track['id']) for track in tracks if track.get('id')])
|
||||||
|
missing_track_ids = set(map(compat_str, track_ids)) - set(present_track_ids)
|
||||||
|
request = compat_urllib_request.Request(
|
||||||
|
'https://music.yandex.ru/handlers/track-entries.jsx',
|
||||||
|
compat_urllib_parse.urlencode({
|
||||||
|
'entries': ','.join(missing_track_ids),
|
||||||
|
'lang': mu.get('settings', {}).get('lang', 'en'),
|
||||||
|
'external-domain': 'music.yandex.ru',
|
||||||
|
'overembed': 'false',
|
||||||
|
'sign': mu.get('authData', {}).get('user', {}).get('sign'),
|
||||||
|
'strict': 'true',
|
||||||
|
}).encode('utf-8'))
|
||||||
|
request.add_header('Referer', url)
|
||||||
|
request.add_header('X-Requested-With', 'XMLHttpRequest')
|
||||||
|
|
||||||
|
missing_tracks = self._download_json(
|
||||||
|
request, playlist_id, 'Downloading missing tracks JSON', fatal=False)
|
||||||
|
if missing_tracks:
|
||||||
|
tracks.extend(missing_tracks)
|
||||||
|
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
entries, compat_str(playlist_id),
|
self._build_playlist(tracks),
|
||||||
|
compat_str(playlist_id),
|
||||||
playlist['title'], playlist.get('description'))
|
playlist['title'], playlist.get('description'))
|
||||||
|
|||||||
@@ -49,6 +49,17 @@ class YoukuIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'playlist_count': 13,
|
'playlist_count': 13,
|
||||||
'skip': 'Available in China only',
|
'skip': 'Available in China only',
|
||||||
|
}, {
|
||||||
|
'url': 'http://v.youku.com/v_show/id_XNjA1NzA2Njgw.html',
|
||||||
|
'note': 'Video protected with password',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'XNjA1NzA2Njgw',
|
||||||
|
'title': '邢義田复旦讲座之想象中的胡人—从“左衽孔子”说起',
|
||||||
|
},
|
||||||
|
'playlist_count': 19,
|
||||||
|
'params': {
|
||||||
|
'videopassword': '100600',
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def construct_video_urls(self, data1, data2):
|
def construct_video_urls(self, data1, data2):
|
||||||
@@ -185,9 +196,15 @@ class YoukuIE(InfoExtractor):
|
|||||||
raw_data = self._download_json(req, video_id, note=note)
|
raw_data = self._download_json(req, video_id, note=note)
|
||||||
return raw_data['data'][0]
|
return raw_data['data'][0]
|
||||||
|
|
||||||
|
video_password = self._downloader.params.get('videopassword', None)
|
||||||
|
|
||||||
# request basic data
|
# request basic data
|
||||||
|
basic_data_url = 'http://v.youku.com/player/getPlayList/VideoIDS/%s' % video_id
|
||||||
|
if video_password:
|
||||||
|
basic_data_url += '?password=%s' % video_password
|
||||||
|
|
||||||
data1 = retrieve_data(
|
data1 = retrieve_data(
|
||||||
'http://v.youku.com/player/getPlayList/VideoIDS/%s' % video_id,
|
basic_data_url,
|
||||||
'Downloading JSON metadata 1')
|
'Downloading JSON metadata 1')
|
||||||
data2 = retrieve_data(
|
data2 = retrieve_data(
|
||||||
'http://v.youku.com/player/getPlayList/VideoIDS/%s/Pf/4/ctype/12/ev/1' % video_id,
|
'http://v.youku.com/player/getPlayList/VideoIDS/%s/Pf/4/ctype/12/ev/1' % video_id,
|
||||||
|
|||||||
@@ -26,6 +26,7 @@ from ..compat import (
|
|||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
clean_html,
|
||||||
|
encode_dict,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
get_element_by_attribute,
|
get_element_by_attribute,
|
||||||
@@ -111,10 +112,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
'hl': 'en_US',
|
'hl': 'en_US',
|
||||||
}
|
}
|
||||||
|
|
||||||
# Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
|
login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('ascii')
|
||||||
# chokes on unicode
|
|
||||||
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
|
|
||||||
login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
|
|
||||||
|
|
||||||
req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
|
req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
|
||||||
login_results = self._download_webpage(
|
login_results = self._download_webpage(
|
||||||
@@ -147,8 +145,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
'TrustDevice': 'on',
|
'TrustDevice': 'on',
|
||||||
})
|
})
|
||||||
|
|
||||||
tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in tfa_form_strs.items())
|
tfa_data = compat_urllib_parse.urlencode(encode_dict(tfa_form_strs)).encode('ascii')
|
||||||
tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii')
|
|
||||||
|
|
||||||
tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
|
tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
|
||||||
tfa_results = self._download_webpage(
|
tfa_results = self._download_webpage(
|
||||||
@@ -660,7 +657,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
|
|
||||||
def _extract_signature_function(self, video_id, player_url, example_sig):
|
def _extract_signature_function(self, video_id, player_url, example_sig):
|
||||||
id_m = re.match(
|
id_m = re.match(
|
||||||
r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
|
r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?)?\.(?P<ext>[a-z]+)$',
|
||||||
player_url)
|
player_url)
|
||||||
if not id_m:
|
if not id_m:
|
||||||
raise ExtractorError('Cannot identify player %r' % player_url)
|
raise ExtractorError('Cannot identify player %r' % player_url)
|
||||||
@@ -1243,7 +1240,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
|
encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
|
||||||
if 'rtmpe%3Dyes' in encoded_url_map:
|
if 'rtmpe%3Dyes' in encoded_url_map:
|
||||||
raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
|
raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
|
||||||
url_map = {}
|
formats = []
|
||||||
for url_data_str in encoded_url_map.split(','):
|
for url_data_str in encoded_url_map.split(','):
|
||||||
url_data = compat_parse_qs(url_data_str)
|
url_data = compat_parse_qs(url_data_str)
|
||||||
if 'itag' not in url_data or 'url' not in url_data:
|
if 'itag' not in url_data or 'url' not in url_data:
|
||||||
@@ -1289,7 +1286,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
player_desc = 'flash player %s' % player_version
|
player_desc = 'flash player %s' % player_version
|
||||||
else:
|
else:
|
||||||
player_version = self._search_regex(
|
player_version = self._search_regex(
|
||||||
r'html5player-([^/]+?)(?:/html5player)?\.js',
|
r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
|
||||||
player_url,
|
player_url,
|
||||||
'html5 player', fatal=False)
|
'html5 player', fatal=False)
|
||||||
player_desc = 'html5 player %s' % player_version
|
player_desc = 'html5 player %s' % player_version
|
||||||
@@ -1303,8 +1300,50 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
url += '&signature=' + signature
|
url += '&signature=' + signature
|
||||||
if 'ratebypass' not in url:
|
if 'ratebypass' not in url:
|
||||||
url += '&ratebypass=yes'
|
url += '&ratebypass=yes'
|
||||||
url_map[format_id] = url
|
|
||||||
formats = _map_to_format_list(url_map)
|
# Some itags are not included in DASH manifest thus corresponding formats will
|
||||||
|
# lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
|
||||||
|
# Trying to extract metadata from url_encoded_fmt_stream_map entry.
|
||||||
|
mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
|
||||||
|
width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
|
||||||
|
dct = {
|
||||||
|
'format_id': format_id,
|
||||||
|
'url': url,
|
||||||
|
'player_url': player_url,
|
||||||
|
'filesize': int_or_none(url_data.get('clen', [None])[0]),
|
||||||
|
'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
|
||||||
|
'width': width,
|
||||||
|
'height': height,
|
||||||
|
'fps': int_or_none(url_data.get('fps', [None])[0]),
|
||||||
|
'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0],
|
||||||
|
}
|
||||||
|
type_ = url_data.get('type', [None])[0]
|
||||||
|
if type_:
|
||||||
|
type_split = type_.split(';')
|
||||||
|
kind_ext = type_split[0].split('/')
|
||||||
|
if len(kind_ext) == 2:
|
||||||
|
kind, ext = kind_ext
|
||||||
|
dct['ext'] = ext
|
||||||
|
if kind in ('audio', 'video'):
|
||||||
|
codecs = None
|
||||||
|
for mobj in re.finditer(
|
||||||
|
r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
|
||||||
|
if mobj.group('key') == 'codecs':
|
||||||
|
codecs = mobj.group('val')
|
||||||
|
break
|
||||||
|
if codecs:
|
||||||
|
codecs = codecs.split(',')
|
||||||
|
if len(codecs) == 2:
|
||||||
|
acodec, vcodec = codecs[0], codecs[1]
|
||||||
|
else:
|
||||||
|
acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
|
||||||
|
dct.update({
|
||||||
|
'acodec': acodec,
|
||||||
|
'vcodec': vcodec,
|
||||||
|
})
|
||||||
|
if format_id in self._formats:
|
||||||
|
dct.update(self._formats[format_id])
|
||||||
|
formats.append(dct)
|
||||||
elif video_info.get('hlsvp'):
|
elif video_info.get('hlsvp'):
|
||||||
manifest_url = video_info['hlsvp'][0]
|
manifest_url = video_info['hlsvp'][0]
|
||||||
url_map = self._extract_from_m3u8(manifest_url, video_id)
|
url_map = self._extract_from_m3u8(manifest_url, video_id)
|
||||||
@@ -1796,8 +1835,8 @@ class YoutubeShowIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
|
_VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
|
||||||
IE_NAME = 'youtube:show'
|
IE_NAME = 'youtube:show'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.youtube.com/show/airdisasters',
|
'url': 'https://www.youtube.com/show/airdisasters',
|
||||||
'playlist_mincount': 3,
|
'playlist_mincount': 5,
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'airdisasters',
|
'id': 'airdisasters',
|
||||||
'title': 'Air Disasters',
|
'title': 'Air Disasters',
|
||||||
@@ -1808,7 +1847,7 @@ class YoutubeShowIE(InfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
playlist_id = mobj.group('id')
|
playlist_id = mobj.group('id')
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
url, playlist_id, 'Downloading show webpage')
|
'https://www.youtube.com/show/%s/playlists' % playlist_id, playlist_id, 'Downloading show webpage')
|
||||||
# There's one playlist for each season of the show
|
# There's one playlist for each season of the show
|
||||||
m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
|
m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
|
||||||
self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons)))
|
self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons)))
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import os.path
|
import os.path
|
||||||
import optparse
|
import optparse
|
||||||
import shlex
|
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from .downloader.external import list_external_downloaders
|
from .downloader.external import list_external_downloaders
|
||||||
@@ -11,6 +10,7 @@ from .compat import (
|
|||||||
compat_get_terminal_size,
|
compat_get_terminal_size,
|
||||||
compat_getenv,
|
compat_getenv,
|
||||||
compat_kwargs,
|
compat_kwargs,
|
||||||
|
compat_shlex_split,
|
||||||
)
|
)
|
||||||
from .utils import (
|
from .utils import (
|
||||||
preferredencoding,
|
preferredencoding,
|
||||||
@@ -28,7 +28,7 @@ def parseOpts(overrideArguments=None):
|
|||||||
try:
|
try:
|
||||||
res = []
|
res = []
|
||||||
for l in optionf:
|
for l in optionf:
|
||||||
res += shlex.split(l, comments=True)
|
res += compat_shlex_split(l, comments=True)
|
||||||
finally:
|
finally:
|
||||||
optionf.close()
|
optionf.close()
|
||||||
return res
|
return res
|
||||||
@@ -320,7 +320,7 @@ def parseOpts(overrideArguments=None):
|
|||||||
authentication.add_option(
|
authentication.add_option(
|
||||||
'--video-password',
|
'--video-password',
|
||||||
dest='videopassword', metavar='PASSWORD',
|
dest='videopassword', metavar='PASSWORD',
|
||||||
help='Video password (vimeo, smotri)')
|
help='Video password (vimeo, smotri, youku)')
|
||||||
|
|
||||||
video_format = optparse.OptionGroup(parser, 'Video Format Options')
|
video_format = optparse.OptionGroup(parser, 'Video Format Options')
|
||||||
video_format.add_option(
|
video_format.add_option(
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import os
|
|||||||
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
PostProcessingError,
|
PostProcessingError,
|
||||||
|
cli_configuration_args,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -61,11 +62,7 @@ class PostProcessor(object):
|
|||||||
self._downloader.report_warning(errnote)
|
self._downloader.report_warning(errnote)
|
||||||
|
|
||||||
def _configuration_args(self, default=[]):
|
def _configuration_args(self, default=[]):
|
||||||
pp_args = self._downloader.params.get('postprocessor_args')
|
return cli_configuration_args(self._downloader.params, 'postprocessor_args', default)
|
||||||
if pp_args is None:
|
|
||||||
return default
|
|
||||||
assert isinstance(pp_args, list)
|
|
||||||
return pp_args
|
|
||||||
|
|
||||||
|
|
||||||
class AudioConversionError(PostProcessingError):
|
class AudioConversionError(PostProcessingError):
|
||||||
|
|||||||
@@ -141,7 +141,7 @@ def write_json_file(obj, fn):
|
|||||||
if sys.version_info >= (2, 7):
|
if sys.version_info >= (2, 7):
|
||||||
def find_xpath_attr(node, xpath, key, val=None):
|
def find_xpath_attr(node, xpath, key, val=None):
|
||||||
""" Find the xpath xpath[@key=val] """
|
""" Find the xpath xpath[@key=val] """
|
||||||
assert re.match(r'^[a-zA-Z-]+$', key)
|
assert re.match(r'^[a-zA-Z_-]+$', key)
|
||||||
if val:
|
if val:
|
||||||
assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
|
assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
|
||||||
expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
|
expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
|
||||||
@@ -176,12 +176,12 @@ def xpath_with_ns(path, ns_map):
|
|||||||
return '/'.join(replaced)
|
return '/'.join(replaced)
|
||||||
|
|
||||||
|
|
||||||
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
|
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
|
||||||
if sys.version_info < (2, 7): # Crazy 2.6
|
if sys.version_info < (2, 7): # Crazy 2.6
|
||||||
xpath = xpath.encode('ascii')
|
xpath = xpath.encode('ascii')
|
||||||
|
|
||||||
n = node.find(xpath)
|
n = node.find(xpath)
|
||||||
if n is None or n.text is None:
|
if n is None:
|
||||||
if default is not NO_DEFAULT:
|
if default is not NO_DEFAULT:
|
||||||
return default
|
return default
|
||||||
elif fatal:
|
elif fatal:
|
||||||
@@ -189,9 +189,37 @@ def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
|
|||||||
raise ExtractorError('Could not find XML element %s' % name)
|
raise ExtractorError('Could not find XML element %s' % name)
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
return n
|
||||||
|
|
||||||
|
|
||||||
|
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
|
||||||
|
n = xpath_element(node, xpath, name, fatal=fatal, default=default)
|
||||||
|
if n is None or n == default:
|
||||||
|
return n
|
||||||
|
if n.text is None:
|
||||||
|
if default is not NO_DEFAULT:
|
||||||
|
return default
|
||||||
|
elif fatal:
|
||||||
|
name = xpath if name is None else name
|
||||||
|
raise ExtractorError('Could not find XML element\'s text %s' % name)
|
||||||
|
else:
|
||||||
|
return None
|
||||||
return n.text
|
return n.text
|
||||||
|
|
||||||
|
|
||||||
|
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
|
||||||
|
n = find_xpath_attr(node, xpath, key)
|
||||||
|
if n is None:
|
||||||
|
if default is not NO_DEFAULT:
|
||||||
|
return default
|
||||||
|
elif fatal:
|
||||||
|
name = '%s[@%s]' % (xpath, key) if name is None else name
|
||||||
|
raise ExtractorError('Could not find XML attribute %s' % name)
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
return n.attrib[key]
|
||||||
|
|
||||||
|
|
||||||
def get_element_by_id(id, html):
|
def get_element_by_id(id, html):
|
||||||
"""Return the content of the tag with the specified ID in the passed HTML document"""
|
"""Return the content of the tag with the specified ID in the passed HTML document"""
|
||||||
return get_element_by_attribute("id", id, html)
|
return get_element_by_attribute("id", id, html)
|
||||||
@@ -587,6 +615,11 @@ class ContentTooShortError(Exception):
|
|||||||
|
|
||||||
|
|
||||||
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
|
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
|
||||||
|
# Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
|
||||||
|
# expected HTTP responses to meet HTTP/1.0 or later (see also
|
||||||
|
# https://github.com/rg3/youtube-dl/issues/6727)
|
||||||
|
if sys.version_info < (3, 0):
|
||||||
|
kwargs['strict'] = True
|
||||||
hc = http_class(*args, **kwargs)
|
hc = http_class(*args, **kwargs)
|
||||||
source_address = ydl_handler._params.get('source_address')
|
source_address = ydl_handler._params.get('source_address')
|
||||||
if source_address is not None:
|
if source_address is not None:
|
||||||
@@ -715,7 +748,8 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
|||||||
gz = io.BytesIO(self.deflate(resp.read()))
|
gz = io.BytesIO(self.deflate(resp.read()))
|
||||||
resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
|
resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
|
||||||
resp.msg = old_resp.msg
|
resp.msg = old_resp.msg
|
||||||
# Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986
|
# Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
|
||||||
|
# https://github.com/rg3/youtube-dl/issues/6457).
|
||||||
if 300 <= resp.code < 400:
|
if 300 <= resp.code < 400:
|
||||||
location = resp.headers.get('Location')
|
location = resp.headers.get('Location')
|
||||||
if location:
|
if location:
|
||||||
@@ -749,6 +783,30 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
|
|||||||
req, **kwargs)
|
req, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that routes HTTP and HTTPS responses through one hook.

    With the workaround in ``http_response`` disabled, every method simply
    defers to the base ``HTTPCookieProcessor``.
    """

    def __init__(self, cookiejar=None):
        base_class = compat_urllib_request.HTTPCookieProcessor
        base_class.__init__(self, cookiejar)

    def http_response(self, request, response):
        """Pass ``response`` on to the base cookie processor and return its result."""
        # On Python 2 a Set-Cookie header containing non-ASCII characters
        # makes the *next* HTTP request fail (see
        # https://github.com/rg3/youtube-dl/issues/6769).
        # The idea is to percent-encode the Set-Cookie header before
        # HTTPCookieProcessor processes it, so that at least nothing crashes.
        # The workaround is currently disabled and kept here for reference:
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        base_class = compat_urllib_request.HTTPCookieProcessor
        return base_class.http_response(self, request, response)

    # HTTPS traffic gets the same treatment as plain HTTP.
    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
|
||||||
|
|
||||||
|
|
||||||
def parse_iso8601(date_str, delimiter='T', timezone=None):
|
def parse_iso8601(date_str, delimiter='T', timezone=None):
|
||||||
""" Return a UNIX timestamp from the given date """
|
""" Return a UNIX timestamp from the given date """
|
||||||
|
|
||||||
@@ -1578,6 +1636,10 @@ def urlencode_postdata(*args, **kargs):
|
|||||||
return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')
|
return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')
|
||||||
|
|
||||||
|
|
||||||
|
def encode_dict(d, encoding='utf-8'):
    """Return a new dict with every key and value of ``d`` encoded to bytes.

    Both keys and values must provide an ``encode`` method; ``encoding`` is
    passed straight to it.
    """
    encoded = {}
    for key, value in d.items():
        # Encode the key first, then the value, one pair at a time.
        encoded_key = key.encode(encoding)
        encoded[encoded_key] = value.encode(encoding)
    return encoded
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
etree_iter = xml.etree.ElementTree.Element.iter
|
etree_iter = xml.etree.ElementTree.Element.iter
|
||||||
except AttributeError: # Python <=2.6
|
except AttributeError: # Python <=2.6
|
||||||
@@ -1918,6 +1980,32 @@ def dfxp2srt(dfxp_data):
|
|||||||
return ''.join(out)
|
return ''.join(out)
|
||||||
|
|
||||||
|
|
||||||
|
def cli_option(params, command_option, param):
    """Build the argument list for an option that takes a value.

    Looks up ``param`` in ``params``; returns ``[command_option, value]``
    when a value is present and an empty list when it is missing or None.
    """
    value = params.get(param)
    if value is None:
        return []
    return [command_option, value]
|
||||||
|
|
||||||
|
|
||||||
|
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Build the argument list for an option that takes a boolean value.

    params         -- mapping of option names to values
    command_option -- the command-line flag to emit
    param          -- key looked up in ``params``
    true_value     -- text emitted when the option is True
    false_value    -- text emitted when the option is False
    separator      -- when truthy, flag and value are joined into a single
                      argument (flag + separator + value)

    Returns ``[]`` when the option is not set (missing or None), otherwise
    a one- or two-element argument list.
    """
    param = params.get(param)
    # An unset option contributes nothing, matching cli_option and
    # cli_valueless_option, instead of tripping the type assertion below.
    if param is None:
        return []
    assert isinstance(param, bool)
    value = true_value if param else false_value
    if separator:
        return [command_option + separator + value]
    return [command_option, value]
|
||||||
|
|
||||||
|
|
||||||
|
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Build the argument list for a flag that takes no value.

    Returns ``[command_option]`` when ``params[param]`` equals
    ``expected_value`` and an empty list otherwise (including when the
    option is missing).
    """
    if params.get(param) == expected_value:
        return [command_option]
    return []
|
||||||
|
|
||||||
|
|
||||||
|
def cli_configuration_args(params, param, default=[]):
    """Return the user-supplied extra arguments stored under ``param``.

    params  -- mapping of option names to values
    param   -- key looked up in ``params``
    default -- value used when the option is missing or None

    Returns the configured list as-is when present (it must be a list),
    otherwise ``default``. A list default is returned as a fresh copy.
    """
    ex_args = params.get(param)
    if ex_args is None:
        # Never hand out the default list itself: the signature's ``[]`` is
        # shared between calls, so a caller extending the result in place
        # would silently change what every later call returns.
        return list(default) if isinstance(default, list) else default
    assert isinstance(ex_args, list)
    return ex_args
|
||||||
|
|
||||||
|
|
||||||
class ISO639Utils(object):
|
class ISO639Utils(object):
|
||||||
# See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
|
# See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
|
||||||
_lang_map = {
|
_lang_map = {
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2015.08.16.1'
|
__version__ = '2015.09.09'
|
||||||
|
|||||||
Reference in New Issue
Block a user