Compare commits
215 Commits
2016.01.29
...
2016.02.13
Author | SHA1 | Date | |
---|---|---|---|
![]() |
35ced3985a | ||
![]() |
3e18700d45 | ||
![]() |
f9f49d87c2 | ||
![]() |
6863631c26 | ||
![]() |
9d939cec48 | ||
![]() |
4c77d3f52a | ||
![]() |
7be747b921 | ||
![]() |
bb20526b64 | ||
![]() |
bcbb1b08b2 | ||
![]() |
3d98f97c64 | ||
![]() |
c349456ef6 | ||
![]() |
5a4905924d | ||
![]() |
b826035dd5 | ||
![]() |
a7cab4d039 | ||
![]() |
fc3810f6d1 | ||
![]() |
3dc71d82ce | ||
![]() |
9c7b38981c | ||
![]() |
8b85ac3fd9 | ||
![]() |
81e1c4e2fc | ||
![]() |
388ae76b52 | ||
![]() |
b67d63149d | ||
![]() |
28280e8ded | ||
![]() |
6b3fbd3425 | ||
![]() |
a7ab46375b | ||
![]() |
b14d5e26f6 | ||
![]() |
9a61dfba0c | ||
![]() |
154c209e2d | ||
![]() |
d1ea5e171f | ||
![]() |
a1188d0ed0 | ||
![]() |
47d205a646 | ||
![]() |
80f772c28a | ||
![]() |
f817d9bec1 | ||
![]() |
e2effb08a4 | ||
![]() |
7fcea295c5 | ||
![]() |
cc799437ea | ||
![]() |
89d23f37f2 | ||
![]() |
b92071ef00 | ||
![]() |
47246ae26c | ||
![]() |
9c15869c28 | ||
![]() |
51e9094f4a | ||
![]() |
5e3a6fec33 | ||
![]() |
d413095f7e | ||
![]() |
1bedf4de06 | ||
![]() |
3967a761f4 | ||
![]() |
b081350bd9 | ||
![]() |
16f1430ba6 | ||
![]() |
085ad71157 | ||
![]() |
35972ba172 | ||
![]() |
3834d3e35c | ||
![]() |
8d0a2a2a4e | ||
![]() |
11c0339bec | ||
![]() |
915dd77783 | ||
![]() |
b6bfa6fb79 | ||
![]() |
f070197bd7 | ||
![]() |
5a7699bb2e | ||
![]() |
8628d26f38 | ||
![]() |
8411229bd5 | ||
![]() |
72b9ebc65d | ||
![]() |
3b799ca14c | ||
![]() |
0474512e30 | ||
![]() |
f0905c6ec3 | ||
![]() |
86296ad2cd | ||
![]() |
52f5889f77 | ||
![]() |
81e0b4f2d1 | ||
![]() |
cbecc9b903 | ||
![]() |
b8b465af3e | ||
![]() |
59b35c6745 | ||
![]() |
7032833011 | ||
![]() |
f406c78785 | ||
![]() |
f326b5837a | ||
![]() |
5dd4b3468f | ||
![]() |
d4f8e83404 | ||
![]() |
7b8b007cd9 | ||
![]() |
3547d26587 | ||
![]() |
7e62c2eb6d | ||
![]() |
56401e1e5f | ||
![]() |
860db2d508 | ||
![]() |
4b8874975c | ||
![]() |
bd6b6f6622 | ||
![]() |
4340727e6c | ||
![]() |
3ceccade87 | ||
![]() |
28ad7df65d | ||
![]() |
79a3508579 | ||
![]() |
1b840245bd | ||
![]() |
6a3828fddd | ||
![]() |
91cb6b5065 | ||
![]() |
0826a0b555 | ||
![]() |
bcbbb98bfe | ||
![]() |
66159b38aa | ||
![]() |
23d17e4beb | ||
![]() |
d97b0e3241 | ||
![]() |
eb2533ec4c | ||
![]() |
b7b365067f | ||
![]() |
86e284e028 | ||
![]() |
d9e543b680 | ||
![]() |
c773c232d8 | ||
![]() |
58ae24336a | ||
![]() |
7d3a035ee0 | ||
![]() |
e06e75c7e7 | ||
![]() |
593e0f43b4 | ||
![]() |
008ab0f814 | ||
![]() |
3f7e8750d4 | ||
![]() |
f1ed3acae5 | ||
![]() |
920d21b9d3 | ||
![]() |
2fb35d1c28 | ||
![]() |
09be85b8dd | ||
![]() |
eadc3ccd50 | ||
![]() |
255732f0d3 | ||
![]() |
53c269c6fd | ||
![]() |
675d001633 | ||
![]() |
58be922079 | ||
![]() |
c84d3a557d | ||
![]() |
d577c79632 | ||
![]() |
6ad2b01e14 | ||
![]() |
fd3a1f3d60 | ||
![]() |
87de7069b9 | ||
![]() |
6fba62c87a | ||
![]() |
f14be22816 | ||
![]() |
1df4141196 | ||
![]() |
fae45ede08 | ||
![]() |
4e0cff2a50 | ||
![]() |
9c74423510 | ||
![]() |
5976e7ab57 | ||
![]() |
a1a22572fb | ||
![]() |
c11875b328 | ||
![]() |
8ff648e4f9 | ||
![]() |
1bac34556f | ||
![]() |
0436157b95 | ||
![]() |
ae0db349c1 | ||
![]() |
08411970d5 | ||
![]() |
dc724e0c8b | ||
![]() |
0a5d1ec706 | ||
![]() |
58250eff2b | ||
![]() |
11a4efc505 | ||
![]() |
7537b35fb8 | ||
![]() |
33cc74eeeb | ||
![]() |
f021acee49 | ||
![]() |
abe694ca95 | ||
![]() |
b286f201a8 | ||
![]() |
bd93a12e85 | ||
![]() |
92769650fa | ||
![]() |
dc4fe5c6d7 | ||
![]() |
566bda51f2 | ||
![]() |
f63757ec35 | ||
![]() |
7a0ed06909 | ||
![]() |
9934fe76be | ||
![]() |
a8aad21001 | ||
![]() |
d055bf91cc | ||
![]() |
0e1b1a011d | ||
![]() |
eab3c2895c | ||
![]() |
163da6a484 | ||
![]() |
324916d11a | ||
![]() |
3ccb0655c1 | ||
![]() |
e04398e397 | ||
![]() |
231ea2a3bb | ||
![]() |
b99d88c6a1 | ||
![]() |
189d72d5fd | ||
![]() |
a7aab0c23e | ||
![]() |
a69bee4762 | ||
![]() |
9acd33094d | ||
![]() |
8e7aad2075 | ||
![]() |
ce5879fa14 | ||
![]() |
7b7507d6e1 | ||
![]() |
14823decf3 | ||
![]() |
673fb82e65 | ||
![]() |
181cf24bc0 | ||
![]() |
89f2602880 | ||
![]() |
db9b1dbcd9 | ||
![]() |
e881c4bcab | ||
![]() |
670ad51ade | ||
![]() |
eb6fc7d32a | ||
![]() |
ed1a390583 | ||
![]() |
809e1857c5 | ||
![]() |
7c38af48b9 | ||
![]() |
60ad3eb970 | ||
![]() |
a7685b3a6b | ||
![]() |
8f1fddc816 | ||
![]() |
1bf996fa5c | ||
![]() |
248ae880b6 | ||
![]() |
2d2fa82d17 | ||
![]() |
c94678957f | ||
![]() |
16f38a699f | ||
![]() |
a6c2c24479 | ||
![]() |
b8c9926c0a | ||
![]() |
df374b5222 | ||
![]() |
5ea1eb78f5 | ||
![]() |
5d2c0fd9ba | ||
![]() |
0803753fea | ||
![]() |
2c2f1efdcd | ||
![]() |
b323e1707d | ||
![]() |
09104e9930 | ||
![]() |
5fa1702ca6 | ||
![]() |
17b598d30c | ||
![]() |
53be8894e4 | ||
![]() |
c3deacd562 | ||
![]() |
8ab3fe81d8 | ||
![]() |
2f0a33d8a3 | ||
![]() |
05d0d131a7 | ||
![]() |
c140629995 | ||
![]() |
7d106a65ca | ||
![]() |
0179f6a830 | ||
![]() |
830afe85dc | ||
![]() |
8bf39420b4 | ||
![]() |
71d08b3e29 | ||
![]() |
06ffa33485 | ||
![]() |
874e05975b | ||
![]() |
f5d30d521c | ||
![]() |
e047922be0 | ||
![]() |
83ab8a79cc | ||
![]() |
350cf045d8 | ||
![]() |
68a0ea15b4 | ||
![]() |
2b4f5e68d1 | ||
![]() |
cf57433bbd | ||
![]() |
2b14cb566f | ||
![]() |
b6c33fd544 |
1
AUTHORS
1
AUTHORS
@@ -156,3 +156,4 @@ Tom Gijselinck
|
|||||||
Founder Fang
|
Founder Fang
|
||||||
Andrew Alexeyew
|
Andrew Alexeyew
|
||||||
Saso Bezlaj
|
Saso Bezlaj
|
||||||
|
Erwin de Haan
|
||||||
|
101
README.md
101
README.md
@@ -173,6 +173,10 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
expected filesize (experimental)
|
expected filesize (experimental)
|
||||||
--hls-prefer-native Use the native HLS downloader instead of
|
--hls-prefer-native Use the native HLS downloader instead of
|
||||||
ffmpeg (experimental)
|
ffmpeg (experimental)
|
||||||
|
--hls-use-mpegts Use the mpegts container for HLS videos,
|
||||||
|
allowing to play the video while
|
||||||
|
downloading (some players may not be able
|
||||||
|
to play it)
|
||||||
--external-downloader COMMAND Use the specified external downloader.
|
--external-downloader COMMAND Use the specified external downloader.
|
||||||
Currently supports
|
Currently supports
|
||||||
aria2c,axel,curl,httpie,wget
|
aria2c,axel,curl,httpie,wget
|
||||||
@@ -438,28 +442,97 @@ On Windows you may also need to setup the `%HOME%` environment variable manually
|
|||||||
|
|
||||||
The `-o` option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:
|
The `-o` option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:
|
||||||
|
|
||||||
- `id`: The sequence will be replaced by the video identifier.
|
- `id`: Video identifier
|
||||||
- `url`: The sequence will be replaced by the video URL.
|
- `title`: Video title
|
||||||
- `uploader`: The sequence will be replaced by the nickname of the person who uploaded the video.
|
- `url`: Video URL
|
||||||
- `upload_date`: The sequence will be replaced by the upload date in YYYYMMDD format.
|
- `ext`: Video filename extension
|
||||||
- `title`: The sequence will be replaced by the video title.
|
- `alt_title`: A secondary title of the video
|
||||||
- `ext`: The sequence will be replaced by the appropriate extension (like flv or mp4).
|
- `display_id`: An alternative identifier for the video
|
||||||
- `epoch`: The sequence will be replaced by the Unix epoch when creating the file.
|
- `uploader`: Full name of the video uploader
|
||||||
- `autonumber`: The sequence will be replaced by a five-digit number that will be increased with each download, starting at zero.
|
- `creator`: The main artist who created the video
|
||||||
- `playlist`: The sequence will be replaced by the name or the id of the playlist that contains the video.
|
- `release_date`: The date (YYYYMMDD) when the video was released
|
||||||
- `playlist_index`: The sequence will be replaced by the index of the video in the playlist padded with leading zeros according to the total length of the playlist.
|
- `timestamp`: UNIX timestamp of the moment the video became available
|
||||||
- `format_id`: The sequence will be replaced by the format code specified by `--format`.
|
- `upload_date`: Video upload date (YYYYMMDD)
|
||||||
- `duration`: The sequence will be replaced by the length of the video in seconds.
|
- `uploader_id`: Nickname or id of the video uploader
|
||||||
|
- `location`: Physical location where the video was filmed
|
||||||
|
- `duration`: Length of the video in seconds
|
||||||
|
- `view_count`: How many users have watched the video on the platform
|
||||||
|
- `like_count`: Number of positive ratings of the video
|
||||||
|
- `dislike_count`: Number of negative ratings of the video
|
||||||
|
- `repost_count`: Number of reposts of the video
|
||||||
|
- `average_rating`: Average rating given by users; the scale used depends on the webpage
|
||||||
|
- `comment_count`: Number of comments on the video
|
||||||
|
- `age_limit`: Age restriction for the video (years)
|
||||||
|
- `format`: A human-readable description of the format
|
||||||
|
- `format_id`: Format code specified by `--format`
|
||||||
|
- `format_note`: Additional info about the format
|
||||||
|
- `width`: Width of the video
|
||||||
|
- `height`: Height of the video
|
||||||
|
- `resolution`: Textual description of width and height
|
||||||
|
- `tbr`: Average bitrate of audio and video in KBit/s
|
||||||
|
- `abr`: Average audio bitrate in KBit/s
|
||||||
|
- `acodec`: Name of the audio codec in use
|
||||||
|
- `asr`: Audio sampling rate in Hertz
|
||||||
|
- `vbr`: Average video bitrate in KBit/s
|
||||||
|
- `fps`: Frame rate
|
||||||
|
- `vcodec`: Name of the video codec in use
|
||||||
|
- `container`: Name of the container format
|
||||||
|
- `filesize`: The number of bytes, if known in advance
|
||||||
|
- `filesize_approx`: An estimate for the number of bytes
|
||||||
|
- `protocol`: The protocol that will be used for the actual download
|
||||||
|
- `extractor`: Name of the extractor
|
||||||
|
- `extractor_key`: Key name of the extractor
|
||||||
|
- `epoch`: Unix epoch when creating the file
|
||||||
|
- `autonumber`: Five-digit number that will be increased with each download, starting at zero
|
||||||
|
- `playlist`: Name or id of the playlist that contains the video
|
||||||
|
- `playlist_index`: Index of the video in the playlist padded with leading zeros according to the total length of the playlist
|
||||||
|
|
||||||
|
Available for the video that belongs to some logical chapter or section:
|
||||||
|
- `chapter`: Name or title of the chapter the video belongs to
|
||||||
|
- `chapter_number`: Number of the chapter the video belongs to
|
||||||
|
- `chapter_id`: Id of the chapter the video belongs to
|
||||||
|
|
||||||
|
Available for the video that is an episode of some series or programme:
|
||||||
|
- `series`: Title of the series or programme the video episode belongs to
|
||||||
|
- `season`: Title of the season the video episode belongs to
|
||||||
|
- `season_number`: Number of the season the video episode belongs to
|
||||||
|
- `season_id`: Id of the season the video episode belongs to
|
||||||
|
- `episode`: Title of the video episode
|
||||||
|
- `episode_number`: Number of the video episode within a season
|
||||||
|
- `episode_id`: Id of the video episode
|
||||||
|
|
||||||
|
Each aforementioned sequence, when referenced in an output template, will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present, since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with `NA`.
|
||||||
|
|
||||||
|
For example, for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj`, this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory.
|
||||||
|
|
||||||
|
The output template can also contain an arbitrary hierarchical path, e.g. `-o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s'`, which will result in each video being downloaded into a directory corresponding to this path template. Any missing directory will be automatically created for you.
|
||||||
|
|
||||||
|
To use a literal percent sign in an output template, use `%%`. To output to stdout, use `-o -`.
|
||||||
|
|
||||||
The current default template is `%(title)s-%(id)s.%(ext)s`.
|
The current default template is `%(title)s-%(id)s.%(ext)s`.
|
||||||
|
|
||||||
In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
|
In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
|
||||||
|
|
||||||
|
Examples (note that on Windows you may need to use double quotes instead of single quotes):
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
$ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc
|
$ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc
|
||||||
youtube-dl test video ''_ä↭𝕐.mp4 # All kinds of weird characters
|
youtube-dl test video ''_ä↭𝕐.mp4 # All kinds of weird characters
|
||||||
$ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
|
|
||||||
|
$ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc --restrict-filenames
|
||||||
youtube-dl_test_video_.mp4 # A simple file name
|
youtube-dl_test_video_.mp4 # A simple file name
|
||||||
|
|
||||||
|
# Download YouTube playlist videos in separate directory indexed by video order in a playlist
|
||||||
|
$ youtube-dl -o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s' https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re
|
||||||
|
|
||||||
|
# Download Udemy course keeping each chapter in separate directory under MyVideos directory in your home
|
||||||
|
$ youtube-dl -u user -p password -o '~/MyVideos/%(playlist)s/%(chapter_number)s - %(chapter)s/%(title)s.%(ext)s' https://www.udemy.com/java-tutorial/
|
||||||
|
|
||||||
|
# Download entire series season keeping each series and each season in separate directory under C:/MyVideos
|
||||||
|
$ youtube-dl -o "C:/MyVideos/%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" http://videomore.ru/kino_v_detalayah/5_sezon/367617
|
||||||
|
|
||||||
|
# Stream the video being downloaded to stdout
|
||||||
|
$ youtube-dl -o - BaW_jenozKc
|
||||||
```
|
```
|
||||||
|
|
||||||
# FORMAT SELECTION
|
# FORMAT SELECTION
|
||||||
|
@@ -55,6 +55,7 @@
|
|||||||
- **audiomack**
|
- **audiomack**
|
||||||
- **audiomack:album**
|
- **audiomack:album**
|
||||||
- **Azubu**
|
- **Azubu**
|
||||||
|
- **AzubuLive**
|
||||||
- **BaiduVideo**: 百度视频
|
- **BaiduVideo**: 百度视频
|
||||||
- **bambuser**
|
- **bambuser**
|
||||||
- **bambuser:channel**
|
- **bambuser:channel**
|
||||||
@@ -88,8 +89,11 @@
|
|||||||
- **canalc2.tv**
|
- **canalc2.tv**
|
||||||
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
|
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
|
||||||
- **Canvas**
|
- **Canvas**
|
||||||
|
- **CBC**
|
||||||
|
- **CBCPlayer**
|
||||||
- **CBS**
|
- **CBS**
|
||||||
- **CBSNews**: CBS News
|
- **CBSNews**: CBS News
|
||||||
|
- **CBSNewsLiveVideo**: CBS News Live Videos
|
||||||
- **CBSSports**
|
- **CBSSports**
|
||||||
- **CeskaTelevize**
|
- **CeskaTelevize**
|
||||||
- **channel9**: Channel 9
|
- **channel9**: Channel 9
|
||||||
@@ -118,6 +122,7 @@
|
|||||||
- **ComedyCentralShows**: The Daily Show / The Colbert Report
|
- **ComedyCentralShows**: The Daily Show / The Colbert Report
|
||||||
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
||||||
- **Cracked**
|
- **Cracked**
|
||||||
|
- **Crackle**
|
||||||
- **Criterion**
|
- **Criterion**
|
||||||
- **CrooksAndLiars**
|
- **CrooksAndLiars**
|
||||||
- **Crunchyroll**
|
- **Crunchyroll**
|
||||||
@@ -133,6 +138,8 @@
|
|||||||
- **DailymotionCloud**
|
- **DailymotionCloud**
|
||||||
- **daum.net**
|
- **daum.net**
|
||||||
- **daum.net:clip**
|
- **daum.net:clip**
|
||||||
|
- **daum.net:playlist**
|
||||||
|
- **daum.net:user**
|
||||||
- **DBTV**
|
- **DBTV**
|
||||||
- **DCN**
|
- **DCN**
|
||||||
- **dcn:live**
|
- **dcn:live**
|
||||||
@@ -258,7 +265,7 @@
|
|||||||
- **Instagram**
|
- **Instagram**
|
||||||
- **instagram:user**: Instagram user profile
|
- **instagram:user**: Instagram user profile
|
||||||
- **InternetVideoArchive**
|
- **InternetVideoArchive**
|
||||||
- **IPrima** (Currently broken)
|
- **IPrima**
|
||||||
- **iqiyi**: 爱奇艺
|
- **iqiyi**: 爱奇艺
|
||||||
- **Ir90Tv**
|
- **Ir90Tv**
|
||||||
- **ivi**: ivi.ru
|
- **ivi**: ivi.ru
|
||||||
@@ -279,6 +286,7 @@
|
|||||||
- **KeezMovies**
|
- **KeezMovies**
|
||||||
- **KhanAcademy**
|
- **KhanAcademy**
|
||||||
- **KickStarter**
|
- **KickStarter**
|
||||||
|
- **KonserthusetPlay**
|
||||||
- **kontrtube**: KontrTube.ru - Труба зовёт
|
- **kontrtube**: KontrTube.ru - Труба зовёт
|
||||||
- **KrasView**: Красвью
|
- **KrasView**: Красвью
|
||||||
- **Ku6**
|
- **Ku6**
|
||||||
@@ -315,6 +323,7 @@
|
|||||||
- **mailru**: Видео@Mail.Ru
|
- **mailru**: Видео@Mail.Ru
|
||||||
- **MakerTV**
|
- **MakerTV**
|
||||||
- **Malemotion**
|
- **Malemotion**
|
||||||
|
- **MatchTV**
|
||||||
- **MDR**: MDR.DE and KiKA
|
- **MDR**: MDR.DE and KiKA
|
||||||
- **media.ccc.de**
|
- **media.ccc.de**
|
||||||
- **metacafe**
|
- **metacafe**
|
||||||
@@ -439,6 +448,7 @@
|
|||||||
- **PlanetaPlay**
|
- **PlanetaPlay**
|
||||||
- **play.fm**
|
- **play.fm**
|
||||||
- **played.to**
|
- **played.to**
|
||||||
|
- **PlaysTV**
|
||||||
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
|
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
|
||||||
- **Playvid**
|
- **Playvid**
|
||||||
- **Playwire**
|
- **Playwire**
|
||||||
@@ -507,6 +517,7 @@
|
|||||||
- **Sapo**: SAPO Vídeos
|
- **Sapo**: SAPO Vídeos
|
||||||
- **savefrom.net**
|
- **savefrom.net**
|
||||||
- **SBS**: sbs.com.au
|
- **SBS**: sbs.com.au
|
||||||
|
- **schooltv**
|
||||||
- **SciVee**
|
- **SciVee**
|
||||||
- **screen.yahoo:search**: Yahoo screen search
|
- **screen.yahoo:search**: Yahoo screen search
|
||||||
- **Screencast**
|
- **Screencast**
|
||||||
@@ -675,7 +686,9 @@
|
|||||||
- **VideoPremium**
|
- **VideoPremium**
|
||||||
- **VideoTt**: video.tt - Your True Tube (Currently broken)
|
- **VideoTt**: video.tt - Your True Tube (Currently broken)
|
||||||
- **videoweed**: VideoWeed
|
- **videoweed**: VideoWeed
|
||||||
- **Vidme**
|
- **vidme**
|
||||||
|
- **vidme:user**
|
||||||
|
- **vidme:user:likes**
|
||||||
- **Vidzi**
|
- **Vidzi**
|
||||||
- **vier**
|
- **vier**
|
||||||
- **vier:videos**
|
- **vier:videos**
|
||||||
|
@@ -14,6 +14,7 @@ from test.helper import FakeYDL, assertRegexpMatches
|
|||||||
from youtube_dl import YoutubeDL
|
from youtube_dl import YoutubeDL
|
||||||
from youtube_dl.compat import compat_str, compat_urllib_error
|
from youtube_dl.compat import compat_str, compat_urllib_error
|
||||||
from youtube_dl.extractor import YoutubeIE
|
from youtube_dl.extractor import YoutubeIE
|
||||||
|
from youtube_dl.extractor.common import InfoExtractor
|
||||||
from youtube_dl.postprocessor.common import PostProcessor
|
from youtube_dl.postprocessor.common import PostProcessor
|
||||||
from youtube_dl.utils import ExtractorError, match_filter_func
|
from youtube_dl.utils import ExtractorError, match_filter_func
|
||||||
|
|
||||||
@@ -247,6 +248,17 @@ class TestFormatSelection(unittest.TestCase):
|
|||||||
|
|
||||||
def format_info(f_id):
|
def format_info(f_id):
|
||||||
info = YoutubeIE._formats[f_id].copy()
|
info = YoutubeIE._formats[f_id].copy()
|
||||||
|
|
||||||
|
# XXX: In real cases InfoExtractor._parse_mpd_formats() fills up 'acodec'
|
||||||
|
# and 'vcodec', while in tests such information is incomplete since
|
||||||
|
# commit a6c2c24479e5f4827ceb06f64d855329c0a6f593
|
||||||
|
# test_YoutubeDL.test_youtube_format_selection is broken without
|
||||||
|
# this fix
|
||||||
|
if 'acodec' in info and 'vcodec' not in info:
|
||||||
|
info['vcodec'] = 'none'
|
||||||
|
elif 'vcodec' in info and 'acodec' not in info:
|
||||||
|
info['acodec'] = 'none'
|
||||||
|
|
||||||
info['format_id'] = f_id
|
info['format_id'] = f_id
|
||||||
info['url'] = 'url:' + f_id
|
info['url'] = 'url:' + f_id
|
||||||
return info
|
return info
|
||||||
@@ -646,6 +658,42 @@ class TestYoutubeDL(unittest.TestCase):
|
|||||||
ydl = YDL()
|
ydl = YDL()
|
||||||
self.assertRaises(compat_urllib_error.URLError, ydl.urlopen, 'file:///etc/passwd')
|
self.assertRaises(compat_urllib_error.URLError, ydl.urlopen, 'file:///etc/passwd')
|
||||||
|
|
||||||
|
def test_do_not_override_ie_key_in_url_transparent(self):
|
||||||
|
ydl = YDL()
|
||||||
|
|
||||||
|
class Foo1IE(InfoExtractor):
|
||||||
|
_VALID_URL = r'foo1:'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': 'foo2:',
|
||||||
|
'ie_key': 'Foo2',
|
||||||
|
}
|
||||||
|
|
||||||
|
class Foo2IE(InfoExtractor):
|
||||||
|
_VALID_URL = r'foo2:'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
return {
|
||||||
|
'_type': 'url',
|
||||||
|
'url': 'foo3:',
|
||||||
|
'ie_key': 'Foo3',
|
||||||
|
}
|
||||||
|
|
||||||
|
class Foo3IE(InfoExtractor):
|
||||||
|
_VALID_URL = r'foo3:'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
return _make_result([{'url': TEST_URL}])
|
||||||
|
|
||||||
|
ydl.add_info_extractor(Foo1IE(ydl))
|
||||||
|
ydl.add_info_extractor(Foo2IE(ydl))
|
||||||
|
ydl.add_info_extractor(Foo3IE(ydl))
|
||||||
|
ydl.extract_info('foo1:')
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(downloaded['url'], TEST_URL)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@@ -21,7 +21,7 @@ from youtube_dl.extractor import (
|
|||||||
NPOIE,
|
NPOIE,
|
||||||
ComedyCentralIE,
|
ComedyCentralIE,
|
||||||
NRKTVIE,
|
NRKTVIE,
|
||||||
RaiIE,
|
RaiTVIE,
|
||||||
VikiIE,
|
VikiIE,
|
||||||
ThePlatformIE,
|
ThePlatformIE,
|
||||||
ThePlatformFeedIE,
|
ThePlatformFeedIE,
|
||||||
@@ -65,16 +65,16 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
|
|||||||
self.DL.params['allsubtitles'] = True
|
self.DL.params['allsubtitles'] = True
|
||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
self.assertEqual(len(subtitles.keys()), 13)
|
self.assertEqual(len(subtitles.keys()), 13)
|
||||||
self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
|
self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
|
||||||
self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
|
self.assertEqual(md5(subtitles['it']), '6d752b98c31f1cf8d597050c7a2cb4b5')
|
||||||
for lang in ['it', 'fr', 'de']:
|
for lang in ['fr', 'de']:
|
||||||
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
||||||
|
|
||||||
def test_youtube_subtitles_sbv_format(self):
|
def test_youtube_subtitles_ttml_format(self):
|
||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
self.DL.params['subtitlesformat'] = 'sbv'
|
self.DL.params['subtitlesformat'] = 'ttml'
|
||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b')
|
self.assertEqual(md5(subtitles['en']), 'e306f8c42842f723447d9f63ad65df54')
|
||||||
|
|
||||||
def test_youtube_subtitles_vtt_format(self):
|
def test_youtube_subtitles_vtt_format(self):
|
||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
@@ -260,7 +260,7 @@ class TestNRKSubtitles(BaseTestSubtitles):
|
|||||||
|
|
||||||
class TestRaiSubtitles(BaseTestSubtitles):
|
class TestRaiSubtitles(BaseTestSubtitles):
|
||||||
url = 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
|
url = 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
|
||||||
IE = RaiIE
|
IE = RaiTVIE
|
||||||
|
|
||||||
def test_allsubtitles(self):
|
def test_allsubtitles(self):
|
||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
|
@@ -22,6 +22,7 @@ from youtube_dl.utils import (
|
|||||||
DateRange,
|
DateRange,
|
||||||
detect_exe_version,
|
detect_exe_version,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
dict_get,
|
||||||
encode_compat_str,
|
encode_compat_str,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
escape_rfc3986,
|
escape_rfc3986,
|
||||||
@@ -450,6 +451,28 @@ class TestUtil(unittest.TestCase):
|
|||||||
data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
|
data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
|
||||||
self.assertTrue(isinstance(data, bytes))
|
self.assertTrue(isinstance(data, bytes))
|
||||||
|
|
||||||
|
def test_dict_get(self):
|
||||||
|
FALSE_VALUES = {
|
||||||
|
'none': None,
|
||||||
|
'false': False,
|
||||||
|
'zero': 0,
|
||||||
|
'empty_string': '',
|
||||||
|
'empty_list': [],
|
||||||
|
}
|
||||||
|
d = FALSE_VALUES.copy()
|
||||||
|
d['a'] = 42
|
||||||
|
self.assertEqual(dict_get(d, 'a'), 42)
|
||||||
|
self.assertEqual(dict_get(d, 'b'), None)
|
||||||
|
self.assertEqual(dict_get(d, 'b', 42), 42)
|
||||||
|
self.assertEqual(dict_get(d, ('a', )), 42)
|
||||||
|
self.assertEqual(dict_get(d, ('b', 'a', )), 42)
|
||||||
|
self.assertEqual(dict_get(d, ('b', 'c', 'a', 'd', )), 42)
|
||||||
|
self.assertEqual(dict_get(d, ('b', 'c', )), None)
|
||||||
|
self.assertEqual(dict_get(d, ('b', 'c', ), 42), 42)
|
||||||
|
for key, false_value in FALSE_VALUES.items():
|
||||||
|
self.assertEqual(dict_get(d, ('b', 'c', key, )), None)
|
||||||
|
self.assertEqual(dict_get(d, ('b', 'c', key, ), skip_false_values=False), false_value)
|
||||||
|
|
||||||
def test_encode_compat_str(self):
|
def test_encode_compat_str(self):
|
||||||
self.assertEqual(encode_compat_str(b'\xd1\x82\xd0\xb5\xd1\x81\xd1\x82', 'utf-8'), 'тест')
|
self.assertEqual(encode_compat_str(b'\xd1\x82\xd0\xb5\xd1\x81\xd1\x82', 'utf-8'), 'тест')
|
||||||
self.assertEqual(encode_compat_str('тест', 'utf-8'), 'тест')
|
self.assertEqual(encode_compat_str('тест', 'utf-8'), 'тест')
|
||||||
@@ -471,6 +494,10 @@ class TestUtil(unittest.TestCase):
|
|||||||
d = json.loads(stripped)
|
d = json.loads(stripped)
|
||||||
self.assertEqual(d, {'STATUS': 'OK'})
|
self.assertEqual(d, {'STATUS': 'OK'})
|
||||||
|
|
||||||
|
stripped = strip_jsonp('ps.embedHandler({"status": "success"});')
|
||||||
|
d = json.loads(stripped)
|
||||||
|
self.assertEqual(d, {'status': 'success'})
|
||||||
|
|
||||||
def test_uppercase_escape(self):
|
def test_uppercase_escape(self):
|
||||||
self.assertEqual(uppercase_escape('aä'), 'aä')
|
self.assertEqual(uppercase_escape('aä'), 'aä')
|
||||||
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
|
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
|
||||||
|
@@ -34,7 +34,7 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
# TODO find a > 100 (paginating?) videos course
|
# TODO find a > 100 (paginating?) videos course
|
||||||
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||||
entries = result['entries']
|
entries = list(result['entries'])
|
||||||
self.assertEqual(YoutubeIE().extract_id(entries[0]['url']), 'j9WZyLZCBzs')
|
self.assertEqual(YoutubeIE().extract_id(entries[0]['url']), 'j9WZyLZCBzs')
|
||||||
self.assertEqual(len(entries), 25)
|
self.assertEqual(len(entries), 25)
|
||||||
self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
|
self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
|
||||||
|
@@ -263,7 +263,7 @@ class YoutubeDL(object):
|
|||||||
the downloader (see youtube_dl/downloader/common.py):
|
the downloader (see youtube_dl/downloader/common.py):
|
||||||
nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
|
nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
|
||||||
noresizebuffer, retries, continuedl, noprogress, consoletitle,
|
noresizebuffer, retries, continuedl, noprogress, consoletitle,
|
||||||
xattr_set_filesize, external_downloader_args.
|
xattr_set_filesize, external_downloader_args, hls_use_mpegts.
|
||||||
|
|
||||||
The following options are used by the post processors:
|
The following options are used by the post processors:
|
||||||
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
|
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
|
||||||
@@ -707,7 +707,6 @@ class YoutubeDL(object):
|
|||||||
It will also download the videos if 'download'.
|
It will also download the videos if 'download'.
|
||||||
Returns the resolved ie_result.
|
Returns the resolved ie_result.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
result_type = ie_result.get('_type', 'video')
|
result_type = ie_result.get('_type', 'video')
|
||||||
|
|
||||||
if result_type in ('url', 'url_transparent'):
|
if result_type in ('url', 'url_transparent'):
|
||||||
@@ -736,7 +735,7 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
force_properties = dict(
|
force_properties = dict(
|
||||||
(k, v) for k, v in ie_result.items() if v is not None)
|
(k, v) for k, v in ie_result.items() if v is not None)
|
||||||
for f in ('_type', 'url'):
|
for f in ('_type', 'url', 'ie_key'):
|
||||||
if f in force_properties:
|
if f in force_properties:
|
||||||
del force_properties[f]
|
del force_properties[f]
|
||||||
new_result = info.copy()
|
new_result = info.copy()
|
||||||
@@ -1289,6 +1288,9 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
if format.get('format_id') is None:
|
if format.get('format_id') is None:
|
||||||
format['format_id'] = compat_str(i)
|
format['format_id'] = compat_str(i)
|
||||||
|
else:
|
||||||
|
# Sanitize format_id from characters used in format selector expression
|
||||||
|
format['format_id'] = re.sub('[\s,/+\[\]()]', '_', format['format_id'])
|
||||||
format_id = format['format_id']
|
format_id = format['format_id']
|
||||||
if format_id not in formats_dict:
|
if format_id not in formats_dict:
|
||||||
formats_dict[format_id] = []
|
formats_dict[format_id] = []
|
||||||
@@ -1339,7 +1341,6 @@ class YoutubeDL(object):
|
|||||||
if req_format is None:
|
if req_format is None:
|
||||||
req_format_list = []
|
req_format_list = []
|
||||||
if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
|
if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
|
||||||
info_dict['extractor'] in ['youtube', 'ted'] and
|
|
||||||
not info_dict.get('is_live')):
|
not info_dict.get('is_live')):
|
||||||
merger = FFmpegMergerPP(self)
|
merger = FFmpegMergerPP(self)
|
||||||
if merger.available and merger.can_merge():
|
if merger.available and merger.can_merge():
|
||||||
@@ -1796,7 +1797,7 @@ class YoutubeDL(object):
|
|||||||
else:
|
else:
|
||||||
res = '%sp' % format['height']
|
res = '%sp' % format['height']
|
||||||
elif format.get('width') is not None:
|
elif format.get('width') is not None:
|
||||||
res = '?x%d' % format['width']
|
res = '%dx?' % format['width']
|
||||||
else:
|
else:
|
||||||
res = default
|
res = default
|
||||||
return res
|
return res
|
||||||
|
@@ -369,6 +369,7 @@ def _real_main(argv=None):
|
|||||||
'no_color': opts.no_color,
|
'no_color': opts.no_color,
|
||||||
'ffmpeg_location': opts.ffmpeg_location,
|
'ffmpeg_location': opts.ffmpeg_location,
|
||||||
'hls_prefer_native': opts.hls_prefer_native,
|
'hls_prefer_native': opts.hls_prefer_native,
|
||||||
|
'hls_use_mpegts': opts.hls_use_mpegts,
|
||||||
'external_downloader_args': external_downloader_args,
|
'external_downloader_args': external_downloader_args,
|
||||||
'postprocessor_args': postprocessor_args,
|
'postprocessor_args': postprocessor_args,
|
||||||
'cn_verification_proxy': opts.cn_verification_proxy,
|
'cn_verification_proxy': opts.cn_verification_proxy,
|
||||||
|
@@ -45,6 +45,7 @@ class FileDownloader(object):
|
|||||||
(experimental)
|
(experimental)
|
||||||
external_downloader_args: A list of additional command-line arguments for the
|
external_downloader_args: A list of additional command-line arguments for the
|
||||||
external downloader.
|
external downloader.
|
||||||
|
hls_use_mpegts: Use the mpegts container for HLS videos.
|
||||||
|
|
||||||
Subclasses of this one must re-define the real_download method.
|
Subclasses of this one must re-define the real_download method.
|
||||||
"""
|
"""
|
||||||
|
@@ -40,6 +40,7 @@ class DashSegmentsFD(FileDownloader):
|
|||||||
return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
|
return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
|
||||||
|
|
||||||
with open(tmpfilename, 'wb') as outf:
|
with open(tmpfilename, 'wb') as outf:
|
||||||
|
if info_dict.get('initialization_url'):
|
||||||
append_url_to_file(
|
append_url_to_file(
|
||||||
outf, combine_url(base_url, info_dict['initialization_url']),
|
outf, combine_url(base_url, info_dict['initialization_url']),
|
||||||
'initialization segment')
|
'initialization segment')
|
||||||
|
@@ -273,15 +273,21 @@ class F4mFD(FragmentFD):
|
|||||||
return fragments_list
|
return fragments_list
|
||||||
|
|
||||||
def _parse_bootstrap_node(self, node, base_url):
|
def _parse_bootstrap_node(self, node, base_url):
|
||||||
if node.text is None:
|
# Sometimes non empty inline bootstrap info can be specified along
|
||||||
|
# with bootstrap url attribute (e.g. dummy inline bootstrap info
|
||||||
|
# contains whitespace characters in [1]). We will prefer bootstrap
|
||||||
|
# url over inline bootstrap info when present.
|
||||||
|
# 1. http://live-1-1.rutube.ru/stream/1024/HDS/SD/C2NKsS85HQNckgn5HdEmOQ/1454167650/S-s604419906/move/four/dirs/upper/1024-576p.f4m
|
||||||
|
bootstrap_url = node.get('url')
|
||||||
|
if bootstrap_url:
|
||||||
bootstrap_url = compat_urlparse.urljoin(
|
bootstrap_url = compat_urlparse.urljoin(
|
||||||
base_url, node.attrib['url'])
|
base_url, bootstrap_url)
|
||||||
boot_info = self._get_bootstrap_from_url(bootstrap_url)
|
boot_info = self._get_bootstrap_from_url(bootstrap_url)
|
||||||
else:
|
else:
|
||||||
bootstrap_url = None
|
bootstrap_url = None
|
||||||
bootstrap = base64.b64decode(node.text.encode('ascii'))
|
bootstrap = base64.b64decode(node.text.encode('ascii'))
|
||||||
boot_info = read_bootstrap_info(bootstrap)
|
boot_info = read_bootstrap_info(bootstrap)
|
||||||
return (boot_info, bootstrap_url)
|
return boot_info, bootstrap_url
|
||||||
|
|
||||||
def real_download(self, filename, info_dict):
|
def real_download(self, filename, info_dict):
|
||||||
man_url = info_dict['url']
|
man_url = info_dict['url']
|
||||||
@@ -316,7 +322,8 @@ class F4mFD(FragmentFD):
|
|||||||
metadata = None
|
metadata = None
|
||||||
|
|
||||||
fragments_list = build_fragments_list(boot_info)
|
fragments_list = build_fragments_list(boot_info)
|
||||||
if self.params.get('test', False):
|
test = self.params.get('test', False)
|
||||||
|
if test:
|
||||||
# We only download the first fragment
|
# We only download the first fragment
|
||||||
fragments_list = fragments_list[:1]
|
fragments_list = fragments_list[:1]
|
||||||
total_frags = len(fragments_list)
|
total_frags = len(fragments_list)
|
||||||
@@ -326,6 +333,7 @@ class F4mFD(FragmentFD):
|
|||||||
ctx = {
|
ctx = {
|
||||||
'filename': filename,
|
'filename': filename,
|
||||||
'total_frags': total_frags,
|
'total_frags': total_frags,
|
||||||
|
'live': live,
|
||||||
}
|
}
|
||||||
|
|
||||||
self._prepare_frag_download(ctx)
|
self._prepare_frag_download(ctx)
|
||||||
@@ -380,7 +388,7 @@ class F4mFD(FragmentFD):
|
|||||||
else:
|
else:
|
||||||
raise
|
raise
|
||||||
|
|
||||||
if not fragments_list and live and bootstrap_url:
|
if not fragments_list and not test and live and bootstrap_url:
|
||||||
fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
|
fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
|
||||||
total_frags += len(fragments_list)
|
total_frags += len(fragments_list)
|
||||||
if fragments_list and (fragments_list[0][1] > frag_i + 1):
|
if fragments_list and (fragments_list[0][1] > frag_i + 1):
|
||||||
|
@@ -26,7 +26,11 @@ class FragmentFD(FileDownloader):
|
|||||||
self._start_frag_download(ctx)
|
self._start_frag_download(ctx)
|
||||||
|
|
||||||
def _prepare_frag_download(self, ctx):
|
def _prepare_frag_download(self, ctx):
|
||||||
self.to_screen('[%s] Total fragments: %d' % (self.FD_NAME, ctx['total_frags']))
|
if 'live' not in ctx:
|
||||||
|
ctx['live'] = False
|
||||||
|
self.to_screen(
|
||||||
|
'[%s] Total fragments: %s'
|
||||||
|
% (self.FD_NAME, ctx['total_frags'] if not ctx['live'] else 'unknown (live)'))
|
||||||
self.report_destination(ctx['filename'])
|
self.report_destination(ctx['filename'])
|
||||||
dl = HttpQuietDownloader(
|
dl = HttpQuietDownloader(
|
||||||
self.ydl,
|
self.ydl,
|
||||||
@@ -74,14 +78,14 @@ class FragmentFD(FileDownloader):
|
|||||||
if s['status'] not in ('downloading', 'finished'):
|
if s['status'] not in ('downloading', 'finished'):
|
||||||
return
|
return
|
||||||
|
|
||||||
|
time_now = time.time()
|
||||||
|
state['elapsed'] = time_now - start
|
||||||
frag_total_bytes = s.get('total_bytes') or 0
|
frag_total_bytes = s.get('total_bytes') or 0
|
||||||
|
if not ctx['live']:
|
||||||
estimated_size = (
|
estimated_size = (
|
||||||
(ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) /
|
(ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) /
|
||||||
(state['frag_index'] + 1) * total_frags)
|
(state['frag_index'] + 1) * total_frags)
|
||||||
time_now = time.time()
|
|
||||||
state['total_bytes_estimate'] = estimated_size
|
state['total_bytes_estimate'] = estimated_size
|
||||||
state['elapsed'] = time_now - start
|
|
||||||
|
|
||||||
if s['status'] == 'finished':
|
if s['status'] == 'finished':
|
||||||
state['frag_index'] += 1
|
state['frag_index'] += 1
|
||||||
@@ -91,6 +95,7 @@ class FragmentFD(FileDownloader):
|
|||||||
else:
|
else:
|
||||||
frag_downloaded_bytes = s['downloaded_bytes']
|
frag_downloaded_bytes = s['downloaded_bytes']
|
||||||
state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
|
state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
|
||||||
|
if not ctx['live']:
|
||||||
state['eta'] = self.calc_eta(
|
state['eta'] = self.calc_eta(
|
||||||
start, time_now, estimated_size,
|
start, time_now, estimated_size,
|
||||||
state['downloaded_bytes'])
|
state['downloaded_bytes'])
|
||||||
|
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import sys
|
||||||
|
|
||||||
from .common import FileDownloader
|
from .common import FileDownloader
|
||||||
from .fragment import FragmentFD
|
from .fragment import FragmentFD
|
||||||
@@ -39,7 +40,11 @@ class HlsFD(FileDownloader):
|
|||||||
'-headers',
|
'-headers',
|
||||||
''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
|
''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
|
||||||
|
|
||||||
args += ['-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc']
|
args += ['-i', url, '-c', 'copy']
|
||||||
|
if self.params.get('hls_use_mpegts', False):
|
||||||
|
args += ['-f', 'mpegts']
|
||||||
|
else:
|
||||||
|
args += ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
|
||||||
|
|
||||||
args = [encodeArgument(opt) for opt in args]
|
args = [encodeArgument(opt) for opt in args]
|
||||||
args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
|
args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
|
||||||
@@ -53,7 +58,9 @@ class HlsFD(FileDownloader):
|
|||||||
# subprocces.run would send the SIGKILL signal to ffmpeg and the
|
# subprocces.run would send the SIGKILL signal to ffmpeg and the
|
||||||
# mp4 file couldn't be played, but if we ask ffmpeg to quit it
|
# mp4 file couldn't be played, but if we ask ffmpeg to quit it
|
||||||
# produces a file that is playable (this is mostly useful for live
|
# produces a file that is playable (this is mostly useful for live
|
||||||
# streams)
|
# streams). Note that Windows is not affected and produces playable
|
||||||
|
# files (see https://github.com/rg3/youtube-dl/issues/8300).
|
||||||
|
if sys.platform != 'win32':
|
||||||
proc.communicate(b'q')
|
proc.communicate(b'q')
|
||||||
raise
|
raise
|
||||||
if retval == 0:
|
if retval == 0:
|
||||||
|
@@ -50,7 +50,7 @@ from .atresplayer import AtresPlayerIE
|
|||||||
from .atttechchannel import ATTTechChannelIE
|
from .atttechchannel import ATTTechChannelIE
|
||||||
from .audimedia import AudiMediaIE
|
from .audimedia import AudiMediaIE
|
||||||
from .audiomack import AudiomackIE, AudiomackAlbumIE
|
from .audiomack import AudiomackIE, AudiomackAlbumIE
|
||||||
from .azubu import AzubuIE
|
from .azubu import AzubuIE, AzubuLiveIE
|
||||||
from .baidu import BaiduVideoIE
|
from .baidu import BaiduVideoIE
|
||||||
from .bambuser import BambuserIE, BambuserChannelIE
|
from .bambuser import BambuserIE, BambuserChannelIE
|
||||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||||
@@ -89,8 +89,15 @@ from .camdemy import (
|
|||||||
from .canalplus import CanalplusIE
|
from .canalplus import CanalplusIE
|
||||||
from .canalc2 import Canalc2IE
|
from .canalc2 import Canalc2IE
|
||||||
from .canvas import CanvasIE
|
from .canvas import CanvasIE
|
||||||
|
from .cbc import (
|
||||||
|
CBCIE,
|
||||||
|
CBCPlayerIE,
|
||||||
|
)
|
||||||
from .cbs import CBSIE
|
from .cbs import CBSIE
|
||||||
from .cbsnews import CBSNewsIE
|
from .cbsnews import (
|
||||||
|
CBSNewsIE,
|
||||||
|
CBSNewsLiveVideoIE,
|
||||||
|
)
|
||||||
from .cbssports import CBSSportsIE
|
from .cbssports import CBSSportsIE
|
||||||
from .ccc import CCCIE
|
from .ccc import CCCIE
|
||||||
from .ceskatelevize import CeskaTelevizeIE
|
from .ceskatelevize import CeskaTelevizeIE
|
||||||
@@ -123,6 +130,7 @@ from .comcarcoff import ComCarCoffIE
|
|||||||
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
||||||
from .condenast import CondeNastIE
|
from .condenast import CondeNastIE
|
||||||
from .cracked import CrackedIE
|
from .cracked import CrackedIE
|
||||||
|
from .crackle import CrackleIE
|
||||||
from .criterion import CriterionIE
|
from .criterion import CriterionIE
|
||||||
from .crooksandliars import CrooksAndLiarsIE
|
from .crooksandliars import CrooksAndLiarsIE
|
||||||
from .crunchyroll import (
|
from .crunchyroll import (
|
||||||
@@ -142,6 +150,8 @@ from .dailymotion import (
|
|||||||
from .daum import (
|
from .daum import (
|
||||||
DaumIE,
|
DaumIE,
|
||||||
DaumClipIE,
|
DaumClipIE,
|
||||||
|
DaumPlaylistIE,
|
||||||
|
DaumUserIE,
|
||||||
)
|
)
|
||||||
from .dbtv import DBTVIE
|
from .dbtv import DBTVIE
|
||||||
from .dcn import (
|
from .dcn import (
|
||||||
@@ -323,6 +333,7 @@ from .keezmovies import KeezMoviesIE
|
|||||||
from .khanacademy import KhanAcademyIE
|
from .khanacademy import KhanAcademyIE
|
||||||
from .kickstarter import KickStarterIE
|
from .kickstarter import KickStarterIE
|
||||||
from .keek import KeekIE
|
from .keek import KeekIE
|
||||||
|
from .konserthusetplay import KonserthusetPlayIE
|
||||||
from .kontrtube import KontrTubeIE
|
from .kontrtube import KontrTubeIE
|
||||||
from .krasview import KrasViewIE
|
from .krasview import KrasViewIE
|
||||||
from .ku6 import Ku6IE
|
from .ku6 import Ku6IE
|
||||||
@@ -372,6 +383,7 @@ from .macgamestore import MacGameStoreIE
|
|||||||
from .mailru import MailRuIE
|
from .mailru import MailRuIE
|
||||||
from .makertv import MakerTVIE
|
from .makertv import MakerTVIE
|
||||||
from .malemotion import MalemotionIE
|
from .malemotion import MalemotionIE
|
||||||
|
from .matchtv import MatchTVIE
|
||||||
from .mdr import MDRIE
|
from .mdr import MDRIE
|
||||||
from .metacafe import MetacafeIE
|
from .metacafe import MetacafeIE
|
||||||
from .metacritic import MetacriticIE
|
from .metacritic import MetacriticIE
|
||||||
@@ -482,6 +494,7 @@ from .npo import (
|
|||||||
NPOLiveIE,
|
NPOLiveIE,
|
||||||
NPORadioIE,
|
NPORadioIE,
|
||||||
NPORadioFragmentIE,
|
NPORadioFragmentIE,
|
||||||
|
SchoolTVIE,
|
||||||
VPROIE,
|
VPROIE,
|
||||||
WNLIE
|
WNLIE
|
||||||
)
|
)
|
||||||
@@ -525,6 +538,7 @@ from .planetaplay import PlanetaPlayIE
|
|||||||
from .pladform import PladformIE
|
from .pladform import PladformIE
|
||||||
from .played import PlayedIE
|
from .played import PlayedIE
|
||||||
from .playfm import PlayFMIE
|
from .playfm import PlayFMIE
|
||||||
|
from .plays import PlaysTVIE
|
||||||
from .playtvak import PlaytvakIE
|
from .playtvak import PlaytvakIE
|
||||||
from .playvid import PlayvidIE
|
from .playvid import PlayvidIE
|
||||||
from .playwire import PlaywireIE
|
from .playwire import PlaywireIE
|
||||||
@@ -815,7 +829,11 @@ from .videomore import (
|
|||||||
)
|
)
|
||||||
from .videopremium import VideoPremiumIE
|
from .videopremium import VideoPremiumIE
|
||||||
from .videott import VideoTtIE
|
from .videott import VideoTtIE
|
||||||
from .vidme import VidmeIE
|
from .vidme import (
|
||||||
|
VidmeIE,
|
||||||
|
VidmeUserIE,
|
||||||
|
VidmeUserLikesIE,
|
||||||
|
)
|
||||||
from .vidzi import VidziIE
|
from .vidzi import VidziIE
|
||||||
from .vier import VierIE, VierVideosIE
|
from .vier import VierIE, VierVideosIE
|
||||||
from .viewster import ViewsterIE
|
from .viewster import ViewsterIE
|
||||||
|
@@ -8,11 +8,7 @@ from ..compat import compat_str
|
|||||||
from ..utils import int_or_none
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
class ACastBaseIE(InfoExtractor):
|
class ACastIE(InfoExtractor):
|
||||||
_API_BASE_URL = 'https://www.acast.com/api/'
|
|
||||||
|
|
||||||
|
|
||||||
class ACastIE(ACastBaseIE):
|
|
||||||
IE_NAME = 'acast'
|
IE_NAME = 'acast'
|
||||||
_VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<channel>[^/]+)/(?P<id>[^/#?]+)'
|
_VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<channel>[^/]+)/(?P<id>[^/#?]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
@@ -23,14 +19,19 @@ class ACastIE(ACastBaseIE):
|
|||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': '"Where Are You?": Taipei 101, Taiwan',
|
'title': '"Where Are You?": Taipei 101, Taiwan',
|
||||||
'timestamp': 1196172000000,
|
'timestamp': 1196172000000,
|
||||||
'description': 'md5:0c5d8201dfea2b93218ea986c91eee6e',
|
'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e',
|
||||||
'duration': 211,
|
'duration': 211,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
channel, display_id = re.match(self._VALID_URL, url).groups()
|
channel, display_id = re.match(self._VALID_URL, url).groups()
|
||||||
cast_data = self._download_json(self._API_BASE_URL + 'channels/%s/acasts/%s/playback' % (channel, display_id), display_id)
|
|
||||||
|
embed_page = self._download_webpage(
|
||||||
|
re.sub('(?:www\.)?acast\.com', 'embedcdn.acast.com', url), display_id)
|
||||||
|
cast_data = self._parse_json(self._search_regex(
|
||||||
|
r'window\[\'acast/queries\'\]\s*=\s*([^;]+);', embed_page, 'acast data'),
|
||||||
|
display_id)['GetAcast/%s/%s' % (channel, display_id)]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': compat_str(cast_data['id']),
|
'id': compat_str(cast_data['id']),
|
||||||
@@ -44,7 +45,7 @@ class ACastIE(ACastBaseIE):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class ACastChannelIE(ACastBaseIE):
|
class ACastChannelIE(InfoExtractor):
|
||||||
IE_NAME = 'acast:channel'
|
IE_NAME = 'acast:channel'
|
||||||
_VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<id>[^/#?]+)'
|
_VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<id>[^/#?]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
@@ -56,6 +57,7 @@ class ACastChannelIE(ACastBaseIE):
|
|||||||
},
|
},
|
||||||
'playlist_mincount': 20,
|
'playlist_mincount': 20,
|
||||||
}
|
}
|
||||||
|
_API_BASE_URL = 'https://www.acast.com/api/'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
|
@@ -8,6 +8,8 @@ from .common import InfoExtractor
|
|||||||
from ..compat import compat_str
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
qualities,
|
qualities,
|
||||||
|
unescapeHTML,
|
||||||
|
xpath_element,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -31,7 +33,7 @@ class AllocineIE(InfoExtractor):
|
|||||||
'id': '19540403',
|
'id': '19540403',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Planes 2 Bande-annonce VF',
|
'title': 'Planes 2 Bande-annonce VF',
|
||||||
'description': 'md5:eeaffe7c2d634525e21159b93acf3b1e',
|
'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway',
|
||||||
'thumbnail': 're:http://.*\.jpg',
|
'thumbnail': 're:http://.*\.jpg',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
@@ -41,7 +43,7 @@ class AllocineIE(InfoExtractor):
|
|||||||
'id': '19544709',
|
'id': '19544709',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Dragons 2 - Bande annonce finale VF',
|
'title': 'Dragons 2 - Bande annonce finale VF',
|
||||||
'description': 'md5:71742e3a74b0d692c7fce0dd2017a4ac',
|
'description': 'md5:601d15393ac40f249648ef000720e7e3',
|
||||||
'thumbnail': 're:http://.*\.jpg',
|
'thumbnail': 're:http://.*\.jpg',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
@@ -59,14 +61,18 @@ class AllocineIE(InfoExtractor):
|
|||||||
if typ == 'film':
|
if typ == 'film':
|
||||||
video_id = self._search_regex(r'href="/video/player_gen_cmedia=([0-9]+).+"', webpage, 'video id')
|
video_id = self._search_regex(r'href="/video/player_gen_cmedia=([0-9]+).+"', webpage, 'video id')
|
||||||
else:
|
else:
|
||||||
player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player')
|
player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player', default=None)
|
||||||
|
if player:
|
||||||
player_data = json.loads(player)
|
player_data = json.loads(player)
|
||||||
video_id = compat_str(player_data['refMedia'])
|
video_id = compat_str(player_data['refMedia'])
|
||||||
|
else:
|
||||||
|
model = self._search_regex(r'data-model="([^"]+)">', webpage, 'data model')
|
||||||
|
model_data = self._parse_json(unescapeHTML(model), display_id)
|
||||||
|
video_id = compat_str(model_data['id'])
|
||||||
|
|
||||||
xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id)
|
xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id)
|
||||||
|
|
||||||
video = xml.find('.//AcVisionVideo').attrib
|
video = xpath_element(xml, './/AcVisionVideo').attrib
|
||||||
quality = qualities(['ld', 'md', 'hd'])
|
quality = qualities(['ld', 'md', 'hd'])
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
@@ -13,6 +13,7 @@ from ..utils import (
|
|||||||
unified_strdate,
|
unified_strdate,
|
||||||
get_element_by_attribute,
|
get_element_by_attribute,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
NO_DEFAULT,
|
||||||
qualities,
|
qualities,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -93,9 +94,18 @@ class ArteTVPlus7IE(InfoExtractor):
|
|||||||
json_url = self._html_search_regex(
|
json_url = self._html_search_regex(
|
||||||
patterns, webpage, 'json vp url', default=None)
|
patterns, webpage, 'json vp url', default=None)
|
||||||
if not json_url:
|
if not json_url:
|
||||||
iframe_url = self._html_search_regex(
|
def find_iframe_url(webpage, default=NO_DEFAULT):
|
||||||
|
return self._html_search_regex(
|
||||||
r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
|
r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
|
||||||
webpage, 'iframe url', group='url')
|
webpage, 'iframe url', group='url', default=default)
|
||||||
|
|
||||||
|
iframe_url = find_iframe_url(webpage, None)
|
||||||
|
if not iframe_url:
|
||||||
|
embed_url = self._html_search_regex(
|
||||||
|
r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url')
|
||||||
|
player = self._download_json(
|
||||||
|
embed_url, video_id, 'Downloading player page')
|
||||||
|
iframe_url = find_iframe_url(player['html'])
|
||||||
json_url = compat_parse_qs(
|
json_url = compat_parse_qs(
|
||||||
compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
|
compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
|
||||||
return self._extract_from_json_url(json_url, video_id, lang)
|
return self._extract_from_json_url(json_url, video_id, lang)
|
||||||
|
@@ -3,7 +3,11 @@ from __future__ import unicode_literals
|
|||||||
import json
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import float_or_none
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
float_or_none,
|
||||||
|
sanitized_Request,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class AzubuIE(InfoExtractor):
|
class AzubuIE(InfoExtractor):
|
||||||
@@ -91,3 +95,37 @@ class AzubuIE(InfoExtractor):
|
|||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class AzubuLiveIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'http://www.azubu.tv/(?P<id>[^/]+)$'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.azubu.tv/MarsTVMDLen',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
user = self._match_id(url)
|
||||||
|
|
||||||
|
info = self._download_json(
|
||||||
|
'http://api.azubu.tv/public/modules/last-video/{0}/info'.format(user),
|
||||||
|
user)['data']
|
||||||
|
if info['type'] != 'STREAM':
|
||||||
|
raise ExtractorError('{0} is not streaming live'.format(user), expected=True)
|
||||||
|
|
||||||
|
req = sanitized_Request(
|
||||||
|
'https://edge-elb.api.brightcove.com/playback/v1/accounts/3361910549001/videos/ref:' + info['reference_id'])
|
||||||
|
req.add_header('Accept', 'application/json;pk=BCpkADawqM1gvI0oGWg8dxQHlgT8HkdE2LnAlWAZkOlznO39bSZX726u4JqnDsK3MDXcO01JxXK2tZtJbgQChxgaFzEVdHRjaDoxaOu8hHOO8NYhwdxw9BzvgkvLUlpbDNUuDoc4E4wxDToV')
|
||||||
|
bc_info = self._download_json(req, user)
|
||||||
|
m3u8_url = next(source['src'] for source in bc_info['sources'] if source['container'] == 'M2TS')
|
||||||
|
formats = self._extract_m3u8_formats(m3u8_url, user, ext='mp4')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': info['id'],
|
||||||
|
'title': self._live_title(info['title']),
|
||||||
|
'uploader_id': user,
|
||||||
|
'formats': formats,
|
||||||
|
'is_live': True,
|
||||||
|
'thumbnail': bc_info['poster'],
|
||||||
|
}
|
||||||
|
@@ -1,7 +1,13 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
js_to_json,
|
||||||
|
determine_ext,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class BpbIE(InfoExtractor):
|
class BpbIE(InfoExtractor):
|
||||||
@@ -10,7 +16,8 @@ class BpbIE(InfoExtractor):
|
|||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
|
'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
|
||||||
'md5': '0792086e8e2bfbac9cdf27835d5f2093',
|
# md5 fails in Python 2.6 due to buggy server response and wrong handling of urllib2
|
||||||
|
'md5': 'c4f84c8a8044ca9ff68bb8441d300b3f',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '297',
|
'id': '297',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@@ -25,13 +32,26 @@ class BpbIE(InfoExtractor):
|
|||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<h2 class="white">(.*?)</h2>', webpage, 'title')
|
r'<h2 class="white">(.*?)</h2>', webpage, 'title')
|
||||||
video_url = self._html_search_regex(
|
video_info_dicts = re.findall(
|
||||||
r'(http://film\.bpb\.de/player/dokument_[0-9]+\.mp4)',
|
r"({\s*src:\s*'http://film\.bpb\.de/[^}]+})", webpage)
|
||||||
webpage, 'video URL')
|
|
||||||
|
formats = []
|
||||||
|
for video_info in video_info_dicts:
|
||||||
|
video_info = self._parse_json(video_info, video_id, transform_source=js_to_json)
|
||||||
|
quality = video_info['quality']
|
||||||
|
video_url = video_info['src']
|
||||||
|
formats.append({
|
||||||
|
'url': video_url,
|
||||||
|
'preference': 10 if quality == 'high' else 0,
|
||||||
|
'format_note': quality,
|
||||||
|
'format_id': '%s-%s' % (quality, determine_ext(video_url)),
|
||||||
|
})
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'formats': formats,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': self._og_search_description(webpage),
|
'description': self._og_search_description(webpage),
|
||||||
}
|
}
|
||||||
|
113
youtube_dl/extractor/cbc.py
Normal file
113
youtube_dl/extractor/cbc.py
Normal file
@@ -0,0 +1,113 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import js_to_json
|
||||||
|
|
||||||
|
|
||||||
|
class CBCIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
# with mediaId
|
||||||
|
'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2682904050',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Don Cherry – All-Stars',
|
||||||
|
'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.',
|
||||||
|
'timestamp': 1454475540,
|
||||||
|
'upload_date': '20160203',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# with clipId
|
||||||
|
'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2487345465',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Robin Williams freestyles on 90 Minutes Live',
|
||||||
|
'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.',
|
||||||
|
'upload_date': '19700101',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# multiple iframes
|
||||||
|
'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot',
|
||||||
|
'playlist': [{
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2680832926',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'An Eagle\'s-Eye View Off Burrard Bridge',
|
||||||
|
'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.',
|
||||||
|
'upload_date': '19700101',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2658915080',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Fly like an eagle!',
|
||||||
|
'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower',
|
||||||
|
'upload_date': '19700101',
|
||||||
|
},
|
||||||
|
}],
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def suitable(cls, url):
|
||||||
|
return False if CBCPlayerIE.suitable(url) else super(CBCIE, cls).suitable(url)
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
player_init = self._search_regex(
|
||||||
|
r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage, 'player init',
|
||||||
|
default=None)
|
||||||
|
if player_init:
|
||||||
|
player_info = self._parse_json(player_init, display_id, js_to_json)
|
||||||
|
media_id = player_info.get('mediaId')
|
||||||
|
if not media_id:
|
||||||
|
clip_id = player_info['clipId']
|
||||||
|
media_id = self._download_json(
|
||||||
|
'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
|
||||||
|
clip_id)['entries'][0]['id'].split('/')[-1]
|
||||||
|
return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
|
||||||
|
else:
|
||||||
|
entries = [self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)]
|
||||||
|
return self.playlist_result(entries)
|
||||||
|
|
||||||
|
|
||||||
|
class CBCPlayerIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.cbc.ca/player/play/2683190193',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2683190193',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Gerry Runs a Sweat Shop',
|
||||||
|
'description': 'md5:b457e1c01e8ff408d9d801c1c2cd29b0',
|
||||||
|
'timestamp': 1455067800,
|
||||||
|
'upload_date': '20160210',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
return self.url_result(
|
||||||
|
'http://feed.theplatform.com/f/ExhSPC/vms_5akSXx4Ng_Zn?byGuid=%s' % video_id,
|
||||||
|
'ThePlatformFeed', video_id)
|
@@ -1,15 +1,14 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
from .common import InfoExtractor
|
||||||
import json
|
|
||||||
|
|
||||||
from .theplatform import ThePlatformIE
|
from .theplatform import ThePlatformIE
|
||||||
|
from ..utils import parse_duration
|
||||||
|
|
||||||
|
|
||||||
class CBSNewsIE(ThePlatformIE):
|
class CBSNewsIE(ThePlatformIE):
|
||||||
IE_DESC = 'CBS News'
|
IE_DESC = 'CBS News'
|
||||||
_VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:[^/]+/)+(?P<id>[\da-z_-]+)'
|
_VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
@@ -48,14 +47,13 @@ class CBSNewsIE(ThePlatformIE):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_info = json.loads(self._html_search_regex(
|
video_info = self._parse_json(self._html_search_regex(
|
||||||
r'(?:<ul class="media-list items" id="media-related-items"><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
|
r'(?:<ul class="media-list items" id="media-related-items"><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
|
||||||
webpage, 'video JSON info'))
|
webpage, 'video JSON info'), video_id)
|
||||||
|
|
||||||
item = video_info['item'] if 'item' in video_info else video_info
|
item = video_info['item'] if 'item' in video_info else video_info
|
||||||
title = item.get('articleTitle') or item.get('hed')
|
title = item.get('articleTitle') or item.get('hed')
|
||||||
@@ -88,3 +86,41 @@ class CBSNewsIE(ThePlatformIE):
|
|||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class CBSNewsLiveVideoIE(InfoExtractor):
    """Extractor for cbsnews.com ``/live/video/<slug>`` pages.

    The page embeds a JSON object in a ``data-story-obj`` attribute; its
    ``story`` member provides the f4m manifest URL and the metadata.
    """

    IE_DESC = 'CBS News Live Videos'
    _VALID_URL = r'http://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'

    _TEST = {
        'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',
        'info_dict': {
            'id': 'clinton-sanders-prepare-to-face-off-in-nh',
            'ext': 'flv',
            'title': 'Clinton, Sanders Prepare To Face Off In NH',
            'duration': 334,
        },
    }

    def _real_extract(self, url):
        """Download the page, read the embedded story JSON and build the info dict."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        story_json = self._html_search_regex(
            r'data-story-obj=\'({.+?})\'', webpage, 'video JSON info')
        story = self._parse_json(story_json, video_id)['story']

        hdcore_sign = 'hdcore=3.3.1'
        manifest_url = story['url'] + '&' + hdcore_sign
        f4m_formats = self._extract_f4m_formats(manifest_url, video_id)
        if f4m_formats:
            for fmt in f4m_formats:
                # URLs without the extra param induce a 404 error
                fmt.update({'extra_param_to_segment_url': hdcore_sign})

        thumbnail = story.get('thumbnail_url_hd') or story.get('thumbnail_url_sd')
        return {
            'id': video_id,
            'title': story['headline'],
            'thumbnail': thumbnail,
            'duration': parse_duration(story.get('segmentDur')),
            'formats': f4m_formats,
        }
|
||||||
|
@@ -2,6 +2,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
@@ -14,14 +15,13 @@ class ComCarCoffIE(InfoExtractor):
|
|||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',
|
'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'miranda-sings-happy-thanksgiving-miranda',
|
'id': '2494164',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'upload_date': '20141127',
|
'upload_date': '20141127',
|
||||||
'timestamp': 1417107600,
|
'timestamp': 1417107600,
|
||||||
'duration': 1232,
|
'duration': 1232,
|
||||||
'title': 'Happy Thanksgiving Miranda',
|
'title': 'Happy Thanksgiving Miranda',
|
||||||
'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.',
|
'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.',
|
||||||
'thumbnail': 'http://ccc.crackle.com/images/s5e4_thumb.jpg',
|
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': 'requires ffmpeg',
|
'skip_download': 'requires ffmpeg',
|
||||||
@@ -39,15 +39,14 @@ class ComCarCoffIE(InfoExtractor):
|
|||||||
r'window\.app\s*=\s*({.+?});\n', webpage, 'full data json'),
|
r'window\.app\s*=\s*({.+?});\n', webpage, 'full data json'),
|
||||||
display_id)['videoData']
|
display_id)['videoData']
|
||||||
|
|
||||||
video_id = full_data['activeVideo']['video']
|
display_id = full_data['activeVideo']['video']
|
||||||
video_data = full_data.get('videos', {}).get(video_id) or full_data['singleshots'][video_id]
|
video_data = full_data.get('videos', {}).get(display_id) or full_data['singleshots'][display_id]
|
||||||
|
video_id = compat_str(video_data['mediaId'])
|
||||||
thumbnails = [{
|
thumbnails = [{
|
||||||
'url': video_data['images']['thumb'],
|
'url': video_data['images']['thumb'],
|
||||||
}, {
|
}, {
|
||||||
'url': video_data['images']['poster'],
|
'url': video_data['images']['poster'],
|
||||||
}]
|
}]
|
||||||
formats = self._extract_m3u8_formats(
|
|
||||||
video_data['mediaUrl'], video_id, ext='mp4')
|
|
||||||
|
|
||||||
timestamp = int_or_none(video_data.get('pubDateTime')) or parse_iso8601(
|
timestamp = int_or_none(video_data.get('pubDateTime')) or parse_iso8601(
|
||||||
video_data.get('pubDate'))
|
video_data.get('pubDate'))
|
||||||
@@ -55,6 +54,8 @@ class ComCarCoffIE(InfoExtractor):
|
|||||||
video_data.get('duration'))
|
video_data.get('duration'))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': 'crackle:%s' % video_id,
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': video_data['title'],
|
'title': video_data['title'],
|
||||||
@@ -62,6 +63,7 @@ class ComCarCoffIE(InfoExtractor):
|
|||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'formats': formats,
|
'season_number': int_or_none(video_data.get('season')),
|
||||||
|
'episode_number': int_or_none(video_data.get('episode')),
|
||||||
'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))),
|
'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))),
|
||||||
}
|
}
|
||||||
|
@@ -10,6 +10,7 @@ import re
|
|||||||
import socket
|
import socket
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
|
import math
|
||||||
|
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_cookiejar,
|
compat_cookiejar,
|
||||||
@@ -44,6 +45,7 @@ from ..utils import (
|
|||||||
xpath_text,
|
xpath_text,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
determine_protocol,
|
determine_protocol,
|
||||||
|
parse_duration,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -828,7 +830,7 @@ class InfoExtractor(object):
|
|||||||
for f in formats:
|
for f in formats:
|
||||||
# Automatically determine tbr when missing based on abr and vbr (improves
|
# Automatically determine tbr when missing based on abr and vbr (improves
|
||||||
# formats sorting in some cases)
|
# formats sorting in some cases)
|
||||||
if 'tbr' not in f and 'abr' in f and 'vbr' in f:
|
if 'tbr' not in f and f.get('abr') is not None and f.get('vbr') is not None:
|
||||||
f['tbr'] = f['abr'] + f['vbr']
|
f['tbr'] = f['abr'] + f['vbr']
|
||||||
|
|
||||||
def _formats_key(f):
|
def _formats_key(f):
|
||||||
@@ -1184,11 +1186,13 @@ class InfoExtractor(object):
|
|||||||
http_count = 0
|
http_count = 0
|
||||||
m3u8_count = 0
|
m3u8_count = 0
|
||||||
|
|
||||||
|
srcs = []
|
||||||
videos = smil.findall(self._xpath_ns('.//video', namespace))
|
videos = smil.findall(self._xpath_ns('.//video', namespace))
|
||||||
for video in videos:
|
for video in videos:
|
||||||
src = video.get('src')
|
src = video.get('src')
|
||||||
if not src:
|
if not src or src in srcs:
|
||||||
continue
|
continue
|
||||||
|
srcs.append(src)
|
||||||
|
|
||||||
bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
||||||
filesize = int_or_none(video.get('size') or video.get('fileSize'))
|
filesize = int_or_none(video.get('size') or video.get('fileSize'))
|
||||||
@@ -1220,6 +1224,7 @@ class InfoExtractor(object):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
|
src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
|
||||||
|
src_url = src_url.strip()
|
||||||
|
|
||||||
if proto == 'm3u8' or src_ext == 'm3u8':
|
if proto == 'm3u8' or src_ext == 'm3u8':
|
||||||
m3u8_formats = self._extract_m3u8_formats(
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
@@ -1265,11 +1270,13 @@ class InfoExtractor(object):
|
|||||||
return formats
|
return formats
|
||||||
|
|
||||||
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
|
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
|
||||||
|
urls = []
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))):
|
for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))):
|
||||||
src = textstream.get('src')
|
src = textstream.get('src')
|
||||||
if not src:
|
if not src or src in urls:
|
||||||
continue
|
continue
|
||||||
|
urls.append(src)
|
||||||
ext = textstream.get('ext') or determine_ext(src)
|
ext = textstream.get('ext') or determine_ext(src)
|
||||||
if not ext:
|
if not ext:
|
||||||
type_ = textstream.get('type')
|
type_ = textstream.get('type')
|
||||||
@@ -1330,6 +1337,163 @@ class InfoExtractor(object):
|
|||||||
})
|
})
|
||||||
return entries
|
return entries
|
||||||
|
|
||||||
|
def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}):
    """Download a DASH MPD manifest and return the formats parsed from it.

    Returns an empty list when the download helper reports failure by
    returning False. ``formats_dict`` is passed through to
    _parse_mpd_formats unchanged; this method itself never mutates it.
    """
    download = self._download_webpage_handle(
        mpd_url, video_id,
        note=note or 'Downloading MPD manifest',
        errnote=errnote or 'Failed to download MPD manifest',
        fatal=fatal)
    if download is False:
        return []

    manifest_text, url_handle = download
    # Base for relative URLs: everything up to the last '/' of the URL
    # reported by the response handle.
    base_url = re.match(r'https?://.+/', url_handle.geturl()).group()
    manifest_doc = compat_etree_fromstring(manifest_text.encode('utf-8'))
    return self._parse_mpd_formats(
        manifest_doc, mpd_id, base_url, formats_dict=formats_dict)
|
||||||
|
|
||||||
|
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}):
    """Build a sorted list of format dicts from a parsed DASH MPD document.

    mpd_doc       -- root element of the manifest; a manifest whose ``type``
                     attribute is 'dynamic' yields an empty list.
    mpd_id        -- optional prefix for every generated format_id.
    mpd_base_url  -- prepended to BaseURL values that are not absolute
                     http(s) URLs.
    formats_dict  -- optional mapping of representation id to a dict of
                     extra fields merged underneath the parsed fields
                     (only read here, never mutated).

    AdaptationSets and Representations carrying a ContentProtection child
    are skipped. Text representations are ignored for now.
    """
    if mpd_doc.get('type') == 'dynamic':
        return []

    namespace = self._search_regex(r'(?i)^{([^}]+)?}MPD$', mpd_doc.tag, 'namespace', default=None)

    def _add_ns(path):
        return self._xpath_ns(path, namespace)

    def is_drm_protected(element):
        return element.find(_add_ns('ContentProtection')) is not None

    def extract_multisegment_info(element, ms_parent_info):
        # Start from a copy of the parent's info so values found on this
        # element override inherited ones without touching the parent dict.
        ms_info = ms_parent_info.copy()
        segment_list = element.find(_add_ns('SegmentList'))
        if segment_list is not None:
            segment_urls_e = segment_list.findall(_add_ns('SegmentURL'))
            if segment_urls_e:
                ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e]
            initialization = segment_list.find(_add_ns('Initialization'))
            if initialization is not None:
                ms_info['initialization_url'] = initialization.attrib['sourceURL']
            return ms_info

        segment_template = element.find(_add_ns('SegmentTemplate'))
        if segment_template is None:
            return ms_info

        start_number = segment_template.get('startNumber')
        if start_number:
            ms_info['start_number'] = int(start_number)
        segment_timeline = segment_template.find(_add_ns('SegmentTimeline'))
        if segment_timeline is not None:
            s_e = segment_timeline.findall(_add_ns('S'))
            if s_e:
                # Each S entry stands for one segment plus 'r' repeats.
                ms_info['total_number'] = sum(1 + int(s.get('r', '0')) for s in s_e)
        else:
            timescale = segment_template.get('timescale')
            if timescale:
                ms_info['timescale'] = int(timescale)
            segment_duration = segment_template.get('duration')
            if segment_duration:
                ms_info['segment_duration'] = int(segment_duration)
        media_template = segment_template.get('media')
        if media_template:
            ms_info['media_template'] = media_template
        initialization = segment_template.get('initialization')
        if initialization:
            ms_info['initialization_url'] = initialization
        else:
            initialization = segment_template.find(_add_ns('Initialization'))
            if initialization is not None:
                ms_info['initialization_url'] = initialization.attrib['sourceURL']
        return ms_info

    mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
    formats = []
    for period in mpd_doc.findall(_add_ns('Period')):
        period_duration = parse_duration(period.get('duration')) or mpd_duration
        period_ms_info = extract_multisegment_info(period, {
            'start_number': 1,
            'timescale': 1,
        })
        for adaptation_set in period.findall(_add_ns('AdaptationSet')):
            if is_drm_protected(adaptation_set):
                continue
            adaption_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info)
            for representation in adaptation_set.findall(_add_ns('Representation')):
                if is_drm_protected(representation):
                    continue
                # Representation attributes override those of the AdaptationSet.
                representation_attrib = adaptation_set.attrib.copy()
                representation_attrib.update(representation.attrib)
                mime_type = representation_attrib.get('mimeType')
                content_type = mime_type.split('/')[0] if mime_type else representation_attrib.get('contentType')
                if content_type == 'text':
                    # TODO implement WebVTT downloading
                    continue
                if content_type not in ('video', 'audio'):
                    self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
                    continue

                # Concatenate BaseURL values from the innermost element
                # outwards until the result is an absolute http(s) URL.
                base_url = ''
                for element in (representation, adaptation_set, period, mpd_doc):
                    base_url_e = element.find(_add_ns('BaseURL'))
                    if base_url_e is not None:
                        base_url = base_url_e.text + base_url
                        if re.match(r'^https?://', base_url):
                            break
                if mpd_base_url and not re.match(r'^https?://', base_url):
                    if not mpd_base_url.endswith('/') and not base_url.startswith('/'):
                        mpd_base_url += '/'
                    base_url = mpd_base_url + base_url

                representation_id = representation_attrib.get('id')
                lang = representation_attrib.get('lang')
                url_el = representation.find(_add_ns('BaseURL'))
                filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
                f = {
                    'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
                    'url': base_url,
                    'width': int_or_none(representation_attrib.get('width')),
                    'height': int_or_none(representation_attrib.get('height')),
                    'tbr': int_or_none(representation_attrib.get('bandwidth'), 1000),
                    'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
                    'fps': int_or_none(representation_attrib.get('frameRate')),
                    'vcodec': 'none' if content_type == 'audio' else representation_attrib.get('codecs'),
                    'acodec': 'none' if content_type == 'video' else representation_attrib.get('codecs'),
                    'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
                    'format_note': 'DASH %s' % content_type,
                    'filesize': filesize,
                }

                representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
                if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info:
                    # Derive the segment count from the durations only when
                    # both are actually known (the previous check tested a
                    # bare string literal and was therefore always true).
                    if ('total_number' not in representation_ms_info and
                            representation_ms_info.get('segment_duration') and
                            period_duration is not None):
                        segment_duration = float(representation_ms_info['segment_duration']) / float(representation_ms_info['timescale'])
                        representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
                    if 'total_number' in representation_ms_info:
                        media_template = representation_ms_info['media_template']
                        media_template = media_template.replace('$RepresentationID$', representation_id)
                        # Two passes instead of one optional group: referring
                        # to an unmatched group in the replacement raises on
                        # Python < 3.5.
                        media_template = re.sub(r'\$(Number|Bandwidth)\$', r'%(\1)d', media_template)
                        media_template = re.sub(r'\$(Number|Bandwidth)%(0\d+)d\$', r'%(\1)\2d', media_template)
                        # str.replace returns a new string; keep the result.
                        media_template = media_template.replace('$$', '$')
                        # '%d' needs a number, the attribute is a string.
                        bandwidth = int_or_none(representation_attrib.get('bandwidth'))
                        first_number = representation_ms_info['start_number']
                        last_number = first_number + representation_ms_info['total_number']
                        representation_ms_info['segment_urls'] = [
                            media_template % {'Number': segment_number, 'Bandwidth': bandwidth}
                            for segment_number in range(first_number, last_number)]

                if 'segment_urls' in representation_ms_info:
                    f.update({
                        'segment_urls': representation_ms_info['segment_urls'],
                        'protocol': 'http_dash_segments',
                    })
                    if 'initialization_url' in representation_ms_info:
                        initialization_url = representation_ms_info['initialization_url'].replace('$RepresentationID$', representation_id)
                        f.update({
                            'initialization_url': initialization_url,
                        })
                        if not f.get('url'):
                            f['url'] = initialization_url

                # Merge into an already collected format with the same id,
                # otherwise append a new one seeded from formats_dict.
                try:
                    existing_format = next(
                        fo for fo in formats
                        if fo['format_id'] == representation_id)
                except StopIteration:
                    full_info = formats_dict.get(representation_id, {}).copy()
                    full_info.update(f)
                    formats.append(full_info)
                else:
                    existing_format.update(f)
    self._sort_formats(formats)
    return formats
|
||||||
|
|
||||||
def _live_title(self, name):
|
def _live_title(self, name):
|
||||||
""" Generate the title for a live video """
|
""" Generate the title for a live video """
|
||||||
now = datetime.datetime.now()
|
now = datetime.datetime.now()
|
||||||
|
95
youtube_dl/extractor/crackle.py
Normal file
95
youtube_dl/extractor/crackle.py
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
|
class CrackleIE(InfoExtractor):
    """Extractor for crackle.com video pages and ``crackle:<id>`` references.

    Metadata comes from the legacy vidwallcache XML endpoint. Formats are the
    uplynk HLS manifest plus, when the item carries a media path, a fixed set
    of direct HTTP file slots.
    """

    _VALID_URL = r'(?:crackle:|https?://(?:www\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.crackle.com/the-art-of-more/2496419',
        'info_dict': {
            'id': '2496419',
            'ext': 'mp4',
            'title': 'Heavy Lies the Head',
            'description': 'md5:bb56aa0708fe7b9a4861535f15c3abca',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }

    # extracted from http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx
    _SUBTITLE_SERVER = 'http://web-us-az.crackle.com'
    _UPLYNK_OWNER_ID = 'e8773f7770a44dbd886eee4fca16a66b'
    _THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614'

    # extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx
    _MEDIA_FILE_SLOTS = {
        'c544.flv': {
            'width': 544,
            'height': 306,
        },
        '360p.mp4': {
            'width': 640,
            'height': 360,
        },
        '480p.mp4': {
            'width': 852,
            'height': 478,
        },
        '480p_1mbps.mp4': {
            'width': 852,
            'height': 478,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the video id matched in ``url``."""
        video_id = self._match_id(url)
        item = self._download_xml(
            'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id,
            video_id).find('i')
        title = item.attrib['t']

        thumbnail = None
        subtitles = {}
        # The HLS manifest is optional: a failed download must not abort
        # extraction, the HTTP slots below may still be usable.
        formats = self._extract_m3u8_formats(
            'http://content.uplynk.com/ext/%s/%s.m3u8' % (self._UPLYNK_OWNER_ID, video_id),
            video_id, 'mp4', m3u8_id='hls', fatal=False)

        path = item.attrib.get('p')
        if path:
            thumbnail = self._THUMBNAIL_TEMPLATE % path
            http_base_url = 'http://ahttp.crackle.com/' + path
            for mfs_path, mfs_info in self._MEDIA_FILE_SLOTS.items():
                formats.append({
                    'url': http_base_url + mfs_path,
                    'format_id': 'http-' + mfs_path.split('.')[0],
                    'width': mfs_info['width'],
                    'height': mfs_info['height'],
                })
            for cc in item.findall('cc'):
                locale = cc.attrib.get('l')
                v = cc.attrib.get('v')
                if locale and v:
                    # Append rather than assign so that several <cc>
                    # entries for one locale are all kept.
                    subtitles.setdefault(locale, []).append({
                        'url': '%s/%s%s_%s.xml' % (self._SUBTITLE_SERVER, path, locale, v),
                        'ext': 'ttml',
                    })
        self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))

        # The 'r' attribute is parsed as a base-16 number.
        duration_hex = item.attrib.get('r')
        return {
            'id': video_id,
            'title': title,
            'description': item.attrib.get('d'),
            'duration': int(duration_hex, 16) if duration_hex else None,
            'series': item.attrib.get('sn'),
            'season_number': int_or_none(item.attrib.get('se')),
            'episode_number': int_or_none(item.attrib.get('ep')),
            'thumbnail': thumbnail,
            'subtitles': subtitles,
            'formats': formats,
        }
|
@@ -113,7 +113,7 @@ class CSpanIE(InfoExtractor):
|
|||||||
'tbr': int_or_none(get_text_attr(quality, 'bitrate')),
|
'tbr': int_or_none(get_text_attr(quality, 'bitrate')),
|
||||||
})
|
})
|
||||||
if not formats:
|
if not formats:
|
||||||
path = get_text_attr(f, 'path')
|
path = unescapeHTML(get_text_attr(f, 'path'))
|
||||||
if not path:
|
if not path:
|
||||||
continue
|
continue
|
||||||
formats = self._extract_m3u8_formats(
|
formats = self._extract_m3u8_formats(
|
||||||
|
@@ -2,17 +2,26 @@
|
|||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
import itertools
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urllib_parse
|
from ..compat import (
|
||||||
|
compat_parse_qs,
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_urllib_parse_unquote,
|
||||||
|
compat_urlparse,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class DaumIE(InfoExtractor):
|
class DaumIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/v/(?P<id>[^?#&]+)'
|
_VALID_URL = r'https?://(?:(?:m\.)?tvpot\.daum\.net/v/|videofarm\.daum\.net/controller/player/VodPlayer\.swf\?vid=)(?P<id>[^?#&]+)'
|
||||||
IE_NAME = 'daum.net'
|
IE_NAME = 'daum.net'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@@ -23,25 +32,57 @@ class DaumIE(InfoExtractor):
|
|||||||
'title': '마크 헌트 vs 안토니오 실바',
|
'title': '마크 헌트 vs 안토니오 실바',
|
||||||
'description': 'Mark Hunt vs Antonio Silva',
|
'description': 'Mark Hunt vs Antonio Silva',
|
||||||
'upload_date': '20131217',
|
'upload_date': '20131217',
|
||||||
|
'thumbnail': 're:^https?://.*\.(?:jpg|png)',
|
||||||
'duration': 2117,
|
'duration': 2117,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://m.tvpot.daum.net/v/65139429',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '65139429',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '1297회, \'아빠 아들로 태어나길 잘 했어\' 민수, 감동의 눈물[아빠 어디가] 20150118',
|
||||||
|
'description': 'md5:79794514261164ff27e36a21ad229fc5',
|
||||||
|
'upload_date': '20150604',
|
||||||
|
'thumbnail': 're:^https?://.*\.(?:jpg|png)',
|
||||||
|
'duration': 154,
|
||||||
|
'view_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24',
|
'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://videofarm.daum.net/controller/player/VodPlayer.swf?vid=vwIpVpCQsT8%24&ref=',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'vwIpVpCQsT8$',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': '01-Korean War ( Trouble on the horizon )',
|
||||||
|
'description': '\nKorean War 01\nTrouble on the horizon\n전쟁의 먹구름',
|
||||||
|
'upload_date': '20080223',
|
||||||
|
'thumbnail': 're:^https?://.*\.(?:jpg|png)',
|
||||||
|
'duration': 249,
|
||||||
|
'view_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = compat_urllib_parse_unquote(self._match_id(url))
|
||||||
query = compat_urllib_parse.urlencode({'vid': video_id})
|
query = compat_urllib_parse.urlencode({'vid': video_id})
|
||||||
info = self._download_xml(
|
|
||||||
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
|
|
||||||
'Downloading video info')
|
|
||||||
movie_data = self._download_json(
|
movie_data = self._download_json(
|
||||||
'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json?' + query,
|
'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json?' + query,
|
||||||
video_id, 'Downloading video formats info')
|
video_id, 'Downloading video formats info')
|
||||||
|
|
||||||
|
# For urls like http://m.tvpot.daum.net/v/65139429, where the video_id is really a clipid
|
||||||
|
if not movie_data.get('output_list', {}).get('output_list') and re.match(r'^\d+$', video_id):
|
||||||
|
return self.url_result('http://tvpot.daum.net/clip/ClipView.do?clipid=%s' % video_id)
|
||||||
|
|
||||||
|
info = self._download_xml(
|
||||||
|
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
|
||||||
|
'Downloading video info')
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_el in movie_data['output_list']['output_list']:
|
for format_el in movie_data['output_list']['output_list']:
|
||||||
profile = format_el['profile']
|
profile = format_el['profile']
|
||||||
@@ -76,8 +117,9 @@ class DaumIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class DaumClipIE(InfoExtractor):
|
class DaumClipIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.do|mypot/View.do)\?.*?clipid=(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.(?:do|tv)|mypot/View.do)\?.*?clipid=(?P<id>\d+)'
|
||||||
IE_NAME = 'daum.net:clip'
|
IE_NAME = 'daum.net:clip'
|
||||||
|
_URL_TEMPLATE = 'http://tvpot.daum.net/clip/ClipView.do?clipid=%s'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
|
'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
|
||||||
@@ -87,11 +129,19 @@ class DaumClipIE(InfoExtractor):
|
|||||||
'title': 'DOTA 2GETHER 시즌2 6회 - 2부',
|
'title': 'DOTA 2GETHER 시즌2 6회 - 2부',
|
||||||
'description': 'DOTA 2GETHER 시즌2 6회 - 2부',
|
'description': 'DOTA 2GETHER 시즌2 6회 - 2부',
|
||||||
'upload_date': '20130831',
|
'upload_date': '20130831',
|
||||||
|
'thumbnail': 're:^https?://.*\.(?:jpg|png)',
|
||||||
'duration': 3868,
|
'duration': 3868,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://m.tvpot.daum.net/clip/ClipView.tv?clipid=54999425',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def suitable(cls, url):
|
||||||
|
return False if DaumPlaylistIE.suitable(url) or DaumUserIE.suitable(url) else super(DaumClipIE, cls).suitable(url)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
clip_info = self._download_json(
|
clip_info = self._download_json(
|
||||||
@@ -102,7 +152,7 @@ class DaumClipIE(InfoExtractor):
|
|||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': 'http://tvpot.daum.net/v/%s' % clip_info['vid'],
|
'url': 'http://tvpot.daum.net/v/%s' % clip_info['vid'],
|
||||||
'title': clip_info['title'],
|
'title': unescapeHTML(clip_info['title']),
|
||||||
'thumbnail': clip_info.get('thumb_url'),
|
'thumbnail': clip_info.get('thumb_url'),
|
||||||
'description': clip_info.get('contents'),
|
'description': clip_info.get('contents'),
|
||||||
'duration': int_or_none(clip_info.get('duration')),
|
'duration': int_or_none(clip_info.get('duration')),
|
||||||
@@ -110,3 +160,139 @@ class DaumClipIE(InfoExtractor):
|
|||||||
'view_count': int_or_none(clip_info.get('play_count')),
|
'view_count': int_or_none(clip_info.get('play_count')),
|
||||||
'ie_key': 'Daum',
|
'ie_key': 'Daum',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class DaumListIE(InfoExtractor):
    """Shared helpers for Daum extractors that enumerate lists of clips."""

    def _get_entries(self, list_id, list_id_type):
        """Page through GetClipInfo.do and collect one url_result per clip.

        ``list_id_type`` is the query parameter name under which ``list_id``
        is sent. Returns a ``(name, entries)`` tuple; ``name`` is taken from
        the first page that provides one and stays None otherwise.
        """
        list_name = None
        results = []
        for page in itertools.count(1):
            page_info = self._download_json(
                'http://tvpot.daum.net/mypot/json/GetClipInfo.do?size=48&init=true&order=date&page=%d&%s=%s' % (
                    page, list_id_type, list_id), list_id, 'Downloading list info - %s' % page)

            for clip in page_info['clip_list']:
                results.append(self.url_result(
                    'http://tvpot.daum.net/v/%s' % clip['vid']))

            if not list_name:
                playlist_name = page_info.get('playlist_bean', {}).get('name')
                list_name = playlist_name or page_info.get('potInfo', {}).get('name')

            # Stop as soon as the service reports no further pages.
            if not page_info.get('has_more'):
                break

        return list_name, results

    def _check_clip(self, url, list_id):
        """Honour --no-playlist for list URLs that also carry a ``clipid``.

        Returns a url_result for the single clip when ``noplaylist`` is set,
        otherwise None (after announcing the playlist download when a clip
        id is present).
        """
        query = compat_parse_qs(compat_urlparse.urlparse(url).query)
        if 'clipid' not in query:
            return None

        clip_id = query['clipid'][0]
        if not self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % list_id)
            return None

        self.to_screen('Downloading just video %s because of --no-playlist' % clip_id)
        return self.url_result(DaumClipIE._URL_TEMPLATE % clip_id, 'DaumClip')
|
||||||
|
|
||||||
|
|
||||||
|
class DaumPlaylistIE(DaumListIE):
|
||||||
|
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/mypot/(?:View\.do|Top\.tv)\?.*?playlistid=(?P<id>[0-9]+)'
|
||||||
|
IE_NAME = 'daum.net:playlist'
|
||||||
|
_URL_TEMPLATE = 'http://tvpot.daum.net/mypot/View.do?playlistid=%s'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'note': 'Playlist url with clipid',
|
||||||
|
'url': 'http://tvpot.daum.net/mypot/View.do?playlistid=6213966&clipid=73806844',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6213966',
|
||||||
|
'title': 'Woorissica Official',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 181
|
||||||
|
}, {
|
||||||
|
'note': 'Playlist url with clipid - noplaylist',
|
||||||
|
'url': 'http://tvpot.daum.net/mypot/View.do?playlistid=6213966&clipid=73806844',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '73806844',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '151017 Airport',
|
||||||
|
'upload_date': '20160117',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'noplaylist': True,
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def suitable(cls, url):
|
||||||
|
return False if DaumUserIE.suitable(url) else super(DaumPlaylistIE, cls).suitable(url)
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
list_id = self._match_id(url)
|
||||||
|
|
||||||
|
clip_result = self._check_clip(url, list_id)
|
||||||
|
if clip_result:
|
||||||
|
return clip_result
|
||||||
|
|
||||||
|
name, entries = self._get_entries(list_id, 'playlistid')
|
||||||
|
|
||||||
|
return self.playlist_result(entries, list_id, name)
|
||||||
|
|
||||||
|
|
||||||
|
class DaumUserIE(DaumListIE):
|
||||||
|
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/mypot/(?:View|Top)\.(?:do|tv)\?.*?ownerid=(?P<id>[0-9a-zA-Z]+)'
|
||||||
|
IE_NAME = 'daum.net:user'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'o2scDLIVbHc0',
|
||||||
|
'title': '마이 리틀 텔레비전',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 213
|
||||||
|
}, {
|
||||||
|
'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0&clipid=73801156',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '73801156',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '[미공개] 김구라, 오만석이 부릅니다 \'오케피\' - 마이 리틀 텔레비전 20160116',
|
||||||
|
'upload_date': '20160117',
|
||||||
|
'description': 'md5:5e91d2d6747f53575badd24bd62b9f36'
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'noplaylist': True,
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'note': 'Playlist url has ownerid and playlistid, playlistid takes precedence',
|
||||||
|
'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0&playlistid=6196631',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6196631',
|
||||||
|
'title': '마이 리틀 텔레비전 - 20160109',
|
||||||
|
},
|
||||||
|
'playlist_count': 11
|
||||||
|
}, {
|
||||||
|
'url': 'http://tvpot.daum.net/mypot/Top.do?ownerid=o2scDLIVbHc0',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://m.tvpot.daum.net/mypot/Top.tv?ownerid=45x1okb1If50&playlistid=3569733',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
list_id = self._match_id(url)
|
||||||
|
|
||||||
|
clip_result = self._check_clip(url, list_id)
|
||||||
|
if clip_result:
|
||||||
|
return clip_result
|
||||||
|
|
||||||
|
query_dict = compat_parse_qs(compat_urlparse.urlparse(url).query)
|
||||||
|
if 'playlistid' in query_dict:
|
||||||
|
playlist_id = query_dict['playlistid'][0]
|
||||||
|
return self.url_result(DaumPlaylistIE._URL_TEMPLATE % playlist_id, 'DaumPlaylist')
|
||||||
|
|
||||||
|
name, entries = self._get_entries(list_id, 'ownerid')
|
||||||
|
|
||||||
|
return self.playlist_result(entries, list_id, name)
|
||||||
|
@@ -53,8 +53,8 @@ class ESPNIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
r'class="video-play-button"[^>]+data-id="(\d+)',
|
r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P<id>\d+)',
|
||||||
webpage, 'video id')
|
webpage, 'video id', group='id')
|
||||||
|
|
||||||
cms = 'espn'
|
cms = 'espn'
|
||||||
if 'data-source="intl"' in webpage:
|
if 'data-source="intl"' in webpage:
|
||||||
|
@@ -6,9 +6,11 @@ import socket
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
|
compat_etree_fromstring,
|
||||||
compat_http_client,
|
compat_http_client,
|
||||||
compat_urllib_error,
|
compat_urllib_error,
|
||||||
compat_urllib_parse_unquote,
|
compat_urllib_parse_unquote,
|
||||||
|
compat_urllib_parse_unquote_plus,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
error_to_compat_str,
|
error_to_compat_str,
|
||||||
@@ -44,6 +46,9 @@ class FacebookIE(InfoExtractor):
|
|||||||
_CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
|
_CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
|
||||||
_NETRC_MACHINE = 'facebook'
|
_NETRC_MACHINE = 'facebook'
|
||||||
IE_NAME = 'facebook'
|
IE_NAME = 'facebook'
|
||||||
|
|
||||||
|
_CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
|
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
|
||||||
'md5': '6a40d33c0eccbb1af76cf0485a052659',
|
'md5': '6a40d33c0eccbb1af76cf0485a052659',
|
||||||
@@ -65,6 +70,16 @@ class FacebookIE(InfoExtractor):
|
|||||||
'expected_warnings': [
|
'expected_warnings': [
|
||||||
'title'
|
'title'
|
||||||
]
|
]
|
||||||
|
}, {
|
||||||
|
'note': 'Video with DASH manifest',
|
||||||
|
'url': 'https://www.facebook.com/video.php?v=957955867617029',
|
||||||
|
'md5': '54706e4db4f5ad58fbad82dde1f1213f',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '957955867617029',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'When you post epic content on instagram.com/433 8 million followers, this is ...',
|
||||||
|
'uploader': 'Demy de Zeeuw',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@@ -147,13 +162,36 @@ class FacebookIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
|
req = sanitized_Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
|
||||||
webpage = self._download_webpage(url, video_id)
|
req.add_header('User-Agent', self._CHROME_USER_AGENT)
|
||||||
|
webpage = self._download_webpage(req, video_id)
|
||||||
|
|
||||||
|
video_data = None
|
||||||
|
|
||||||
BEFORE = '{swf.addParam(param[0], param[1]);});\n'
|
BEFORE = '{swf.addParam(param[0], param[1]);});\n'
|
||||||
AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
|
AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
|
||||||
m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
|
m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
|
||||||
if not m:
|
if m:
|
||||||
|
data = dict(json.loads(m.group(1)))
|
||||||
|
params_raw = compat_urllib_parse_unquote(data['params'])
|
||||||
|
video_data = json.loads(params_raw)['video_data']
|
||||||
|
|
||||||
|
def video_data_list2dict(video_data):
|
||||||
|
ret = {}
|
||||||
|
for item in video_data:
|
||||||
|
format_id = item['stream_type']
|
||||||
|
ret.setdefault(format_id, []).append(item)
|
||||||
|
return ret
|
||||||
|
|
||||||
|
if not video_data:
|
||||||
|
server_js_data = self._parse_json(self._search_regex(
|
||||||
|
r'handleServerJS\(({.+})\);', webpage, 'server js data'), video_id)
|
||||||
|
for item in server_js_data['instances']:
|
||||||
|
if item[1][0] == 'VideoConfig':
|
||||||
|
video_data = video_data_list2dict(item[2][0]['videoData'])
|
||||||
|
break
|
||||||
|
|
||||||
|
if not video_data:
|
||||||
m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
|
m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
|
||||||
if m_msg is not None:
|
if m_msg is not None:
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
@@ -161,12 +199,9 @@ class FacebookIE(InfoExtractor):
|
|||||||
expected=True)
|
expected=True)
|
||||||
else:
|
else:
|
||||||
raise ExtractorError('Cannot parse data')
|
raise ExtractorError('Cannot parse data')
|
||||||
data = dict(json.loads(m.group(1)))
|
|
||||||
params_raw = compat_urllib_parse_unquote(data['params'])
|
|
||||||
params = json.loads(params_raw)
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, f in params['video_data'].items():
|
for format_id, f in video_data.items():
|
||||||
if not f or not isinstance(f, list):
|
if not f or not isinstance(f, list):
|
||||||
continue
|
continue
|
||||||
for quality in ('sd', 'hd'):
|
for quality in ('sd', 'hd'):
|
||||||
@@ -178,9 +213,15 @@ class FacebookIE(InfoExtractor):
|
|||||||
'url': src,
|
'url': src,
|
||||||
'preference': -10 if format_id == 'progressive' else 0,
|
'preference': -10 if format_id == 'progressive' else 0,
|
||||||
})
|
})
|
||||||
|
dash_manifest = f[0].get('dash_manifest')
|
||||||
|
if dash_manifest:
|
||||||
|
formats.extend(self._parse_mpd_formats(
|
||||||
|
compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest))))
|
||||||
if not formats:
|
if not formats:
|
||||||
raise ExtractorError('Cannot find video formats')
|
raise ExtractorError('Cannot find video formats')
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
video_title = self._html_search_regex(
|
video_title = self._html_search_regex(
|
||||||
r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage, 'title',
|
r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage, 'title',
|
||||||
default=None)
|
default=None)
|
||||||
|
@@ -9,6 +9,7 @@ class FOXIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.fox.com/watch/255180355939/7684182528',
|
'url': 'http://www.fox.com/watch/255180355939/7684182528',
|
||||||
|
'md5': 'ebd296fcc41dd4b19f8115d8461a3165',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '255180355939',
|
'id': '255180355939',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@@ -17,10 +18,6 @@ class FOXIE(InfoExtractor):
|
|||||||
'duration': 129,
|
'duration': 129,
|
||||||
},
|
},
|
||||||
'add_ie': ['ThePlatform'],
|
'add_ie': ['ThePlatform'],
|
||||||
'params': {
|
|
||||||
# m3u8 download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -29,7 +26,7 @@ class FOXIE(InfoExtractor):
|
|||||||
|
|
||||||
release_url = self._parse_json(self._search_regex(
|
release_url = self._parse_json(self._search_regex(
|
||||||
r'"fox_pdk_player"\s*:\s*({[^}]+?})', webpage, 'fox_pdk_player'),
|
r'"fox_pdk_player"\s*:\s*({[^}]+?})', webpage, 'fox_pdk_player'),
|
||||||
video_id)['release_url'] + '&manifest=m3u'
|
video_id)['release_url'] + '&switch=http'
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
|
@@ -6,24 +6,29 @@ from ..utils import (
|
|||||||
xpath_text,
|
xpath_text,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
)
|
)
|
||||||
|
from .youtube import YoutubeIE
|
||||||
|
|
||||||
|
|
||||||
class GamekingsIE(InfoExtractor):
|
class GamekingsIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://www\.gamekings\.tv/(?:videos|nieuws)/(?P<id>[^/]+)'
|
_VALID_URL = r'http://www\.gamekings\.nl/(?:videos|nieuws)/(?P<id>[^/]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
|
# YouTube embed video
|
||||||
# MD5 is flaky, seems to change regularly
|
'url': 'http://www.gamekings.nl/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
|
||||||
# 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3',
|
'md5': '5208d3a17adeaef829a7861887cb9029',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'phoenix-wright-ace-attorney-dual-destinies-review',
|
'id': 'HkSQKetlGOU',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review',
|
'title': 'Phoenix Wright: Ace Attorney - Dual Destinies Review',
|
||||||
'description': 'md5:36fd701e57e8c15ac8682a2374c99731',
|
'description': 'md5:db88c0e7f47e9ea50df3271b9dc72e1d',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'uploader_id': 'UCJugRGo4STYMeFr5RoOShtQ',
|
||||||
|
'uploader': 'Gamekings Vault',
|
||||||
|
'upload_date': '20151123',
|
||||||
},
|
},
|
||||||
|
'add_ie': ['Youtube'],
|
||||||
}, {
|
}, {
|
||||||
# vimeo video
|
# vimeo video
|
||||||
'url': 'http://www.gamekings.tv/videos/the-legend-of-zelda-majoras-mask/',
|
'url': 'http://www.gamekings.nl/videos/the-legend-of-zelda-majoras-mask/',
|
||||||
'md5': '12bf04dfd238e70058046937657ea68d',
|
'md5': '12bf04dfd238e70058046937657ea68d',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'the-legend-of-zelda-majoras-mask',
|
'id': 'the-legend-of-zelda-majoras-mask',
|
||||||
@@ -33,7 +38,7 @@ class GamekingsIE(InfoExtractor):
|
|||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.gamekings.tv/nieuws/gamekings-extra-shelly-en-david-bereiden-zich-voor-op-de-livestream/',
|
'url': 'http://www.gamekings.nl/nieuws/gamekings-extra-shelly-en-david-bereiden-zich-voor-op-de-livestream/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@@ -43,7 +48,11 @@ class GamekingsIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
playlist_id = self._search_regex(
|
playlist_id = self._search_regex(
|
||||||
r'gogoVideo\(\s*\d+\s*,\s*"([^"]+)', webpage, 'playlist id')
|
r'gogoVideo\([^,]+,\s*"([^"]+)', webpage, 'playlist id')
|
||||||
|
|
||||||
|
# Check if a YouTube embed is used
|
||||||
|
if YoutubeIE.suitable(playlist_id):
|
||||||
|
return self.url_result(playlist_id, ie='Youtube')
|
||||||
|
|
||||||
playlist = self._download_xml(
|
playlist = self._download_xml(
|
||||||
'http://www.gamekings.tv/wp-content/themes/gk2010/rss_playlist.php?id=%s' % playlist_id,
|
'http://www.gamekings.tv/wp-content/themes/gk2010/rss_playlist.php?id=%s' % playlist_id,
|
||||||
|
@@ -224,6 +224,20 @@ class GenericIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
# MPD from http://dash-mse-test.appspot.com/media.html
|
||||||
|
{
|
||||||
|
'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
|
||||||
|
'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'car-20120827-manifest',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'car-20120827-manifest',
|
||||||
|
'formats': 'mincount:9',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'bestvideo',
|
||||||
|
},
|
||||||
|
},
|
||||||
# google redirect
|
# google redirect
|
||||||
{
|
{
|
||||||
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
|
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
|
||||||
@@ -1229,19 +1243,24 @@ class GenericIE(InfoExtractor):
|
|||||||
|
|
||||||
# Check for direct link to a video
|
# Check for direct link to a video
|
||||||
content_type = head_response.headers.get('Content-Type', '')
|
content_type = head_response.headers.get('Content-Type', '')
|
||||||
m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
|
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>.+)$', content_type)
|
||||||
if m:
|
if m:
|
||||||
upload_date = unified_strdate(
|
upload_date = unified_strdate(
|
||||||
head_response.headers.get('Last-Modified'))
|
head_response.headers.get('Last-Modified'))
|
||||||
|
formats = []
|
||||||
|
if m.group('format_id').endswith('mpegurl'):
|
||||||
|
formats = self._extract_m3u8_formats(url, video_id, 'mp4')
|
||||||
|
else:
|
||||||
|
formats = [{
|
||||||
|
'format_id': m.group('format_id'),
|
||||||
|
'url': url,
|
||||||
|
'vcodec': 'none' if m.group('type') == 'audio' else None
|
||||||
|
}]
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
|
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
|
||||||
'direct': True,
|
'direct': True,
|
||||||
'formats': [{
|
'formats': formats,
|
||||||
'format_id': m.group('format_id'),
|
|
||||||
'url': url,
|
|
||||||
'vcodec': 'none' if m.group('type') == 'audio' else None
|
|
||||||
}],
|
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1284,7 +1303,7 @@ class GenericIE(InfoExtractor):
|
|||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
|
|
||||||
# Is it an RSS feed, a SMIL file or a XSPF playlist?
|
# Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
|
||||||
try:
|
try:
|
||||||
doc = compat_etree_fromstring(webpage.encode('utf-8'))
|
doc = compat_etree_fromstring(webpage.encode('utf-8'))
|
||||||
if doc.tag == 'rss':
|
if doc.tag == 'rss':
|
||||||
@@ -1293,6 +1312,13 @@ class GenericIE(InfoExtractor):
|
|||||||
return self._parse_smil(doc, url, video_id)
|
return self._parse_smil(doc, url, video_id)
|
||||||
elif doc.tag == '{http://xspf.org/ns/0/}playlist':
|
elif doc.tag == '{http://xspf.org/ns/0/}playlist':
|
||||||
return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
|
return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
|
||||||
|
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
|
||||||
|
'formats': self._parse_mpd_formats(
|
||||||
|
doc, video_id, mpd_base_url=url.rpartition('/')[0]),
|
||||||
|
}
|
||||||
except compat_xml_parse_error:
|
except compat_xml_parse_error:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -1402,7 +1428,7 @@ class GenericIE(InfoExtractor):
|
|||||||
|
|
||||||
# Look for embedded Dailymotion player
|
# Look for embedded Dailymotion player
|
||||||
matches = re.findall(
|
matches = re.findall(
|
||||||
r'<(?:embed|iframe)[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
|
r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
|
||||||
if matches:
|
if matches:
|
||||||
return _playlist_from_matches(
|
return _playlist_from_matches(
|
||||||
matches, lambda m: unescapeHTML(m[1]))
|
matches, lambda m: unescapeHTML(m[1]))
|
||||||
@@ -1819,6 +1845,17 @@ class GenericIE(InfoExtractor):
|
|||||||
if digiteka_url:
|
if digiteka_url:
|
||||||
return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
|
return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
|
||||||
|
|
||||||
|
# Look for Limelight embeds
|
||||||
|
mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
|
||||||
|
if mobj:
|
||||||
|
lm = {
|
||||||
|
'Media': 'media',
|
||||||
|
'Channel': 'channel',
|
||||||
|
'ChannelList': 'channel_list',
|
||||||
|
}
|
||||||
|
return self.url_result('limelight:%s:%s' % (
|
||||||
|
lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2))
|
||||||
|
|
||||||
# Look for AdobeTVVideo embeds
|
# Look for AdobeTVVideo embeds
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
|
r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
|
||||||
@@ -1935,6 +1972,8 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
|
return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
|
||||||
elif ext == 'm3u8':
|
elif ext == 'm3u8':
|
||||||
entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
|
entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
|
||||||
|
elif ext == 'mpd':
|
||||||
|
entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
|
||||||
else:
|
else:
|
||||||
entry_info_dict['url'] = video_url
|
entry_info_dict['url'] = video_url
|
||||||
|
|
||||||
|
@@ -10,8 +10,8 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class HotStarIE(InfoExtractor):
|
class HotStarIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/.*?[/-](?P<id>\d{10})'
|
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273',
|
'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1000076273',
|
'id': '1000076273',
|
||||||
@@ -26,7 +26,13 @@ class HotStarIE(InfoExtractor):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.hotstar.com/sports/cricket/rajitha-sizzles-on-debut-with-329/2001477583',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.hotstar.com/1000000515',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
_GET_CONTENT_TEMPLATE = 'http://account.hotstar.com/AVS/besc?action=GetAggregatedContentDetails&channel=PCTV&contentId=%s'
|
_GET_CONTENT_TEMPLATE = 'http://account.hotstar.com/AVS/besc?action=GetAggregatedContentDetails&channel=PCTV&contentId=%s'
|
||||||
_GET_CDN_TEMPLATE = 'http://getcdn.hotstar.com/AVS/besc?action=GetCDN&asJson=Y&channel=%s&id=%s&type=%s'
|
_GET_CDN_TEMPLATE = 'http://getcdn.hotstar.com/AVS/besc?action=GetCDN&asJson=Y&channel=%s&id=%s&type=%s'
|
||||||
|
@@ -2,46 +2,30 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from random import random
|
import time
|
||||||
from math import floor
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
|
||||||
remove_end,
|
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class IPrimaIE(InfoExtractor):
|
class IPrimaIE(InfoExtractor):
|
||||||
_WORKING = False
|
_VALID_URL = r'https?://play\.iprima\.cz/(?:.+/)?(?P<id>[^?#]+)'
|
||||||
_VALID_URL = r'https?://play\.iprima\.cz/(?:[^/]+/)*(?P<id>[^?#]+)'
|
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
'url': 'http://play.iprima.cz/gondici-s-r-o-33',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'p136534',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Gondíci s. r. o. (34)',
|
||||||
|
'description': 'md5:16577c629d006aa91f59ca8d8e7f99bd',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True, # m3u8 download
|
||||||
|
},
|
||||||
|
}, {
|
||||||
'url': 'http://play.iprima.cz/particka/particka-92',
|
'url': 'http://play.iprima.cz/particka/particka-92',
|
||||||
'info_dict': {
|
|
||||||
'id': '39152',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'Partička (92)',
|
|
||||||
'description': 'md5:74e9617e51bca67c3ecfb2c6f9766f45',
|
|
||||||
'thumbnail': 'http://play.iprima.cz/sites/default/files/image_crops/image_620x349/3/491483_particka-92_image_620x349.jpg',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True, # requires rtmpdump
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'http://play.iprima.cz/particka/tchibo-particka-jarni-moda',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '9718337',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'Tchibo Partička - Jarní móda',
|
|
||||||
'thumbnail': 're:^http:.*\.jpg$',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True, # requires rtmpdump
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'http://play.iprima.cz/zpravy-ftv-prima-2752015',
|
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@@ -51,62 +35,24 @@ class IPrimaIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
if re.search(r'Nemáte oprávnění přistupovat na tuto stránku\.\s*</div>', webpage):
|
video_id = self._search_regex(r'data-product="([^"]+)">', webpage, 'real id')
|
||||||
raise ExtractorError(
|
|
||||||
'%s said: You do not have permission to access this page' % self.IE_NAME, expected=True)
|
|
||||||
|
|
||||||
player_url = (
|
req = sanitized_Request(
|
||||||
'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' %
|
'http://play.iprima.cz/prehravac/init?_infuse=1'
|
||||||
(floor(random() * 1073741824), floor(random() * 1073741824))
|
'&_ts=%s&productId=%s' % (round(time.time()), video_id))
|
||||||
)
|
|
||||||
|
|
||||||
req = sanitized_Request(player_url)
|
|
||||||
req.add_header('Referer', url)
|
req.add_header('Referer', url)
|
||||||
playerpage = self._download_webpage(req, video_id)
|
playerpage = self._download_webpage(req, video_id, note='Downloading player')
|
||||||
|
|
||||||
base_url = ''.join(re.findall(r"embed\['stream'\] = '(.+?)'.+'(\?auth=)'.+'(.+?)';", playerpage)[1])
|
m3u8_url = self._search_regex(r"'src': '([^']+\.m3u8)'", playerpage, 'm3u8 url')
|
||||||
|
|
||||||
zoneGEO = self._html_search_regex(r'"zoneGEO":(.+?),', webpage, 'zoneGEO')
|
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
|
||||||
if zoneGEO != '0':
|
|
||||||
base_url = base_url.replace('token', 'token_' + zoneGEO)
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
for format_id in ['lq', 'hq', 'hd']:
|
|
||||||
filename = self._html_search_regex(
|
|
||||||
r'"%s_id":(.+?),' % format_id, webpage, 'filename')
|
|
||||||
|
|
||||||
if filename == 'null':
|
|
||||||
continue
|
|
||||||
|
|
||||||
real_id = self._search_regex(
|
|
||||||
r'Prima-(?:[0-9]{10}|WEB)-([0-9]+)[-_]',
|
|
||||||
filename, 'real video id')
|
|
||||||
|
|
||||||
if format_id == 'lq':
|
|
||||||
quality = 0
|
|
||||||
elif format_id == 'hq':
|
|
||||||
quality = 1
|
|
||||||
elif format_id == 'hd':
|
|
||||||
quality = 2
|
|
||||||
filename = 'hq/' + filename
|
|
||||||
|
|
||||||
formats.append({
|
|
||||||
'format_id': format_id,
|
|
||||||
'url': base_url,
|
|
||||||
'quality': quality,
|
|
||||||
'play_path': 'mp4:' + filename.replace('"', '')[:-4],
|
|
||||||
'rtmp_live': True,
|
|
||||||
'ext': 'flv',
|
|
||||||
})
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': real_id,
|
'id': video_id,
|
||||||
'title': remove_end(self._og_search_title(webpage), ' | Prima PLAY'),
|
'title': self._og_search_title(webpage),
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'description': self._search_regex(
|
'description': self._og_search_description(webpage),
|
||||||
r'<p[^>]+itemprop="description"[^>]*>([^<]+)',
|
|
||||||
webpage, 'description', default=None),
|
|
||||||
}
|
}
|
||||||
|
@@ -2,12 +2,13 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import smuggle_url
|
||||||
|
|
||||||
|
|
||||||
class KickStarterIE(InfoExtractor):
|
class KickStarterIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>[^/]*)/.*'
|
_VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>[^/]*)/.*'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location',
|
'url': 'https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant/description',
|
||||||
'md5': 'c81addca81327ffa66c642b5d8b08cab',
|
'md5': 'c81addca81327ffa66c642b5d8b08cab',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1404461844',
|
'id': '1404461844',
|
||||||
@@ -27,7 +28,8 @@ class KickStarterIE(InfoExtractor):
|
|||||||
'uploader_id': 'pebble',
|
'uploader_id': 'pebble',
|
||||||
'uploader': 'Pebble Technology',
|
'uploader': 'Pebble Technology',
|
||||||
'title': 'Pebble iOS Notifications',
|
'title': 'Pebble iOS Notifications',
|
||||||
}
|
},
|
||||||
|
'add_ie': ['Vimeo'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.kickstarter.com/projects/1420158244/power-drive-2000/widget/video.html',
|
'url': 'https://www.kickstarter.com/projects/1420158244/power-drive-2000/widget/video.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -43,7 +45,7 @@ class KickStarterIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<title>\s*(.*?)(?:\s*— Kickstarter)?\s*</title>',
|
r'<title>\s*(.*?)(?:\s*—\s*Kickstarter)?\s*</title>',
|
||||||
webpage, 'title')
|
webpage, 'title')
|
||||||
video_url = self._search_regex(
|
video_url = self._search_regex(
|
||||||
r'data-video-url="(.*?)"',
|
r'data-video-url="(.*?)"',
|
||||||
@@ -52,7 +54,7 @@ class KickStarterIE(InfoExtractor):
|
|||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'ie_key': 'Generic',
|
'ie_key': 'Generic',
|
||||||
'url': url,
|
'url': smuggle_url(url, {'to_generic': True}),
|
||||||
'title': title,
|
'title': title,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
107
youtube_dl/extractor/konserthusetplay.py
Normal file
107
youtube_dl/extractor/konserthusetplay.py
Normal file
@@ -0,0 +1,107 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class KonserthusetPlayIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?konserthusetplay\.se/\?.*\bm=(?P<id>[^&]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.konserthusetplay.se/?m=CKDDnlCY-dhWAAqiMERd-A',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'CKDDnlCY-dhWAAqiMERd-A',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Orkesterns instrument: Valthornen',
|
||||||
|
'description': 'md5:f10e1f0030202020396a4d712d2fa827',
|
||||||
|
'thumbnail': 're:^https?://.*$',
|
||||||
|
'duration': 398.8,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
e = self._search_regex(
|
||||||
|
r'https?://csp\.picsearch\.com/rest\?.*\be=(.+?)[&"\']', webpage, 'e')
|
||||||
|
|
||||||
|
rest = self._download_json(
|
||||||
|
'http://csp.picsearch.com/rest?e=%s&containerId=mediaplayer&i=object' % e,
|
||||||
|
video_id, transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1])
|
||||||
|
|
||||||
|
media = rest['media']
|
||||||
|
player_config = media['playerconfig']
|
||||||
|
playlist = player_config['playlist']
|
||||||
|
|
||||||
|
source = next(f for f in playlist if f.get('bitrates'))
|
||||||
|
|
||||||
|
FORMAT_ID_REGEX = r'_([^_]+)_h264m\.mp4'
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
|
||||||
|
fallback_url = source.get('fallbackUrl')
|
||||||
|
fallback_format_id = None
|
||||||
|
if fallback_url:
|
||||||
|
fallback_format_id = self._search_regex(
|
||||||
|
FORMAT_ID_REGEX, fallback_url, 'format id', default=None)
|
||||||
|
|
||||||
|
connection_url = (player_config.get('rtmp', {}).get(
|
||||||
|
'netConnectionUrl') or player_config.get(
|
||||||
|
'plugins', {}).get('bwcheck', {}).get('netConnectionUrl'))
|
||||||
|
if connection_url:
|
||||||
|
for f in source['bitrates']:
|
||||||
|
video_url = f.get('url')
|
||||||
|
if not video_url:
|
||||||
|
continue
|
||||||
|
format_id = self._search_regex(
|
||||||
|
FORMAT_ID_REGEX, video_url, 'format id', default=None)
|
||||||
|
f_common = {
|
||||||
|
'vbr': int_or_none(f.get('bitrate')),
|
||||||
|
'width': int_or_none(f.get('width')),
|
||||||
|
'height': int_or_none(f.get('height')),
|
||||||
|
}
|
||||||
|
f = f_common.copy()
|
||||||
|
f.update({
|
||||||
|
'url': connection_url,
|
||||||
|
'play_path': video_url,
|
||||||
|
'format_id': 'rtmp-%s' % format_id if format_id else 'rtmp',
|
||||||
|
'ext': 'flv',
|
||||||
|
})
|
||||||
|
formats.append(f)
|
||||||
|
if format_id and format_id == fallback_format_id:
|
||||||
|
f = f_common.copy()
|
||||||
|
f.update({
|
||||||
|
'url': fallback_url,
|
||||||
|
'format_id': 'http-%s' % format_id if format_id else 'http',
|
||||||
|
})
|
||||||
|
formats.append(f)
|
||||||
|
|
||||||
|
if not formats and fallback_url:
|
||||||
|
formats.append({
|
||||||
|
'url': fallback_url,
|
||||||
|
})
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
title = player_config.get('title') or media['title']
|
||||||
|
description = player_config.get('mediaInfo', {}).get('description')
|
||||||
|
thumbnail = media.get('image')
|
||||||
|
duration = float_or_none(media.get('duration'), 1000)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'duration': duration,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@@ -31,6 +31,10 @@ class KuwoBaseIE(InfoExtractor):
|
|||||||
(file_format['ext'], file_format.get('br', ''), song_id),
|
(file_format['ext'], file_format.get('br', ''), song_id),
|
||||||
song_id, note='Download %s url info' % file_format['format'],
|
song_id, note='Download %s url info' % file_format['format'],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if song_url == 'IPDeny':
|
||||||
|
raise ExtractorError('This song is blocked in this region', expected=True)
|
||||||
|
|
||||||
if song_url.startswith('http://') or song_url.startswith('https://'):
|
if song_url.startswith('http://') or song_url.startswith('https://'):
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': song_url,
|
'url': song_url,
|
||||||
|
@@ -5,11 +5,13 @@ import datetime
|
|||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
import base64
|
import base64
|
||||||
|
import hashlib
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_ord,
|
compat_ord,
|
||||||
|
compat_str,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
@@ -258,6 +260,7 @@ class LetvCloudIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=ec93197892&pu=2c7cd40209&auto_play=1&gpcflag=1&width=640&height=360',
|
'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=ec93197892&pu=2c7cd40209&auto_play=1&gpcflag=1&width=640&height=360',
|
||||||
|
'md5': 'e03d9cc8d9c13191e1caf277e42dbd31',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'p7jnfw5hw9_ec93197892',
|
'id': 'p7jnfw5hw9_ec93197892',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@@ -265,6 +268,7 @@ class LetvCloudIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=187060b6fd',
|
'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=187060b6fd',
|
||||||
|
'md5': 'cb988699a776b22d4a41b9d43acfb3ac',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'p7jnfw5hw9_187060b6fd',
|
'id': 'p7jnfw5hw9_187060b6fd',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@@ -272,21 +276,37 @@ class LetvCloudIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
@staticmethod
|
||||||
uu_mobj = re.search('uu=([\w]+)', url)
|
def sign_data(obj):
|
||||||
vu_mobj = re.search('vu=([\w]+)', url)
|
if obj['cf'] == 'flash':
|
||||||
|
salt = '2f9d6924b33a165a6d8b5d3d42f4f987'
|
||||||
|
items = ['cf', 'format', 'ran', 'uu', 'ver', 'vu']
|
||||||
|
elif obj['cf'] == 'html5':
|
||||||
|
salt = 'fbeh5player12c43eccf2bec3300344'
|
||||||
|
items = ['cf', 'ran', 'uu', 'bver', 'vu']
|
||||||
|
input_data = ''.join([item + obj[item] for item in items]) + salt
|
||||||
|
obj['sign'] = hashlib.md5(input_data.encode('utf-8')).hexdigest()
|
||||||
|
|
||||||
if not uu_mobj or not vu_mobj:
|
def _get_formats(self, cf, uu, vu, media_id):
|
||||||
raise ExtractorError('Invalid URL: %s' % url, expected=True)
|
def get_play_json(cf, timestamp):
|
||||||
|
data = {
|
||||||
|
'cf': cf,
|
||||||
|
'ver': '2.2',
|
||||||
|
'bver': 'firefox44.0',
|
||||||
|
'format': 'json',
|
||||||
|
'uu': uu,
|
||||||
|
'vu': vu,
|
||||||
|
'ran': compat_str(timestamp),
|
||||||
|
}
|
||||||
|
self.sign_data(data)
|
||||||
|
return self._download_json(
|
||||||
|
'http://api.letvcloud.com/gpc.php?' + compat_urllib_parse.urlencode(data),
|
||||||
|
media_id, 'Downloading playJson data for type %s' % cf)
|
||||||
|
|
||||||
uu = uu_mobj.group(1)
|
play_json = get_play_json(cf, time.time())
|
||||||
vu = vu_mobj.group(1)
|
# The server time may be different from local time
|
||||||
media_id = uu + '_' + vu
|
if play_json.get('code') == 10071:
|
||||||
|
play_json = get_play_json(cf, play_json['timestamp'])
|
||||||
play_json_req = sanitized_Request(
|
|
||||||
'http://api.letvcloud.com/gpc.php?cf=html5&sign=signxxxxx&ver=2.2&format=json&' +
|
|
||||||
'uu=' + uu + '&vu=' + vu)
|
|
||||||
play_json = self._download_json(play_json_req, media_id, 'Downloading playJson data')
|
|
||||||
|
|
||||||
if not play_json.get('data'):
|
if not play_json.get('data'):
|
||||||
if play_json.get('message'):
|
if play_json.get('message'):
|
||||||
@@ -312,6 +332,21 @@ class LetvCloudIE(InfoExtractor):
|
|||||||
'width': int_or_none(play_url.get('vwidth')),
|
'width': int_or_none(play_url.get('vwidth')),
|
||||||
'height': int_or_none(play_url.get('vheight')),
|
'height': int_or_none(play_url.get('vheight')),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
return formats
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
uu_mobj = re.search('uu=([\w]+)', url)
|
||||||
|
vu_mobj = re.search('vu=([\w]+)', url)
|
||||||
|
|
||||||
|
if not uu_mobj or not vu_mobj:
|
||||||
|
raise ExtractorError('Invalid URL: %s' % url, expected=True)
|
||||||
|
|
||||||
|
uu = uu_mobj.group(1)
|
||||||
|
vu = vu_mobj.group(1)
|
||||||
|
media_id = uu + '_' + vu
|
||||||
|
|
||||||
|
formats = self._get_formats('flash', uu, vu, media_id) + self._get_formats('html5', uu, vu, media_id)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@@ -40,7 +40,8 @@ class LimelightBaseIE(InfoExtractor):
|
|||||||
if not stream_url:
|
if not stream_url:
|
||||||
continue
|
continue
|
||||||
if '.f4m' in stream_url:
|
if '.f4m' in stream_url:
|
||||||
formats.extend(self._extract_f4m_formats(stream_url, video_id))
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
stream_url, video_id, fatal=False))
|
||||||
else:
|
else:
|
||||||
fmt = {
|
fmt = {
|
||||||
'url': stream_url,
|
'url': stream_url,
|
||||||
@@ -72,8 +73,8 @@ class LimelightBaseIE(InfoExtractor):
|
|||||||
format_id = mobile_url.get('targetMediaPlatform')
|
format_id = mobile_url.get('targetMediaPlatform')
|
||||||
if determine_ext(media_url) == 'm3u8':
|
if determine_ext(media_url) == 'm3u8':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
media_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
media_url, video_id, 'mp4', 'm3u8_native',
|
||||||
preference=-1, m3u8_id=format_id))
|
m3u8_id=format_id, fatal=False))
|
||||||
else:
|
else:
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': media_url,
|
'url': media_url,
|
||||||
|
@@ -4,6 +4,10 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
remove_end,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class MailRuIE(InfoExtractor):
|
class MailRuIE(InfoExtractor):
|
||||||
@@ -34,14 +38,30 @@ class MailRuIE(InfoExtractor):
|
|||||||
'id': '46843144_1263',
|
'id': '46843144_1263',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion',
|
'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion',
|
||||||
'timestamp': 1397217632,
|
'timestamp': 1397039888,
|
||||||
'upload_date': '20140411',
|
'upload_date': '20140409',
|
||||||
'uploader': 'hitech',
|
'uploader': 'hitech@corp.mail.ru',
|
||||||
'uploader_id': 'hitech@corp.mail.ru',
|
'uploader_id': 'hitech@corp.mail.ru',
|
||||||
'duration': 245,
|
'duration': 245,
|
||||||
},
|
},
|
||||||
'skip': 'Not accessible from Travis CI server',
|
'skip': 'Not accessible from Travis CI server',
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# only available via metaUrl API
|
||||||
|
'url': 'http://my.mail.ru/mail/720pizle/video/_myvideo/502.html',
|
||||||
|
'md5': '3b26d2491c6949d031a32b96bd97c096',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '56664382_502',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': ':8336',
|
||||||
|
'timestamp': 1449094163,
|
||||||
|
'upload_date': '20151202',
|
||||||
|
'uploader': '720pizle@mail.ru',
|
||||||
|
'uploader_id': '720pizle@mail.ru',
|
||||||
|
'duration': 6001,
|
||||||
|
},
|
||||||
|
'skip': 'Not accessible from Travis CI server',
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -51,32 +71,55 @@ class MailRuIE(InfoExtractor):
|
|||||||
if not video_id:
|
if not video_id:
|
||||||
video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix')
|
video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix')
|
||||||
|
|
||||||
video_data = self._download_json(
|
webpage = self._download_webpage(url, video_id)
|
||||||
'http://api.video.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON')
|
|
||||||
|
|
||||||
author = video_data['author']
|
video_data = None
|
||||||
uploader = author['name']
|
|
||||||
uploader_id = author.get('id') or author.get('email')
|
page_config = self._parse_json(self._search_regex(
|
||||||
view_count = video_data.get('views_count')
|
r'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>',
|
||||||
|
webpage, 'page config', default='{}'), video_id, fatal=False)
|
||||||
|
if page_config:
|
||||||
|
meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl')
|
||||||
|
if meta_url:
|
||||||
|
video_data = self._download_json(
|
||||||
|
meta_url, video_id, 'Downloading video meta JSON', fatal=False)
|
||||||
|
|
||||||
|
# Fallback old approach
|
||||||
|
if not video_data:
|
||||||
|
video_data = self._download_json(
|
||||||
|
'http://api.video.mail.ru/videos/%s.json?new=1' % video_id,
|
||||||
|
video_id, 'Downloading video JSON')
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for f in video_data['videos']:
|
||||||
|
video_url = f.get('url')
|
||||||
|
if not video_url:
|
||||||
|
continue
|
||||||
|
format_id = f.get('key')
|
||||||
|
height = int_or_none(self._search_regex(
|
||||||
|
r'^(\d+)[pP]$', format_id, 'height', default=None)) if format_id else None
|
||||||
|
formats.append({
|
||||||
|
'url': video_url,
|
||||||
|
'format_id': format_id,
|
||||||
|
'height': height,
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
meta_data = video_data['meta']
|
meta_data = video_data['meta']
|
||||||
content_id = '%s_%s' % (
|
title = remove_end(meta_data['title'], '.mp4')
|
||||||
meta_data.get('accId', ''), meta_data['itemId'])
|
|
||||||
title = meta_data['title']
|
|
||||||
if title.endswith('.mp4'):
|
|
||||||
title = title[:-4]
|
|
||||||
thumbnail = meta_data['poster']
|
|
||||||
duration = meta_data['duration']
|
|
||||||
timestamp = meta_data['timestamp']
|
|
||||||
|
|
||||||
formats = [
|
author = video_data.get('author')
|
||||||
{
|
uploader = author.get('name')
|
||||||
'url': video['url'],
|
uploader_id = author.get('id') or author.get('email')
|
||||||
'format_id': video['key'],
|
view_count = int_or_none(video_data.get('viewsCount') or video_data.get('views_count'))
|
||||||
'height': int(video['key'].rstrip('p'))
|
|
||||||
} for video in video_data['videos']
|
acc_id = meta_data.get('accId')
|
||||||
]
|
item_id = meta_data.get('itemId')
|
||||||
self._sort_formats(formats)
|
content_id = '%s_%s' % (acc_id, item_id) if acc_id and item_id else video_id
|
||||||
|
|
||||||
|
thumbnail = meta_data.get('poster')
|
||||||
|
duration = int_or_none(meta_data.get('duration'))
|
||||||
|
timestamp = int_or_none(meta_data.get('timestamp'))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': content_id,
|
'id': content_id,
|
||||||
|
55
youtube_dl/extractor/matchtv.py
Normal file
55
youtube_dl/extractor/matchtv.py
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import random
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_urllib_parse
|
||||||
|
from ..utils import (
|
||||||
|
sanitized_Request,
|
||||||
|
xpath_text,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MatchTVIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://matchtv\.ru/?#live-player'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://matchtv.ru/#live-player',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'matchtv-live',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 're:^Матч ТВ - Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||||
|
'is_live': True,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = 'matchtv-live'
|
||||||
|
request = sanitized_Request(
|
||||||
|
'http://player.matchtv.ntvplus.tv/player/smil?%s' % compat_urllib_parse.urlencode({
|
||||||
|
'ts': '',
|
||||||
|
'quality': 'SD',
|
||||||
|
'contentId': '561d2c0df7159b37178b4567',
|
||||||
|
'sign': '',
|
||||||
|
'includeHighlights': '0',
|
||||||
|
'userId': '',
|
||||||
|
'sessionId': random.randint(1, 1000000000),
|
||||||
|
'contentType': 'channel',
|
||||||
|
'timeShift': '0',
|
||||||
|
'platform': 'portal',
|
||||||
|
}),
|
||||||
|
headers={
|
||||||
|
'Referer': 'http://player.matchtv.ntvplus.tv/embed-player/NTVEmbedPlayer.swf',
|
||||||
|
})
|
||||||
|
video_url = self._download_json(request, video_id)['data']['videoUrl']
|
||||||
|
f4m_url = xpath_text(self._download_xml(video_url, video_id), './to')
|
||||||
|
formats = self._extract_f4m_formats(f4m_url, video_id)
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': self._live_title('Матч ТВ - Прямой эфир'),
|
||||||
|
'is_live': True,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@@ -18,13 +18,17 @@ class NBAIE(InfoExtractor):
|
|||||||
'md5': '9e7729d3010a9c71506fd1248f74e4f4',
|
'md5': '9e7729d3010a9c71506fd1248f74e4f4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '0021200253-okc-bkn-recap',
|
'id': '0021200253-okc-bkn-recap',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Thunder vs. Nets',
|
'title': 'Thunder vs. Nets',
|
||||||
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
|
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
|
||||||
'duration': 181,
|
'duration': 181,
|
||||||
'timestamp': 1354638466,
|
'timestamp': 1354638466,
|
||||||
'upload_date': '20121204',
|
'upload_date': '20121204',
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
|
'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@@ -68,7 +72,7 @@ class NBAIE(InfoExtractor):
|
|||||||
if video_url.startswith('/'):
|
if video_url.startswith('/'):
|
||||||
continue
|
continue
|
||||||
if video_url.endswith('.m3u8'):
|
if video_url.endswith('.m3u8'):
|
||||||
formats.extend(self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', fatal=False))
|
formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
|
||||||
elif video_url.endswith('.f4m'):
|
elif video_url.endswith('.f4m'):
|
||||||
formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.1.1', video_id, f4m_id='hds', fatal=False))
|
formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.1.1', video_id, f4m_id='hds', fatal=False))
|
||||||
else:
|
else:
|
||||||
|
@@ -19,38 +19,45 @@ class NBCIE(InfoExtractor):
|
|||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.nbc.com/the-tonight-show/segments/112966',
|
'url': 'http://www.nbc.com/the-tonight-show/segments/112966',
|
||||||
# md5 checksum is not stable
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'c9xnCo0YPOPH',
|
'id': '112966',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
|
'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
|
||||||
'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
|
'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.nbc.com/the-tonight-show/episodes/176',
|
'url': 'http://www.nbc.com/the-tonight-show/episodes/176',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'XwU9KZkp98TH',
|
'id': '176',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'Ricky Gervais, Steven Van Zandt, ILoveMakonnen',
|
'title': 'Ricky Gervais, Steven Van Zandt, ILoveMakonnen',
|
||||||
'description': 'A brand new episode of The Tonight Show welcomes Ricky Gervais, Steven Van Zandt and ILoveMakonnen.',
|
'description': 'A brand new episode of The Tonight Show welcomes Ricky Gervais, Steven Van Zandt and ILoveMakonnen.',
|
||||||
},
|
},
|
||||||
'skip': 'Only works from US',
|
'skip': '404 Not Found',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821',
|
'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '8iUuyzWDdYUZ',
|
'id': '2832821',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Star Wars Teaser',
|
'title': 'Star Wars Teaser',
|
||||||
'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442',
|
'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442',
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
'skip': 'Only works from US',
|
'skip': 'Only works from US',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# This video has expired but with an escaped embedURL
|
# This video has expired but with an escaped embedURL
|
||||||
'url': 'http://www.nbc.com/parenthood/episode-guide/season-5/just-like-at-home/515',
|
'url': 'http://www.nbc.com/parenthood/episode-guide/season-5/just-like-at-home/515',
|
||||||
'skip': 'Expired'
|
'only_matching': True,
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -66,7 +73,11 @@ class NBCIE(InfoExtractor):
|
|||||||
webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/')))
|
webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/')))
|
||||||
if theplatform_url.startswith('//'):
|
if theplatform_url.startswith('//'):
|
||||||
theplatform_url = 'http:' + theplatform_url
|
theplatform_url = 'http:' + theplatform_url
|
||||||
return self.url_result(smuggle_url(theplatform_url, {'source_url': url}))
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': smuggle_url(theplatform_url, {'source_url': url}),
|
||||||
|
'id': video_id,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class NBCSportsVPlayerIE(InfoExtractor):
|
class NBCSportsVPlayerIE(InfoExtractor):
|
||||||
|
@@ -189,7 +189,7 @@ class NPOIE(NPOBaseIE):
|
|||||||
if not video_url:
|
if not video_url:
|
||||||
continue
|
continue
|
||||||
if format_id == 'adaptive':
|
if format_id == 'adaptive':
|
||||||
formats.extend(self._extract_m3u8_formats(video_url, video_id))
|
formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4'))
|
||||||
else:
|
else:
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
@@ -406,6 +406,38 @@ class NPORadioFragmentIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class SchoolTVIE(InfoExtractor):
|
||||||
|
IE_NAME = 'schooltv'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?schooltv\.nl/video/(?P<id>[^/?#&]+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.schooltv.nl/video/ademhaling-de-hele-dag-haal-je-adem-maar-wat-gebeurt-er-dan-eigenlijk-in-je-lichaam/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'WO_NTR_429477',
|
||||||
|
'display_id': 'ademhaling-de-hele-dag-haal-je-adem-maar-wat-gebeurt-er-dan-eigenlijk-in-je-lichaam',
|
||||||
|
'title': 'Ademhaling: De hele dag haal je adem. Maar wat gebeurt er dan eigenlijk in je lichaam?',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'description': 'md5:abfa0ff690adb73fd0297fd033aaa631'
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# Skip because of m3u8 download
|
||||||
|
'skip_download': True
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
video_id = self._search_regex(
|
||||||
|
r'data-mid=(["\'])(?P<id>.+?)\1', webpage, 'video_id', group='id')
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'ie_key': 'NPO',
|
||||||
|
'url': 'npo:%s' % video_id,
|
||||||
|
'display_id': display_id
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class VPROIE(NPOIE):
|
class VPROIE(NPOIE):
|
||||||
IE_NAME = 'vpro'
|
IE_NAME = 'vpro'
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P<id>[^/]+)\.html'
|
_VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P<id>[^/]+)\.html'
|
||||||
|
@@ -133,26 +133,32 @@ class NRKTVIE(InfoExtractor):
|
|||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
||||||
'md5': 'adf2c5454fa2bf032f47a9f8fb351342',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'MUHH48000314',
|
'id': 'MUHH48000314',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': '20 spørsmål',
|
'title': '20 spørsmål',
|
||||||
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
|
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
|
||||||
'upload_date': '20140523',
|
'upload_date': '20140523',
|
||||||
'duration': 1741.52,
|
'duration': 1741.52,
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'https://tv.nrk.no/program/mdfp15000514',
|
'url': 'https://tv.nrk.no/program/mdfp15000514',
|
||||||
'md5': '383650ece2b25ecec996ad7b5bb2a384',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'mdfp15000514',
|
'id': 'mdfp15000514',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Kunnskapskanalen: Grunnlovsjubiléet - Stor ståhei for ingenting',
|
'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting',
|
||||||
'description': 'md5:654c12511f035aed1e42bdf5db3b206a',
|
'description': 'md5:654c12511f035aed1e42bdf5db3b206a',
|
||||||
'upload_date': '20140524',
|
'upload_date': '20140524',
|
||||||
'duration': 4605.0,
|
'duration': 4605.08,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@@ -4,10 +4,12 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
strip_jsonp,
|
strip_jsonp,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
US_RATINGS,
|
US_RATINGS,
|
||||||
@@ -199,7 +201,7 @@ class PBSIE(InfoExtractor):
|
|||||||
'id': '2365006249',
|
'id': '2365006249',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Constitution USA with Peter Sagal - A More Perfect Union',
|
'title': 'Constitution USA with Peter Sagal - A More Perfect Union',
|
||||||
'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
|
'description': 'md5:36f341ae62e251b8f5bd2b754b95a071',
|
||||||
'duration': 3190,
|
'duration': 3190,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
@@ -213,7 +215,7 @@ class PBSIE(InfoExtractor):
|
|||||||
'id': '2365297690',
|
'id': '2365297690',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'FRONTLINE - Losing Iraq',
|
'title': 'FRONTLINE - Losing Iraq',
|
||||||
'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
|
'description': 'md5:4d3eaa01f94e61b3e73704735f1196d9',
|
||||||
'duration': 5050,
|
'duration': 5050,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
@@ -227,7 +229,7 @@ class PBSIE(InfoExtractor):
|
|||||||
'id': '2201174722',
|
'id': '2201174722',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'PBS NewsHour - Cyber Schools Gain Popularity, but Quality Questions Persist',
|
'title': 'PBS NewsHour - Cyber Schools Gain Popularity, but Quality Questions Persist',
|
||||||
'description': 'md5:5871c15cba347c1b3d28ac47a73c7c28',
|
'description': 'md5:95a19f568689d09a166dff9edada3301',
|
||||||
'duration': 801,
|
'duration': 801,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@@ -237,8 +239,8 @@ class PBSIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2365297708',
|
'id': '2365297708',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'description': 'md5:68d87ef760660eb564455eb30ca464fe',
|
|
||||||
'title': 'Great Performances - Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full',
|
'title': 'Great Performances - Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full',
|
||||||
|
'description': 'md5:657897370e09e2bc6bf0f8d2cd313c6b',
|
||||||
'duration': 6559,
|
'duration': 6559,
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
@@ -278,7 +280,7 @@ class PBSIE(InfoExtractor):
|
|||||||
'display_id': 'player',
|
'display_id': 'player',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'American Experience - Death and the Civil War, Chapter 1',
|
'title': 'American Experience - Death and the Civil War, Chapter 1',
|
||||||
'description': 'American Experience, TV’s most-watched history series, brings to life the compelling stories from our past that inform our understanding of the world today.',
|
'description': 'md5:1b80a74e0380ed2a4fb335026de1600d',
|
||||||
'duration': 682,
|
'duration': 682,
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
@@ -287,20 +289,19 @@ class PBSIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://video.pbs.org/video/2365367186/',
|
'url': 'http://www.pbs.org/video/2365245528/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2365367186',
|
'id': '2365245528',
|
||||||
'display_id': '2365367186',
|
'display_id': '2365245528',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'To Catch A Comet - Full Episode',
|
'title': 'FRONTLINE - United States of Secrets (Part One)',
|
||||||
'description': 'On November 12, 2014, billions of kilometers from Earth, spacecraft orbiter Rosetta and lander Philae did what no other had dared to attempt \u2014 land on the volatile surface of a comet as it zooms around the sun at 67,000 km/hr. The European Space Agency hopes this mission can help peer into our past and unlock secrets of our origins.',
|
'description': 'md5:55756bd5c551519cc4b7703e373e217e',
|
||||||
'duration': 3342,
|
'duration': 6851,
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True, # requires ffmpeg
|
'skip_download': True, # requires ffmpeg
|
||||||
},
|
},
|
||||||
'skip': 'Expired',
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# Video embedded in iframe containing angle brackets as attribute's value (e.g.
|
# Video embedded in iframe containing angle brackets as attribute's value (e.g.
|
||||||
@@ -312,7 +313,7 @@ class PBSIE(InfoExtractor):
|
|||||||
'display_id': 'a-chefs-life-season-3-episode-5-prickly-business',
|
'display_id': 'a-chefs-life-season-3-episode-5-prickly-business',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': "A Chef's Life - Season 3, Ep. 5: Prickly Business",
|
'title': "A Chef's Life - Season 3, Ep. 5: Prickly Business",
|
||||||
'description': 'md5:61db2ddf27c9912f09c241014b118ed1',
|
'description': 'md5:54033c6baa1f9623607c6e2ed245888b',
|
||||||
'duration': 1480,
|
'duration': 1480,
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
@@ -328,7 +329,7 @@ class PBSIE(InfoExtractor):
|
|||||||
'display_id': 'the-atomic-artists',
|
'display_id': 'the-atomic-artists',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'FRONTLINE - The Atomic Artists',
|
'title': 'FRONTLINE - The Atomic Artists',
|
||||||
'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
|
'description': 'md5:1a2481e86b32b2e12ec1905dd473e2c1',
|
||||||
'duration': 723,
|
'duration': 723,
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
@@ -365,8 +366,12 @@ class PBSIE(InfoExtractor):
|
|||||||
webpage, 'upload date', default=None))
|
webpage, 'upload date', default=None))
|
||||||
|
|
||||||
# tabbed frontline videos
|
# tabbed frontline videos
|
||||||
tabbed_videos = re.findall(
|
MULTI_PART_REGEXES = (
|
||||||
r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"', webpage)
|
r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"',
|
||||||
|
r'<a[^>]+href=["\']#video-\d+["\'][^>]+data-coveid=["\'](\d+)',
|
||||||
|
)
|
||||||
|
for p in MULTI_PART_REGEXES:
|
||||||
|
tabbed_videos = re.findall(p, webpage)
|
||||||
if tabbed_videos:
|
if tabbed_videos:
|
||||||
return tabbed_videos, presumptive_id, upload_date
|
return tabbed_videos, presumptive_id, upload_date
|
||||||
|
|
||||||
@@ -432,9 +437,21 @@ class PBSIE(InfoExtractor):
|
|||||||
for vid_id in video_id]
|
for vid_id in video_id]
|
||||||
return self.playlist_result(entries, display_id)
|
return self.playlist_result(entries, display_id)
|
||||||
|
|
||||||
|
try:
|
||||||
info = self._download_json(
|
info = self._download_json(
|
||||||
'http://player.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
|
'http://player.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
|
||||||
display_id)
|
display_id, 'Downloading video info JSON')
|
||||||
|
except ExtractorError as e:
|
||||||
|
if not isinstance(e.cause, compat_HTTPError) or e.cause.code != 404:
|
||||||
|
raise
|
||||||
|
# videoInfo API may not work for some videos, fallback to portalplayer API
|
||||||
|
player = self._download_webpage(
|
||||||
|
'http://player.pbs.org/portalplayer/%s' % video_id, display_id)
|
||||||
|
info = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'(?s)PBS\.videoData\s*=\s*({.+?});\n',
|
||||||
|
player, 'video data', default='{}'),
|
||||||
|
display_id, transform_source=js_to_json, fatal=False)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for encoding_name in ('recommended_encoding', 'alternate_encoding'):
|
for encoding_name in ('recommended_encoding', 'alternate_encoding'):
|
||||||
@@ -493,7 +510,7 @@ class PBSIE(InfoExtractor):
|
|||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': info['title'],
|
'title': info['title'],
|
||||||
'description': info['program'].get('description'),
|
'description': info.get('description') or info.get('program', {}).get('description'),
|
||||||
'thumbnail': info.get('image_url'),
|
'thumbnail': info.get('image_url'),
|
||||||
'duration': int_or_none(info.get('duration')),
|
'duration': int_or_none(info.get('duration')),
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
|
51
youtube_dl/extractor/plays.py
Normal file
51
youtube_dl/extractor/plays.py
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
|
class PlaysTVIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?plays\.tv/video/(?P<id>[0-9a-f]{18})'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall',
|
||||||
|
'md5': 'dfeac1198506652b5257a62762cec7bc',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '56af17f56c95335490',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'When you outplay the Azir wall',
|
||||||
|
'description': 'Posted by Bjergsen',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
title = self._og_search_title(webpage)
|
||||||
|
content = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'R\.bindContent\(({.+?})\);', webpage,
|
||||||
|
'content'), video_id)['content']
|
||||||
|
mpd_url, sources = re.search(
|
||||||
|
r'(?s)<video[^>]+data-mpd="([^"]+)"[^>]*>(.+?)</video>',
|
||||||
|
content).groups()
|
||||||
|
formats = self._extract_mpd_formats(
|
||||||
|
self._proto_relative_url(mpd_url), video_id, mpd_id='DASH')
|
||||||
|
for format_id, height, format_url in re.findall(r'<source\s+res="((\d+)h?)"\s+src="([^"]+)"', sources):
|
||||||
|
formats.append({
|
||||||
|
'url': self._proto_relative_url(format_url),
|
||||||
|
'format_id': 'http-' + format_id,
|
||||||
|
'height': int_or_none(height),
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': self._og_search_description(webpage),
|
||||||
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@@ -1,6 +1,8 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
float_or_none,
|
float_or_none,
|
||||||
@@ -61,12 +63,15 @@ class RteIE(InfoExtractor):
|
|||||||
class RteRadioIE(InfoExtractor):
|
class RteRadioIE(InfoExtractor):
|
||||||
IE_NAME = 'rte:radio'
|
IE_NAME = 'rte:radio'
|
||||||
IE_DESC = 'Raidió Teilifís Éireann radio'
|
IE_DESC = 'Raidió Teilifís Éireann radio'
|
||||||
# Radioplayer URLs have the specifier #!rii=<channel_id>:<id>:<playable_item_id>:<date>:
|
# Radioplayer URLs have two distinct specifier formats,
|
||||||
|
# the old format #!rii=<channel_id>:<id>:<playable_item_id>:<date>:
|
||||||
|
# the new format #!rii=b<channel_id>_<id>_<playable_item_id>_<date>_
|
||||||
# where the IDs are int/empty, the date is DD-MM-YYYY, and the specifier may be truncated.
|
# where the IDs are int/empty, the date is DD-MM-YYYY, and the specifier may be truncated.
|
||||||
# An <id> uniquely defines an individual recording, and is the only part we require.
|
# An <id> uniquely defines an individual recording, and is the only part we require.
|
||||||
_VALID_URL = r'https?://(?:www\.)?rte\.ie/radio/utils/radioplayer/rteradioweb\.html#!rii=(?:[0-9]*)(?:%3A|:)(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?rte\.ie/radio/utils/radioplayer/rteradioweb\.html#!rii=(?:b?[0-9]*)(?:%3A|:|%5F|_)(?P<id>[0-9]+)'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
|
# Old-style player URL; HLS and RTMPE formats
|
||||||
'url': 'http://www.rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=16:10507902:2414:27-12-2015:',
|
'url': 'http://www.rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=16:10507902:2414:27-12-2015:',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '10507902',
|
'id': '10507902',
|
||||||
@@ -81,7 +86,23 @@ class RteRadioIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': 'f4m fails with --test atm'
|
'skip_download': 'f4m fails with --test atm'
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
# New-style player URL; RTMPE formats only
|
||||||
|
'url': 'http://rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=b16_3250678_8861_06-04-2012_',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3250678',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'The Lyric Concert with Paul Herriott',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'description': '',
|
||||||
|
'timestamp': 1333742400,
|
||||||
|
'upload_date': '20120406',
|
||||||
|
'duration': 7199.016,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': 'f4m fails with --test atm'
|
||||||
}
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
item_id = self._match_id(url)
|
item_id = self._match_id(url)
|
||||||
@@ -102,8 +123,18 @@ class RteRadioIE(InfoExtractor):
|
|||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
if mg.get('url') and not mg['url'].startswith('rtmpe:'):
|
if mg.get('url'):
|
||||||
formats.append({'url': mg['url']})
|
m = re.match(r'(?P<url>rtmpe?://[^/]+)/(?P<app>.+)/(?P<playpath>mp4:.*)', mg['url'])
|
||||||
|
if m:
|
||||||
|
m = m.groupdict()
|
||||||
|
formats.append({
|
||||||
|
'url': m['url'] + '/' + m['app'],
|
||||||
|
'app': m['app'],
|
||||||
|
'play_path': m['playpath'],
|
||||||
|
'player_url': url,
|
||||||
|
'ext': 'flv',
|
||||||
|
'format_id': 'rtmp',
|
||||||
|
})
|
||||||
|
|
||||||
if mg.get('hls_server') and mg.get('hls_url'):
|
if mg.get('hls_server') and mg.get('hls_url'):
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
@@ -71,7 +71,7 @@ class ScreenwaveMediaIE(InfoExtractor):
|
|||||||
formats = []
|
formats = []
|
||||||
for source in sources:
|
for source in sources:
|
||||||
if source['type'] == 'hls':
|
if source['type'] == 'hls':
|
||||||
formats.extend(self._extract_m3u8_formats(source['file'], video_id))
|
formats.extend(self._extract_m3u8_formats(source['file'], video_id, ext='mp4'))
|
||||||
else:
|
else:
|
||||||
file_ = source.get('file')
|
file_ = source.get('file')
|
||||||
if not file_:
|
if not file_:
|
||||||
@@ -107,7 +107,11 @@ class TeamFourIE(InfoExtractor):
|
|||||||
'upload_date': '20130401',
|
'upload_date': '20130401',
|
||||||
'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar',
|
'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar',
|
||||||
'title': 'A Moment With TFS Episode 4',
|
'title': 'A Moment With TFS Episode 4',
|
||||||
}
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -53,17 +53,25 @@ class SenateISVPIE(InfoExtractor):
|
|||||||
'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',
|
'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'judiciary031715',
|
'id': 'judiciary031715',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Integrated Senate Video Player',
|
'title': 'Integrated Senate Video Player',
|
||||||
'thumbnail': 're:^https?://.*\.(?:jpg|png)$',
|
'thumbnail': 're:^https?://.*\.(?:jpg|png)$',
|
||||||
}
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false',
|
'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'commerce011514',
|
'id': 'commerce011514',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Integrated Senate Video Player'
|
'title': 'Integrated Senate Video Player'
|
||||||
}
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.senate.gov/isvp/?type=arch&comm=intel&filename=intel090613&hc_location=ufi',
|
'url': 'http://www.senate.gov/isvp/?type=arch&comm=intel&filename=intel090613&hc_location=ufi',
|
||||||
# checksum differs each time
|
# checksum differs each time
|
||||||
|
@@ -7,7 +7,7 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
class SpankBangIE(InfoExtractor):
|
class SpankBangIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:(?:www|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video'
|
_VALID_URL = r'https?://(?:(?:www|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
|
'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
|
||||||
'md5': '1cc433e1d6aa14bc376535b8679302f7',
|
'md5': '1cc433e1d6aa14bc376535b8679302f7',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -19,7 +19,11 @@ class SpankBangIE(InfoExtractor):
|
|||||||
'uploader': 'silly2587',
|
'uploader': 'silly2587',
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
# 480p only
|
||||||
|
'url': 'http://spankbang.com/1vt0/video/solvane+gangbang',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
@@ -34,7 +38,8 @@ class SpankBangIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'format_id': '%sp' % height,
|
'format_id': '%sp' % height,
|
||||||
'height': int(height),
|
'height': int(height),
|
||||||
} for height in re.findall(r'<(?:span|li)[^>]+q_(\d+)p', webpage)]
|
} for height in re.findall(r'<(?:span|li|p)[^>]+[qb]_(\d+)p', webpage)]
|
||||||
|
self._check_formats(formats, video_id)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
|
@@ -70,14 +70,11 @@ class SRGSSRIE(InfoExtractor):
|
|||||||
asset_url, media_id, 'mp4', 'm3u8_native',
|
asset_url, media_id, 'mp4', 'm3u8_native',
|
||||||
m3u8_id=format_id, fatal=False))
|
m3u8_id=format_id, fatal=False))
|
||||||
else:
|
else:
|
||||||
ext = None
|
|
||||||
if protocol == 'RTMP':
|
|
||||||
ext = self._search_regex(r'([a-z0-9]+):[^/]+', asset_url, 'ext')
|
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'url': asset_url,
|
'url': asset_url,
|
||||||
'preference': preference(quality),
|
'preference': preference(quality),
|
||||||
'ext': ext,
|
'ext': 'flv' if protocol == 'RTMP' else None,
|
||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
@@ -20,7 +20,6 @@ from ..utils import (
|
|||||||
int_or_none,
|
int_or_none,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
url_basename,
|
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -283,8 +282,8 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
|
|||||||
first_video_id = None
|
first_video_id = None
|
||||||
duration = None
|
duration = None
|
||||||
for item in entry['media$content']:
|
for item in entry['media$content']:
|
||||||
smil_url = item['plfile$url'] + '&format=SMIL&Tracking=true&Embedded=true&formats=MPEG4,F4M'
|
smil_url = item['plfile$url'] + '&format=SMIL&mbr=true'
|
||||||
cur_video_id = url_basename(smil_url)
|
cur_video_id = ThePlatformIE._match_id(smil_url)
|
||||||
if first_video_id is None:
|
if first_video_id is None:
|
||||||
first_video_id = cur_video_id
|
first_video_id = cur_video_id
|
||||||
duration = float_or_none(item.get('plfile$duration'))
|
duration = float_or_none(item.get('plfile$duration'))
|
||||||
|
@@ -17,18 +17,21 @@ class TV2IE(InfoExtractor):
|
|||||||
_VALID_URL = 'http://(?:www\.)?tv2\.no/v/(?P<id>\d+)'
|
_VALID_URL = 'http://(?:www\.)?tv2\.no/v/(?P<id>\d+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.tv2.no/v/916509/',
|
'url': 'http://www.tv2.no/v/916509/',
|
||||||
'md5': '9cb9e3410b18b515d71892f27856e9b1',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '916509',
|
'id': '916509',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Se Gryttens hyllest av Steven Gerrard',
|
'title': 'Se Frode Gryttens hyllest av Steven Gerrard',
|
||||||
'description': 'TV 2 Sportens huspoet tar avskjed med Liverpools kaptein Steven Gerrard.',
|
'description': 'TV 2 Sportens huspoet tar avskjed med Liverpools kaptein Steven Gerrard.',
|
||||||
'timestamp': 1431715610,
|
'timestamp': 1431715610,
|
||||||
'upload_date': '20150515',
|
'upload_date': '20150515',
|
||||||
'duration': 156.967,
|
'duration': 156.967,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'categories': list,
|
'categories': list,
|
||||||
}
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -197,8 +197,14 @@ class VevoIE(InfoExtractor):
|
|||||||
if not version_url:
|
if not version_url:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if '.mpd' in version_url or '.ism' in version_url:
|
if '.ism' in version_url:
|
||||||
continue
|
continue
|
||||||
|
elif '.mpd' in version_url:
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
version_url, video_id, mpd_id='dash-%s' % version,
|
||||||
|
note='Downloading %s MPD information' % version,
|
||||||
|
errnote='Failed to download %s MPD information' % version,
|
||||||
|
fatal=False))
|
||||||
elif '.m3u8' in version_url:
|
elif '.m3u8' in version_url:
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
version_url, video_id, 'mp4', 'm3u8_native',
|
version_url, video_id, 'mp4', 'm3u8_native',
|
||||||
|
@@ -86,10 +86,9 @@ class VGTVIE(XstreamIE):
|
|||||||
{
|
{
|
||||||
# streamType: wasLive
|
# streamType: wasLive
|
||||||
'url': 'http://www.vgtv.no/#!/live/113063/direkte-v75-fra-solvalla',
|
'url': 'http://www.vgtv.no/#!/live/113063/direkte-v75-fra-solvalla',
|
||||||
'md5': '458f4841239dab414343b50e5af8869c',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '113063',
|
'id': '113063',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'V75 fra Solvalla 30.05.15',
|
'title': 'V75 fra Solvalla 30.05.15',
|
||||||
'description': 'md5:b3743425765355855f88e096acc93231',
|
'description': 'md5:b3743425765355855f88e096acc93231',
|
||||||
'thumbnail': 're:^https?://.*\.jpg',
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
@@ -98,6 +97,10 @@ class VGTVIE(XstreamIE):
|
|||||||
'upload_date': '20150530',
|
'upload_date': '20150530',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
|
'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
|
||||||
|
@@ -1,6 +1,10 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_urlparse,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
@@ -12,10 +16,10 @@ class ViddlerIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.viddler.com/v/43903784',
|
'url': 'http://www.viddler.com/v/43903784',
|
||||||
'md5': 'ae43ad7cb59431ce043f0ff7fa13cbf4',
|
'md5': '9eee21161d2c7f5b39690c3e325fab2f',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '43903784',
|
'id': '43903784',
|
||||||
'ext': 'mp4',
|
'ext': 'mov',
|
||||||
'title': 'Video Made Easy',
|
'title': 'Video Made Easy',
|
||||||
'description': 'md5:6a697ebd844ff3093bd2e82c37b409cd',
|
'description': 'md5:6a697ebd844ff3093bd2e82c37b409cd',
|
||||||
'uploader': 'viddler',
|
'uploader': 'viddler',
|
||||||
@@ -29,10 +33,10 @@ class ViddlerIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.viddler.com/v/4d03aad9/',
|
'url': 'http://www.viddler.com/v/4d03aad9/',
|
||||||
'md5': 'faa71fbf70c0bee7ab93076fd007f4b0',
|
'md5': 'f12c5a7fa839c47a79363bfdf69404fb',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '4d03aad9',
|
'id': '4d03aad9',
|
||||||
'ext': 'mp4',
|
'ext': 'ts',
|
||||||
'title': 'WALL-TO-GORTAT',
|
'title': 'WALL-TO-GORTAT',
|
||||||
'upload_date': '20150126',
|
'upload_date': '20150126',
|
||||||
'uploader': 'deadspin',
|
'uploader': 'deadspin',
|
||||||
@@ -42,10 +46,10 @@ class ViddlerIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.viddler.com/player/221ebbbd/0/',
|
'url': 'http://www.viddler.com/player/221ebbbd/0/',
|
||||||
'md5': '0defa2bd0ea613d14a6e9bd1db6be326',
|
'md5': '740511f61d3d1bb71dc14a0fe01a1c10',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '221ebbbd',
|
'id': '221ebbbd',
|
||||||
'ext': 'mp4',
|
'ext': 'mov',
|
||||||
'title': 'LETeens-Grammar-snack-third-conditional',
|
'title': 'LETeens-Grammar-snack-third-conditional',
|
||||||
'description': ' ',
|
'description': ' ',
|
||||||
'upload_date': '20140929',
|
'upload_date': '20140929',
|
||||||
@@ -54,16 +58,42 @@ class ViddlerIE(InfoExtractor):
|
|||||||
'view_count': int,
|
'view_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
# secret protected
|
||||||
|
'url': 'http://www.viddler.com/v/890c0985?secret=34051570',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '890c0985',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Complete Property Training - Traineeships',
|
||||||
|
'description': ' ',
|
||||||
|
'upload_date': '20130606',
|
||||||
|
'uploader': 'TiffanyBowtell',
|
||||||
|
'timestamp': 1370496993,
|
||||||
|
'view_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
json_url = (
|
query = {
|
||||||
'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?video_id=%s&key=v0vhrt7bg2xq1vyxhkct' %
|
'video_id': video_id,
|
||||||
video_id)
|
'key': 'v0vhrt7bg2xq1vyxhkct',
|
||||||
|
}
|
||||||
|
|
||||||
|
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||||
|
secret = qs.get('secret', [None])[0]
|
||||||
|
if secret:
|
||||||
|
query['secret'] = secret
|
||||||
|
|
||||||
headers = {'Referer': 'http://static.cdn-ec.viddler.com/js/arpeggio/v2/embed.html'}
|
headers = {'Referer': 'http://static.cdn-ec.viddler.com/js/arpeggio/v2/embed.html'}
|
||||||
request = sanitized_Request(json_url, None, headers)
|
request = sanitized_Request(
|
||||||
|
'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?%s'
|
||||||
|
% compat_urllib_parse.urlencode(query), None, headers)
|
||||||
data = self._download_json(request, video_id)['video']
|
data = self._download_json(request, video_id)['video']
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
@@ -114,7 +114,7 @@ class VideomoreIE(InfoExtractor):
|
|||||||
|
|
||||||
data = self._download_json(
|
data = self._download_json(
|
||||||
'http://videomore.ru/video/tracks/%s.json' % video_id,
|
'http://videomore.ru/video/tracks/%s.json' % video_id,
|
||||||
video_id, 'Downloadinng video JSON')
|
video_id, 'Downloading video JSON')
|
||||||
|
|
||||||
title = data.get('title') or data['project_title']
|
title = data.get('title') or data['project_title']
|
||||||
description = data.get('description') or data.get('description_raw')
|
description = data.get('description') or data.get('description_raw')
|
||||||
|
@@ -1,5 +1,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import itertools
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_HTTPError
|
from ..compat import compat_HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -11,7 +13,8 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class VidmeIE(InfoExtractor):
|
class VidmeIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]+)'
|
IE_NAME = 'vidme'
|
||||||
|
_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{,5})(?:[^\da-zA-Z]|$)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://vid.me/QNB',
|
'url': 'https://vid.me/QNB',
|
||||||
'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
|
'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
|
||||||
@@ -202,3 +205,69 @@ class VidmeIE(InfoExtractor):
|
|||||||
'comment_count': comment_count,
|
'comment_count': comment_count,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class VidmeListBaseIE(InfoExtractor):
|
||||||
|
# Max possible limit according to https://docs.vid.me/#api-Videos-List
|
||||||
|
_LIMIT = 100
|
||||||
|
|
||||||
|
def _entries(self, user_id, user_name):
|
||||||
|
for page_num in itertools.count(1):
|
||||||
|
page = self._download_json(
|
||||||
|
'https://api.vid.me/videos/%s?user=%s&limit=%d&offset=%d'
|
||||||
|
% (self._API_ITEM, user_id, self._LIMIT, (page_num - 1) * self._LIMIT),
|
||||||
|
user_name, 'Downloading user %s page %d' % (self._API_ITEM, page_num))
|
||||||
|
|
||||||
|
videos = page.get('videos', [])
|
||||||
|
if not videos:
|
||||||
|
break
|
||||||
|
|
||||||
|
for video in videos:
|
||||||
|
video_url = video.get('full_url') or video.get('embed_url')
|
||||||
|
if video_url:
|
||||||
|
yield self.url_result(video_url, VidmeIE.ie_key())
|
||||||
|
|
||||||
|
total = int_or_none(page.get('page', {}).get('total'))
|
||||||
|
if total and self._LIMIT * page_num >= total:
|
||||||
|
break
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
user_name = self._match_id(url)
|
||||||
|
|
||||||
|
user_id = self._download_json(
|
||||||
|
'https://api.vid.me/userByUsername?username=%s' % user_name,
|
||||||
|
user_name)['user']['user_id']
|
||||||
|
|
||||||
|
return self.playlist_result(
|
||||||
|
self._entries(user_id, user_name), user_id,
|
||||||
|
'%s - %s' % (user_name, self._TITLE))
|
||||||
|
|
||||||
|
|
||||||
|
class VidmeUserIE(VidmeListBaseIE):
|
||||||
|
IE_NAME = 'vidme:user'
|
||||||
|
_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{6,})(?!/likes)(?:[^\da-zA-Z]|$)'
|
||||||
|
_API_ITEM = 'list'
|
||||||
|
_TITLE = 'Videos'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://vid.me/EFARCHIVE',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3834632',
|
||||||
|
'title': 'EFARCHIVE - %s' % _TITLE,
|
||||||
|
},
|
||||||
|
'playlist_mincount': 238,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class VidmeUserLikesIE(VidmeListBaseIE):
|
||||||
|
IE_NAME = 'vidme:user:likes'
|
||||||
|
_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{6,})/likes'
|
||||||
|
_API_ITEM = 'likes'
|
||||||
|
_TITLE = 'Likes'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://vid.me/ErinAlexis/likes',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6483530',
|
||||||
|
'title': 'ErinAlexis - %s' % _TITLE,
|
||||||
|
},
|
||||||
|
'playlist_mincount': 415,
|
||||||
|
}
|
||||||
|
@@ -2,6 +2,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import smuggle_url
|
||||||
|
|
||||||
|
|
||||||
class VidziIE(InfoExtractor):
|
class VidziIE(InfoExtractor):
|
||||||
@@ -13,6 +14,11 @@ class VidziIE(InfoExtractor):
|
|||||||
'id': 'cghql9yq6emu',
|
'id': 'cghql9yq6emu',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'youtube-dl test video 1\\\\2\'3/4<5\\\\6ä7↭',
|
'title': 'youtube-dl test video 1\\\\2\'3/4<5\\\\6ä7↭',
|
||||||
|
'uploader': 'vidzi.tv',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -20,19 +26,14 @@ class VidziIE(InfoExtractor):
|
|||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
video_host = self._html_search_regex(
|
|
||||||
r'id=\'vplayer\'><img src="http://(.*?)/i', webpage,
|
|
||||||
'video host')
|
|
||||||
video_hash = self._html_search_regex(
|
|
||||||
r'\|([a-z0-9]+)\|hls\|type', webpage, 'video_hash')
|
|
||||||
ext = self._html_search_regex(
|
|
||||||
r'\|tracks\|([a-z0-9]+)\|', webpage, 'video ext')
|
|
||||||
video_url = 'http://' + video_host + '/' + video_hash + '/v.' + ext
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')
|
r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')
|
||||||
|
|
||||||
|
# Vidzi now uses jwplayer, which can be handled by GenericIE
|
||||||
return {
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'url': video_url,
|
'url': smuggle_url(url, {'to_generic': True}),
|
||||||
|
'ie_key': 'Generic',
|
||||||
}
|
}
|
||||||
|
@@ -45,6 +45,10 @@ class ViideaIE(InfoExtractor):
|
|||||||
'upload_date': '20130627',
|
'upload_date': '20130627',
|
||||||
'duration': 565,
|
'duration': 565,
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
# video with invalid direct format links (HTTP 403)
|
# video with invalid direct format links (HTTP 403)
|
||||||
'url': 'http://videolectures.net/russir2010_filippova_nlp/',
|
'url': 'http://videolectures.net/russir2010_filippova_nlp/',
|
||||||
|
@@ -57,7 +57,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
|||||||
|
|
||||||
def _extract_xsrft_and_vuid(self, webpage):
|
def _extract_xsrft_and_vuid(self, webpage):
|
||||||
xsrft = self._search_regex(
|
xsrft = self._search_regex(
|
||||||
r'xsrft\s*[=:]\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)',
|
r'(?:(?P<q1>["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)',
|
||||||
webpage, 'login token', group='xsrft')
|
webpage, 'login token', group='xsrft')
|
||||||
vuid = self._search_regex(
|
vuid = self._search_regex(
|
||||||
r'["\']vuid["\']\s*:\s*(["\'])(?P<vuid>.+?)\1',
|
r'["\']vuid["\']\s*:\s*(["\'])(?P<vuid>.+?)\1',
|
||||||
|
@@ -265,7 +265,7 @@ class VKIE(InfoExtractor):
|
|||||||
return self.url_result(pladform_url)
|
return self.url_result(pladform_url)
|
||||||
|
|
||||||
m_rutube = re.search(
|
m_rutube = re.search(
|
||||||
r'\ssrc="((?:https?:)?//rutube\.ru\\?/video\\?/embed(?:.*?))\\?"', info_page)
|
r'\ssrc="((?:https?:)?//rutube\.ru\\?/(?:video|play)\\?/embed(?:.*?))\\?"', info_page)
|
||||||
if m_rutube is not None:
|
if m_rutube is not None:
|
||||||
rutube_url = self._proto_relative_url(
|
rutube_url = self._proto_relative_url(
|
||||||
m_rutube.group(1).replace('\\', ''))
|
m_rutube.group(1).replace('\\', ''))
|
||||||
@@ -321,7 +321,7 @@ class VKIE(InfoExtractor):
|
|||||||
class VKUserVideosIE(InfoExtractor):
|
class VKUserVideosIE(InfoExtractor):
|
||||||
IE_NAME = 'vk:uservideos'
|
IE_NAME = 'vk:uservideos'
|
||||||
IE_DESC = "VK - User's Videos"
|
IE_DESC = "VK - User's Videos"
|
||||||
_VALID_URL = r'https?://vk\.com/videos(?P<id>-?[0-9]+)$'
|
_VALID_URL = r'https?://vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&]|$)'
|
||||||
_TEMPLATE_URL = 'https://vk.com/videos'
|
_TEMPLATE_URL = 'https://vk.com/videos'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://vk.com/videos205387401',
|
'url': 'http://vk.com/videos205387401',
|
||||||
@@ -333,6 +333,9 @@ class VKUserVideosIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://vk.com/videos-77521',
|
'url': 'http://vk.com/videos-77521',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://vk.com/videos-97664626?section=all',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -1,79 +1,80 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import hmac
|
|
||||||
from hashlib import sha1
|
|
||||||
from base64 import b64encode
|
|
||||||
from time import time
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
dict_get,
|
||||||
determine_ext
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
)
|
)
|
||||||
from ..compat import compat_urllib_parse
|
from ..compat import compat_urllib_parse
|
||||||
|
|
||||||
|
|
||||||
class VLiveIE(InfoExtractor):
|
class VLiveIE(InfoExtractor):
|
||||||
IE_NAME = 'vlive'
|
IE_NAME = 'vlive'
|
||||||
# www.vlive.tv/video/ links redirect to m.vlive.tv/video/ for mobile devices
|
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
|
||||||
_VALID_URL = r'https?://(?:(www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://m.vlive.tv/video/1326',
|
'url': 'http://www.vlive.tv/video/1326',
|
||||||
'md5': 'cc7314812855ce56de70a06a27314983',
|
'md5': 'cc7314812855ce56de70a06a27314983',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1326',
|
'id': '1326',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '[V] Girl\'s Day\'s Broadcast',
|
'title': "[V] Girl's Day's Broadcast",
|
||||||
'creator': 'Girl\'s Day',
|
'creator': "Girl's Day",
|
||||||
|
'view_count': int,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
_SECRET = 'rFkwZet6pqk1vQt6SxxUkAHX7YL3lmqzUMrU4IDusTo4jEBdtOhNfT4BYYAdArwH'
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'http://m.vlive.tv/video/%s' % video_id,
|
'http://www.vlive.tv/video/%s' % video_id, video_id)
|
||||||
video_id, note='Download video page')
|
|
||||||
|
long_video_id = self._search_regex(
|
||||||
|
r'vlive\.tv\.video\.ajax\.request\.handler\.init\(\s*"[0-9]+"\s*,\s*"[^"]*"\s*,\s*"([^"]+)"',
|
||||||
|
webpage, 'long video id')
|
||||||
|
|
||||||
|
key = self._search_regex(
|
||||||
|
r'vlive\.tv\.video\.ajax\.request\.handler\.init\(\s*"[0-9]+"\s*,\s*"[^"]*"\s*,\s*"[^"]+"\s*,\s*"([^"]+)"',
|
||||||
|
webpage, 'key')
|
||||||
|
|
||||||
title = self._og_search_title(webpage)
|
title = self._og_search_title(webpage)
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
|
||||||
creator = self._html_search_regex(
|
|
||||||
r'<span[^>]+class="name">([^<>]+)</span>', webpage, 'creator')
|
|
||||||
|
|
||||||
url = 'http://global.apis.naver.com/globalV/globalV/vod/%s/playinfo?' % video_id
|
playinfo = self._download_json(
|
||||||
msgpad = '%.0f' % (time() * 1000)
|
'http://global.apis.naver.com/rmcnmv/rmcnmv/vod_play_videoInfo.json?%s'
|
||||||
md = b64encode(
|
% compat_urllib_parse.urlencode({
|
||||||
hmac.new(self._SECRET.encode('ascii'),
|
'videoId': long_video_id,
|
||||||
(url[:255] + msgpad).encode('ascii'), sha1).digest()
|
'key': key,
|
||||||
)
|
'ptc': 'http',
|
||||||
url += '&' + compat_urllib_parse.urlencode({'msgpad': msgpad, 'md': md})
|
'doct': 'json', # document type (xml or json)
|
||||||
playinfo = self._download_json(url, video_id, 'Downloading video json')
|
'cpt': 'vtt', # captions type (vtt or ttml)
|
||||||
|
}), video_id)
|
||||||
|
|
||||||
if playinfo.get('message', '') != 'success':
|
formats = [{
|
||||||
raise ExtractorError(playinfo.get('message', 'JSON request unsuccessful'))
|
|
||||||
|
|
||||||
if not playinfo.get('result'):
|
|
||||||
raise ExtractorError('No videos found.')
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
for vid in playinfo['result'].get('videos', {}).get('list', []):
|
|
||||||
formats.append({
|
|
||||||
'url': vid['source'],
|
'url': vid['source'],
|
||||||
'ext': 'mp4',
|
'format_id': vid.get('encodingOption', {}).get('name'),
|
||||||
'abr': vid.get('bitrate', {}).get('audio'),
|
'abr': float_or_none(vid.get('bitrate', {}).get('audio')),
|
||||||
'vbr': vid.get('bitrate', {}).get('video'),
|
'vbr': float_or_none(vid.get('bitrate', {}).get('video')),
|
||||||
'format_id': vid['encodingOption']['name'],
|
'width': int_or_none(vid.get('encodingOption', {}).get('width')),
|
||||||
'height': vid.get('height'),
|
'height': int_or_none(vid.get('encodingOption', {}).get('height')),
|
||||||
'width': vid.get('width'),
|
'filesize': int_or_none(vid.get('size')),
|
||||||
})
|
} for vid in playinfo.get('videos', {}).get('list', []) if vid.get('source')]
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
|
creator = self._html_search_regex(
|
||||||
|
r'<div[^>]+class="info_area"[^>]*>\s*<strong[^>]+class="name"[^>]*>([^<]+)</strong>',
|
||||||
|
webpage, 'creator', fatal=False)
|
||||||
|
|
||||||
|
view_count = int_or_none(playinfo.get('meta', {}).get('count'))
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
for caption in playinfo['result'].get('captions', {}).get('list', []):
|
for caption in playinfo.get('captions', {}).get('list', []):
|
||||||
subtitles[caption['language']] = [
|
lang = dict_get(caption, ('language', 'locale', 'country', 'label'))
|
||||||
{'ext': determine_ext(caption['source'], default_ext='vtt'),
|
if lang and caption.get('source'):
|
||||||
|
subtitles[lang] = [{
|
||||||
|
'ext': 'vtt',
|
||||||
'url': caption['source']}]
|
'url': caption['source']}]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
@@ -81,6 +82,7 @@ class VLiveIE(InfoExtractor):
|
|||||||
'title': title,
|
'title': title,
|
||||||
'creator': creator,
|
'creator': creator,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
|
'view_count': view_count,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
@@ -34,19 +34,20 @@ class XuiteIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# Video with only one format
|
# Video with only one format
|
||||||
'url': 'http://vlog.xuite.net/play/TkRZNjhULTM0NDE2MjkuZmx2',
|
'url': 'http://vlog.xuite.net/play/WUxxR2xCLTI1OTI1MDk5LmZsdg==',
|
||||||
'md5': 'c45737fc8ac5dc8ac2f92ecbcecf505e',
|
'md5': '21f7b39c009b5a4615b4463df6eb7a46',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3441629',
|
'id': '25925099',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '孫燕姿 - 眼淚成詩',
|
'title': 'BigBuckBunny_320x180',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
'duration': 217.399,
|
'duration': 596.458,
|
||||||
'timestamp': 1299383640,
|
'timestamp': 1454242500,
|
||||||
'upload_date': '20110306',
|
'upload_date': '20160131',
|
||||||
'uploader': 'Valen',
|
'uploader': 'yan12125',
|
||||||
'uploader_id': '10400126',
|
'uploader_id': '12158353',
|
||||||
'categories': ['影視娛樂'],
|
'categories': ['個人短片'],
|
||||||
|
'description': 'http://download.blender.org/peach/bigbuckbunny_movies/BigBuckBunny_320x180.mp4',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# Video with two formats
|
# Video with two formats
|
||||||
|
@@ -229,6 +229,9 @@ class YoukuIE(InfoExtractor):
|
|||||||
if error_note is not None and '因版权原因无法观看此视频' in error_note:
|
if error_note is not None and '因版权原因无法观看此视频' in error_note:
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'Youku said: Sorry, this video is available in China only', expected=True)
|
'Youku said: Sorry, this video is available in China only', expected=True)
|
||||||
|
elif error_note and '该视频被设为私密' in error_note:
|
||||||
|
raise ExtractorError(
|
||||||
|
'Youku said: Sorry, this video is private', expected=True)
|
||||||
else:
|
else:
|
||||||
msg = 'Youku server reported error %i' % error.get('code')
|
msg = 'Youku server reported error %i' % error.get('code')
|
||||||
if error_note is not None:
|
if error_note is not None:
|
||||||
|
@@ -114,15 +114,13 @@ class YouPornIE(InfoExtractor):
|
|||||||
formats.append(f)
|
formats.append(f)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
description = self._html_search_regex(
|
description = self._og_search_description(webpage, default=None)
|
||||||
r'(?s)<div[^>]+class=["\']video-description["\'][^>]*>(.+?)</div>',
|
|
||||||
webpage, 'description', default=None)
|
|
||||||
thumbnail = self._search_regex(
|
thumbnail = self._search_regex(
|
||||||
r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1',
|
r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1',
|
||||||
webpage, 'thumbnail', fatal=False, group='thumbnail')
|
webpage, 'thumbnail', fatal=False, group='thumbnail')
|
||||||
|
|
||||||
uploader = self._html_search_regex(
|
uploader = self._html_search_regex(
|
||||||
r'(?s)<div[^>]+class=["\']videoInfoBy["\'][^>]*>\s*By:\s*</div>(.+?)</(?:a|div)>',
|
r'(?s)<div[^>]+class=["\']videoInfoBy(?:\s+[^"\']+)?["\'][^>]*>\s*By:\s*</div>(.+?)</(?:a|div)>',
|
||||||
webpage, 'uploader', fatal=False)
|
webpage, 'uploader', fatal=False)
|
||||||
upload_date = unified_strdate(self._html_search_regex(
|
upload_date = unified_strdate(self._html_search_regex(
|
||||||
r'(?s)<div[^>]+class=["\']videoInfoTime["\'][^>]*>(.+?)</div>',
|
r'(?s)<div[^>]+class=["\']videoInfoTime["\'][^>]*>(.+?)</div>',
|
||||||
|
@@ -181,7 +181,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
class YoutubeEntryListBaseInfoExtractor(InfoExtractor):
|
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
|
||||||
# Extract entries from page with "Load more" button
|
# Extract entries from page with "Load more" button
|
||||||
def _entries(self, page, playlist_id):
|
def _entries(self, page, playlist_id):
|
||||||
more_widget_html = content_html = page
|
more_widget_html = content_html = page
|
||||||
@@ -233,7 +233,7 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
|
|||||||
|
|
||||||
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
|
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
|
||||||
def _process_page(self, content):
|
def _process_page(self, content):
|
||||||
for playlist_id in re.findall(r'href="/?playlist\?list=(.+?)"', content):
|
for playlist_id in orderedSet(re.findall(r'href="/?playlist\?list=([0-9A-Za-z-_]{10,})"', content)):
|
||||||
yield self.url_result(
|
yield self.url_result(
|
||||||
'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
|
'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
|
||||||
|
|
||||||
@@ -286,7 +286,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
|
'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
|
||||||
'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
|
'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
|
||||||
'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
|
'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
|
||||||
'36': {'ext': '3gp', 'width': 320, 'height': 240, 'acodec': 'aac', 'abr': 32, 'vcodec': 'mp4v'},
|
# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
|
||||||
|
'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
|
||||||
'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
|
'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
|
||||||
'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
|
'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
|
||||||
'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
|
'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
|
||||||
@@ -316,64 +317,65 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
|
'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
|
||||||
|
|
||||||
# DASH mp4 video
|
# DASH mp4 video
|
||||||
'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40},
|
'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||||
'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40},
|
'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||||
'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40},
|
'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||||
'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40},
|
'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||||
'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40},
|
'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||||
'138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
|
'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
|
||||||
'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40},
|
'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||||
'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40},
|
'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||||
'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
|
'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
|
||||||
'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
|
'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
|
||||||
'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40},
|
'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||||
|
|
||||||
# Dash mp4 audio
|
# Dash mp4 audio
|
||||||
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
|
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
|
||||||
'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
|
'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
|
||||||
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
|
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
|
||||||
|
|
||||||
# Dash webm
|
# Dash webm
|
||||||
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||||
'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||||
'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||||
'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||||
'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||||
'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||||
'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp9', 'preference': -40},
|
'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9', 'preference': -40},
|
||||||
'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40},
|
'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||||
'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40},
|
'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||||
'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40},
|
'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||||
'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40},
|
'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||||
'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40},
|
'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||||
'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40},
|
'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||||
'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40},
|
'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||||
'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40},
|
'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||||
# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
|
# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
|
||||||
'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40},
|
'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||||
'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
|
'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
|
||||||
'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
|
'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
|
||||||
'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
|
'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
|
||||||
'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40},
|
'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||||
'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
|
'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
|
||||||
|
|
||||||
# Dash webm audio
|
# Dash webm audio
|
||||||
'171': {'ext': 'webm', 'acodec': 'vorbis', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
|
'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
|
||||||
'172': {'ext': 'webm', 'acodec': 'vorbis', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
|
'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
|
||||||
|
|
||||||
# Dash webm audio with opus inside
|
# Dash webm audio with opus inside
|
||||||
'249': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
|
'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
|
||||||
'250': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
|
'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
|
||||||
'251': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
|
'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
|
||||||
|
|
||||||
# RTMP (unnamed)
|
# RTMP (unnamed)
|
||||||
'_rtmp': {'protocol': 'rtmp'},
|
'_rtmp': {'protocol': 'rtmp'},
|
||||||
}
|
}
|
||||||
|
_SUBTITLE_FORMATS = ('ttml', 'vtt')
|
||||||
|
|
||||||
IE_NAME = 'youtube'
|
IE_NAME = 'youtube'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&t=1s&end=9',
|
'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'BaW_jenozKc',
|
'id': 'BaW_jenozKc',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@@ -439,7 +441,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&v=UxxajLWwzqY',
|
'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
|
||||||
'note': 'Use the first video ID in the URL',
|
'note': 'Use the first video ID in the URL',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'BaW_jenozKc',
|
'id': 'BaW_jenozKc',
|
||||||
@@ -702,6 +704,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
|
||||||
|
'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'gVfLd0zydlo',
|
||||||
|
'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
|
||||||
|
},
|
||||||
|
'playlist_count': 2,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://vid.plus/FlRa-iH7PGw',
|
'url': 'http://vid.plus/FlRa-iH7PGw',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@@ -918,7 +929,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
if lang in sub_lang_list:
|
if lang in sub_lang_list:
|
||||||
continue
|
continue
|
||||||
sub_formats = []
|
sub_formats = []
|
||||||
for ext in ['sbv', 'vtt', 'srt']:
|
for ext in self._SUBTITLE_FORMATS:
|
||||||
params = compat_urllib_parse.urlencode({
|
params = compat_urllib_parse.urlencode({
|
||||||
'lang': lang,
|
'lang': lang,
|
||||||
'v': video_id,
|
'v': video_id,
|
||||||
@@ -988,7 +999,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
for lang_node in caption_list.findall('target'):
|
for lang_node in caption_list.findall('target'):
|
||||||
sub_lang = lang_node.attrib['lang_code']
|
sub_lang = lang_node.attrib['lang_code']
|
||||||
sub_formats = []
|
sub_formats = []
|
||||||
for ext in ['sbv', 'vtt', 'srt']:
|
for ext in self._SUBTITLE_FORMATS:
|
||||||
params = compat_urllib_parse.urlencode({
|
params = compat_urllib_parse.urlencode({
|
||||||
'lang': original_lang,
|
'lang': original_lang,
|
||||||
'tlang': sub_lang,
|
'tlang': sub_lang,
|
||||||
@@ -1035,73 +1046,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
|
url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
|
||||||
return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
|
return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
|
||||||
|
|
||||||
def _parse_dash_manifest(
|
|
||||||
self, video_id, dash_manifest_url, player_url, age_gate, fatal=True):
|
|
||||||
def decrypt_sig(mobj):
|
|
||||||
s = mobj.group(1)
|
|
||||||
dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
|
|
||||||
return '/signature/%s' % dec_s
|
|
||||||
dash_manifest_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, dash_manifest_url)
|
|
||||||
dash_doc = self._download_xml(
|
|
||||||
dash_manifest_url, video_id,
|
|
||||||
note='Downloading DASH manifest',
|
|
||||||
errnote='Could not download DASH manifest',
|
|
||||||
fatal=fatal)
|
|
||||||
|
|
||||||
if dash_doc is False:
|
|
||||||
return []
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'):
|
|
||||||
mime_type = a.attrib.get('mimeType')
|
|
||||||
for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'):
|
|
||||||
url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
|
|
||||||
if url_el is None:
|
|
||||||
continue
|
|
||||||
if mime_type == 'text/vtt':
|
|
||||||
# TODO implement WebVTT downloading
|
|
||||||
pass
|
|
||||||
elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
|
|
||||||
segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList')
|
|
||||||
format_id = r.attrib['id']
|
|
||||||
video_url = url_el.text
|
|
||||||
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
|
|
||||||
f = {
|
|
||||||
'format_id': format_id,
|
|
||||||
'url': video_url,
|
|
||||||
'width': int_or_none(r.attrib.get('width')),
|
|
||||||
'height': int_or_none(r.attrib.get('height')),
|
|
||||||
'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
|
|
||||||
'asr': int_or_none(r.attrib.get('audioSamplingRate')),
|
|
||||||
'filesize': filesize,
|
|
||||||
'fps': int_or_none(r.attrib.get('frameRate')),
|
|
||||||
}
|
|
||||||
if segment_list is not None:
|
|
||||||
f.update({
|
|
||||||
'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'],
|
|
||||||
'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')],
|
|
||||||
'protocol': 'http_dash_segments',
|
|
||||||
})
|
|
||||||
try:
|
|
||||||
existing_format = next(
|
|
||||||
fo for fo in formats
|
|
||||||
if fo['format_id'] == format_id)
|
|
||||||
except StopIteration:
|
|
||||||
full_info = self._formats.get(format_id, {}).copy()
|
|
||||||
full_info.update(f)
|
|
||||||
codecs = r.attrib.get('codecs')
|
|
||||||
if codecs:
|
|
||||||
if full_info.get('acodec') == 'none':
|
|
||||||
full_info['vcodec'] = codecs
|
|
||||||
elif full_info.get('vcodec') == 'none':
|
|
||||||
full_info['acodec'] = codecs
|
|
||||||
formats.append(full_info)
|
|
||||||
else:
|
|
||||||
existing_format.update(f)
|
|
||||||
else:
|
|
||||||
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
|
||||||
return formats
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
url, smuggled_data = unsmuggle_url(url, {})
|
url, smuggled_data = unsmuggle_url(url, {})
|
||||||
|
|
||||||
@@ -1261,9 +1205,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
if not self._downloader.params.get('noplaylist'):
|
if not self._downloader.params.get('noplaylist'):
|
||||||
entries = []
|
entries = []
|
||||||
feed_ids = []
|
feed_ids = []
|
||||||
multifeed_metadata_list = compat_urllib_parse_unquote_plus(video_info['multifeed_metadata_list'][0])
|
multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
|
||||||
for feed in multifeed_metadata_list.split(','):
|
for feed in multifeed_metadata_list.split(','):
|
||||||
feed_data = compat_parse_qs(feed)
|
# Unquote should take place before split on comma (,) since textual
|
||||||
|
# fields may contain comma as well (see
|
||||||
|
# https://github.com/rg3/youtube-dl/issues/8536)
|
||||||
|
feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
|
||||||
entries.append({
|
entries.append({
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'ie_key': 'Youtube',
|
'ie_key': 'Youtube',
|
||||||
@@ -1530,11 +1477,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
# Look for the DASH manifest
|
# Look for the DASH manifest
|
||||||
if self._downloader.params.get('youtube_include_dash_manifest', True):
|
if self._downloader.params.get('youtube_include_dash_manifest', True):
|
||||||
dash_mpd_fatal = True
|
dash_mpd_fatal = True
|
||||||
for dash_manifest_url in dash_mpds:
|
for mpd_url in dash_mpds:
|
||||||
dash_formats = {}
|
dash_formats = {}
|
||||||
try:
|
try:
|
||||||
for df in self._parse_dash_manifest(
|
def decrypt_sig(mobj):
|
||||||
video_id, dash_manifest_url, player_url, age_gate, dash_mpd_fatal):
|
s = mobj.group(1)
|
||||||
|
dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
|
||||||
|
return '/signature/%s' % dec_s
|
||||||
|
|
||||||
|
mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
|
||||||
|
|
||||||
|
for df in self._extract_mpd_formats(
|
||||||
|
mpd_url, video_id, fatal=dash_mpd_fatal,
|
||||||
|
formats_dict=self._formats):
|
||||||
# Do not overwrite DASH format found in some previous DASH manifest
|
# Do not overwrite DASH format found in some previous DASH manifest
|
||||||
if df['format_id'] not in dash_formats:
|
if df['format_id'] not in dash_formats:
|
||||||
dash_formats[df['format_id']] = df
|
dash_formats[df['format_id']] = df
|
||||||
@@ -1602,7 +1557,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class YoutubePlaylistIE(YoutubeBaseInfoExtractor, YoutubePlaylistBaseInfoExtractor):
|
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
|
||||||
IE_DESC = 'YouTube.com playlists'
|
IE_DESC = 'YouTube.com playlists'
|
||||||
_VALID_URL = r"""(?x)(?:
|
_VALID_URL = r"""(?x)(?:
|
||||||
(?:https?://)?
|
(?:https?://)?
|
||||||
|
@@ -415,6 +415,11 @@ def parseOpts(overrideArguments=None):
|
|||||||
'--hls-prefer-native',
|
'--hls-prefer-native',
|
||||||
dest='hls_prefer_native', action='store_true',
|
dest='hls_prefer_native', action='store_true',
|
||||||
help='Use the native HLS downloader instead of ffmpeg (experimental)')
|
help='Use the native HLS downloader instead of ffmpeg (experimental)')
|
||||||
|
downloader.add_option(
|
||||||
|
'--hls-use-mpegts',
|
||||||
|
dest='hls_use_mpegts', action='store_true',
|
||||||
|
help='Use the mpegts container for HLS videos, allowing to play the '
|
||||||
|
'video while downloading (some players may not be able to play it)')
|
||||||
downloader.add_option(
|
downloader.add_option(
|
||||||
'--external-downloader',
|
'--external-downloader',
|
||||||
dest='external_downloader', metavar='COMMAND',
|
dest='external_downloader', metavar='COMMAND',
|
||||||
|
@@ -391,6 +391,10 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
|
|||||||
for (name, value) in metadata.items():
|
for (name, value) in metadata.items():
|
||||||
options.extend(['-metadata', '%s=%s' % (name, value)])
|
options.extend(['-metadata', '%s=%s' % (name, value)])
|
||||||
|
|
||||||
|
# https://github.com/rg3/youtube-dl/issues/8350
|
||||||
|
if info.get('protocol') == 'm3u8_native' or info.get('protocol') == 'm3u8' and self._downloader.params.get('hls_prefer_native', False):
|
||||||
|
options.extend(['-bsf:a', 'aac_adtstoasc'])
|
||||||
|
|
||||||
self._downloader.to_screen('[ffmpeg] Adding metadata to \'%s\'' % filename)
|
self._downloader.to_screen('[ffmpeg] Adding metadata to \'%s\'' % filename)
|
||||||
self.run_ffmpeg(filename, temp_filename, options)
|
self.run_ffmpeg(filename, temp_filename, options)
|
||||||
os.remove(encodeFilename(filename))
|
os.remove(encodeFilename(filename))
|
||||||
@@ -479,6 +483,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
|
|||||||
self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to convert')
|
self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to convert')
|
||||||
return [], info
|
return [], info
|
||||||
self._downloader.to_screen('[ffmpeg] Converting subtitles')
|
self._downloader.to_screen('[ffmpeg] Converting subtitles')
|
||||||
|
sub_filenames = []
|
||||||
for lang, sub in subs.items():
|
for lang, sub in subs.items():
|
||||||
ext = sub['ext']
|
ext = sub['ext']
|
||||||
if ext == new_ext:
|
if ext == new_ext:
|
||||||
@@ -486,6 +491,8 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
|
|||||||
'[ffmpeg] Subtitle file for %s is already in the requested'
|
'[ffmpeg] Subtitle file for %s is already in the requested'
|
||||||
'format' % new_ext)
|
'format' % new_ext)
|
||||||
continue
|
continue
|
||||||
|
old_file = subtitles_filename(filename, lang, ext)
|
||||||
|
sub_filenames.append(old_file)
|
||||||
new_file = subtitles_filename(filename, lang, new_ext)
|
new_file = subtitles_filename(filename, lang, new_ext)
|
||||||
|
|
||||||
if ext == 'dfxp' or ext == 'ttml':
|
if ext == 'dfxp' or ext == 'ttml':
|
||||||
@@ -493,7 +500,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
|
|||||||
'You have requested to convert dfxp (TTML) subtitles into another format, '
|
'You have requested to convert dfxp (TTML) subtitles into another format, '
|
||||||
'which results in style information loss')
|
'which results in style information loss')
|
||||||
|
|
||||||
dfxp_file = subtitles_filename(filename, lang, ext)
|
dfxp_file = old_file
|
||||||
srt_file = subtitles_filename(filename, lang, 'srt')
|
srt_file = subtitles_filename(filename, lang, 'srt')
|
||||||
|
|
||||||
with io.open(dfxp_file, 'rt', encoding='utf-8') as f:
|
with io.open(dfxp_file, 'rt', encoding='utf-8') as f:
|
||||||
@@ -501,8 +508,8 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
|
|||||||
|
|
||||||
with io.open(srt_file, 'wt', encoding='utf-8') as f:
|
with io.open(srt_file, 'wt', encoding='utf-8') as f:
|
||||||
f.write(srt_data)
|
f.write(srt_data)
|
||||||
|
old_file = srt_file
|
||||||
|
|
||||||
ext = 'srt'
|
|
||||||
subs[lang] = {
|
subs[lang] = {
|
||||||
'ext': 'srt',
|
'ext': 'srt',
|
||||||
'data': srt_data
|
'data': srt_data
|
||||||
@@ -510,15 +517,15 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
|
|||||||
|
|
||||||
if new_ext == 'srt':
|
if new_ext == 'srt':
|
||||||
continue
|
continue
|
||||||
|
else:
|
||||||
|
sub_filenames.append(srt_file)
|
||||||
|
|
||||||
self.run_ffmpeg(
|
self.run_ffmpeg(old_file, new_file, ['-f', new_format])
|
||||||
subtitles_filename(filename, lang, ext),
|
|
||||||
new_file, ['-f', new_format])
|
|
||||||
|
|
||||||
with io.open(new_file, 'rt', encoding='utf-8') as f:
|
with io.open(new_file, 'rt', encoding='utf-8') as f:
|
||||||
subs[lang] = {
|
subs[lang] = {
|
||||||
'ext': ext,
|
'ext': new_ext,
|
||||||
'data': f.read(),
|
'data': f.read(),
|
||||||
}
|
}
|
||||||
|
|
||||||
return [], info
|
return sub_filenames, info
|
||||||
|
@@ -56,7 +56,7 @@ from .compat import (
|
|||||||
compiled_regex_type = type(re.compile(''))
|
compiled_regex_type = type(re.compile(''))
|
||||||
|
|
||||||
std_headers = {
|
std_headers = {
|
||||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',
|
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/44.0 (Chrome)',
|
||||||
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
|
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
|
||||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
||||||
'Accept-Encoding': 'gzip, deflate',
|
'Accept-Encoding': 'gzip, deflate',
|
||||||
@@ -1717,6 +1717,16 @@ def encode_dict(d, encoding='utf-8'):
|
|||||||
return dict((encode(k), encode(v)) for k, v in d.items())
|
return dict((encode(k), encode(v)) for k, v in d.items())
|
||||||
|
|
||||||
|
|
||||||
|
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
|
||||||
|
if isinstance(key_or_keys, (list, tuple)):
|
||||||
|
for key in key_or_keys:
|
||||||
|
if key not in d or d[key] is None or skip_false_values and not d[key]:
|
||||||
|
continue
|
||||||
|
return d[key]
|
||||||
|
return default
|
||||||
|
return d.get(key_or_keys, default)
|
||||||
|
|
||||||
|
|
||||||
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
|
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
|
||||||
return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
|
return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
|
||||||
|
|
||||||
@@ -1739,7 +1749,7 @@ def parse_age_limit(s):
|
|||||||
|
|
||||||
def strip_jsonp(code):
|
def strip_jsonp(code):
|
||||||
return re.sub(
|
return re.sub(
|
||||||
r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
|
r'(?s)^[a-zA-Z0-9_.]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
|
||||||
|
|
||||||
|
|
||||||
def js_to_json(code):
|
def js_to_json(code):
|
||||||
@@ -2017,20 +2027,27 @@ def dfxp2srt(dfxp_data):
|
|||||||
'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
|
'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
|
||||||
})
|
})
|
||||||
|
|
||||||
|
class TTMLPElementParser(object):
|
||||||
|
out = ''
|
||||||
|
|
||||||
|
def start(self, tag, attrib):
|
||||||
|
if tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
|
||||||
|
self.out += '\n'
|
||||||
|
|
||||||
|
def end(self, tag):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def data(self, data):
|
||||||
|
self.out += data
|
||||||
|
|
||||||
|
def close(self):
|
||||||
|
return self.out.strip()
|
||||||
|
|
||||||
def parse_node(node):
|
def parse_node(node):
|
||||||
str_or_empty = functools.partial(str_or_none, default='')
|
target = TTMLPElementParser()
|
||||||
|
parser = xml.etree.ElementTree.XMLParser(target=target)
|
||||||
out = str_or_empty(node.text)
|
parser.feed(xml.etree.ElementTree.tostring(node))
|
||||||
|
return parser.close()
|
||||||
for child in node:
|
|
||||||
if child.tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
|
|
||||||
out += '\n' + str_or_empty(child.tail)
|
|
||||||
elif child.tag in (_x('ttml:span'), _x('ttaf1:span'), 'span'):
|
|
||||||
out += str_or_empty(parse_node(child))
|
|
||||||
else:
|
|
||||||
out += str_or_empty(xml.etree.ElementTree.tostring(child))
|
|
||||||
|
|
||||||
return out
|
|
||||||
|
|
||||||
dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))
|
dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))
|
||||||
out = []
|
out = []
|
||||||
|
@@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2016.01.29'
|
__version__ = '2016.02.13'
|
||||||
|
Reference in New Issue
Block a user