diff -Nru youtube-dl-2016.08.22/ChangeLog youtube-dl-2016.08.24.1/ChangeLog --- youtube-dl-2016.08.22/ChangeLog 2016-08-21 21:17:28.000000000 +0000 +++ youtube-dl-2016.08.24.1/ChangeLog 2016-08-24 03:11:02.000000000 +0000 @@ -1,3 +1,22 @@ +version 2016.08.24.1 + +Extractors ++ [pluralsight] Add support for subtitles (#9681) + + +version 2016.08.24 + +Extractors +* [youtube] Fix authentication (#10392) +* [openload] Fix extraction (#10408) ++ [bravotv] Add support for Adobe Pass (#10407) +* [bravotv] Fix clip info extraction (#10407) +* [eagleplatform] Improve embedded videos detection (#10409) +* [awaan] Fix extraction +* [mtvservices:embedded] Update config URL ++ [abc:iview] Add extractor (#6148) + + version 2016.08.22 Core diff -Nru youtube-dl-2016.08.22/debian/changelog youtube-dl-2016.08.24.1/debian/changelog --- youtube-dl-2016.08.22/debian/changelog 2016-08-23 10:32:58.000000000 +0000 +++ youtube-dl-2016.08.24.1/debian/changelog 2016-08-24 10:02:43.000000000 +0000 @@ -1,8 +1,14 @@ -youtube-dl (1:2016.08.22-1~webupd8~vivid0) vivid; urgency=medium +youtube-dl (1:2016.08.24.1-1~webupd8~vivid0) vivid; urgency=medium * New upstream release (automated upload); Sync packaging with Ubuntu Yakkety - -- Alin Andrei Tue, 23 Aug 2016 13:32:58 +0300 + -- Alin Andrei Wed, 24 Aug 2016 13:02:43 +0300 + +youtube-dl (1:2016.08.22-1~webupd8~trusty0) trusty; urgency=medium + + * New upstream release (automated upload); Sync packaging with Ubuntu Yakkety + + -- Alin Andrei Tue, 23 Aug 2016 13:33:04 +0300 youtube-dl (1:2016.08.19-1~webupd8~trusty0) trusty; urgency=medium diff -Nru youtube-dl-2016.08.22/docs/supportedsites.md youtube-dl-2016.08.24.1/docs/supportedsites.md --- youtube-dl-2016.08.22/docs/supportedsites.md 2016-08-21 21:17:34.000000000 +0000 +++ youtube-dl-2016.08.24.1/docs/supportedsites.md 2016-08-24 03:11:04.000000000 +0000 @@ -16,6 +16,7 @@ - **9gag** - **9now.com.au** - **abc.net.au** + - **abc.net.au:iview** - **Abc7News** - **abcnews** - **abcnews:video** @@ -66,6 +67,10 @@ - **audiomack** - **audiomack:album** - **auroravid**: AuroraVid + - **AWAAN** + - **awaan:live** + - **awaan:season** + - **awaan:video** - **Azubu** - **AzubuLive** - **BaiduVideo**: 百度视频 @@ -172,10 +177,6 @@ - **daum.net:playlist** - **daum.net:user** - **DBTV** - - **DCN** - - **dcn:live** - - **dcn:season** - - **dcn:video** - **DctpTv** - **DeezerPlaylist** - **defense.gouv.fr** diff -Nru youtube-dl-2016.08.22/youtube_dl/extractor/abc.py youtube-dl-2016.08.24.1/youtube_dl/extractor/abc.py --- youtube-dl-2016.08.22/youtube_dl/extractor/abc.py 2016-08-21 21:17:02.000000000 +0000 +++ youtube-dl-2016.08.24.1/youtube_dl/extractor/abc.py 2016-08-24 03:05:40.000000000 +0000 @@ -7,6 +7,7 @@ ExtractorError, js_to_json, int_or_none, + parse_iso8601, ) @@ -93,3 +94,57 @@ 'description': self._og_search_description(webpage), 'thumbnail': self._og_search_thumbnail(webpage), } + + +class ABCIViewIE(InfoExtractor): + IE_NAME = 'abc.net.au:iview' + _VALID_URL = r'https?://iview\.abc\.net\.au/programs/[^/]+/(?P[^/?#]+)' + + _TESTS = [{ + 'url': 'http://iview.abc.net.au/programs/gardening-australia/FA1505V024S00', + 'md5': '979d10b2939101f0d27a06b79edad536', + 'info_dict': { + 'id': 'FA1505V024S00', + 'ext': 'mp4', + 'title': 'Series 27 Ep 24', + 'description': 'md5:b28baeae7504d1148e1d2f0e3ed3c15d', + 'upload_date': '20160820', + 'uploader_id': 'abc1', + 'timestamp': 1471719600, + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + video_params = self._parse_json(self._search_regex( + r'videoParams\s*=\s*({.+?});', webpage, 'video params'), video_id) + title = video_params['title'] + stream = next(s for s in video_params['playlist'] if s.get('type') == 'program') + + formats = self._extract_akamai_formats(stream['hds-unmetered'], video_id) + self._sort_formats(formats) + + subtitles = {} + src_vtt = stream.get('captions', {}).get('src-vtt') + if src_vtt: + subtitles['en'] = [{ + 'url': src_vtt, + 'ext': 'vtt', + }] + + return { + 'id': video_id, + 'title': title, + 'description': self._html_search_meta(['og:description', 'twitter:description'], webpage), + 'thumbnail': self._html_search_meta(['og:image', 'twitter:image:src'], webpage), + 'duration': int_or_none(video_params.get('eventDuration')), + 'timestamp': parse_iso8601(video_params.get('pubDate'), ' '), + 'series': video_params.get('seriesTitle'), + 'series_id': video_params.get('seriesHouseNumber') or video_id[:7], + 'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage)), + 'episode': self._html_search_meta('episode_title', webpage), + 'uploader_id': video_params.get('channel'), + 'formats': formats, + 'subtitles': subtitles, + } diff -Nru youtube-dl-2016.08.22/youtube_dl/extractor/awaan.py youtube-dl-2016.08.24.1/youtube_dl/extractor/awaan.py --- youtube-dl-2016.08.22/youtube_dl/extractor/awaan.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2016.08.24.1/youtube_dl/extractor/awaan.py 2016-08-24 03:05:40.000000000 +0000 @@ -0,0 +1,199 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +import base64 + +from .common import InfoExtractor +from ..compat import ( + compat_urllib_parse_urlencode, + compat_str, +) +from ..utils import ( + int_or_none, + parse_iso8601, + smuggle_url, + unsmuggle_url, + urlencode_postdata, +) + + +class AWAANIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P\d+)/[^/]+(?:/(?P\d+)/(?P\d+))?' + + def _real_extract(self, url): + show_id, video_id, season_id = re.match(self._VALID_URL, url).groups() + if video_id and int(video_id) > 0: + return self.url_result( + 'http://awaan.ae/media/%s' % video_id, 'AWAANVideo') + elif season_id and int(season_id) > 0: + return self.url_result(smuggle_url( + 'http://awaan.ae/program/season/%s' % season_id, + {'show_id': show_id}), 'AWAANSeason') + else: + return self.url_result( + 'http://awaan.ae/program/%s' % show_id, 'AWAANSeason') + + +class AWAANBaseIE(InfoExtractor): + def _parse_video_data(self, video_data, video_id, is_live): + title = video_data.get('title_en') or video_data['title_ar'] + img = video_data.get('img') + + return { + 'id': video_id, + 'title': self._live_title(title) if is_live else title, + 'description': video_data.get('description_en') or video_data.get('description_ar'), + 'thumbnail': 'http://admin.mangomolo.com/analytics/%s' % img if img else None, + 'duration': int_or_none(video_data.get('duration')), + 'timestamp': parse_iso8601(video_data.get('create_time'), ' '), + 'is_live': is_live, + } + + def _extract_video_formats(self, webpage, video_id, m3u8_entry_protocol): + formats = [] + format_url_base = 'http' + self._html_search_regex( + [ + r'file\s*:\s*"https?(://[^"]+)/playlist.m3u8', + r']+href="rtsp(://[^"]+)"' + ], webpage, 'format url') + formats.extend(self._extract_mpd_formats( + format_url_base + '/manifest.mpd', + video_id, mpd_id='dash', fatal=False)) + formats.extend(self._extract_m3u8_formats( + format_url_base + '/playlist.m3u8', video_id, 'mp4', + m3u8_entry_protocol, m3u8_id='hls', fatal=False)) + formats.extend(self._extract_f4m_formats( + format_url_base + '/manifest.f4m', + video_id, f4m_id='hds', fatal=False)) + self._sort_formats(formats) + return formats + + +class AWAANVideoIE(AWAANBaseIE): + IE_NAME = 'awaan:video' + _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?(?:video(?:/[^/]+)?|media|catchup/[^/]+/[^/]+)/(?P\d+)' + _TESTS = [{ + 'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375', + 'md5': '5f61c33bfc7794315c671a62d43116aa', + 'info_dict': + { + 'id': '17375', + 'ext': 'mp4', + 'title': 'رحلة العمر : الحلقة 1', + 'description': 'md5:0156e935d870acb8ef0a66d24070c6d6', + 'duration': 2041, + 'timestamp': 1227504126, + 'upload_date': '20081124', + }, + }, { + 'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + video_data = self._download_json( + 'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id, + video_id, headers={'Origin': 'http://awaan.ae'}) + info = self._parse_video_data(video_data, video_id, False) + + webpage = self._download_webpage( + 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + + compat_urllib_parse_urlencode({ + 'id': video_data['id'], + 'user_id': video_data['user_id'], + 'signature': video_data['signature'], + 'countries': 'Q0M=', + 'filter': 'DENY', + }), video_id) + info['formats'] = self._extract_video_formats(webpage, video_id, 'm3u8_native') + return info + + +class AWAANLiveIE(AWAANBaseIE): + IE_NAME = 'awaan:live' + _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?live/(?P\d+)' + _TEST = { + 'url': 'http://awaan.ae/live/6/dubai-tv', + 'info_dict': { + 'id': '6', + 'ext': 'mp4', + 'title': 're:Dubai Al Oula [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'upload_date': '20150107', + 'timestamp': 1420588800, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + channel_id = self._match_id(url) + + channel_data = self._download_json( + 'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id=%s' % channel_id, + channel_id, headers={'Origin': 'http://awaan.ae'}) + info = self._parse_video_data(channel_data, channel_id, True) + + webpage = self._download_webpage( + 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + + compat_urllib_parse_urlencode({ + 'id': base64.b64encode(channel_data['user_id'].encode()).decode(), + 'channelid': base64.b64encode(channel_data['id'].encode()).decode(), + 'signature': channel_data['signature'], + 'countries': 'Q0M=', + 'filter': 'DENY', + }), channel_id) + info['formats'] = self._extract_video_formats(webpage, channel_id, 'm3u8') + return info + + +class AWAANSeasonIE(InfoExtractor): + IE_NAME = 'awaan:season' + _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?program/(?:(?P\d+)|season/(?P\d+))' + _TEST = { + 'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A', + 'info_dict': + { + 'id': '7910', + 'title': 'محاضرات الشيخ الشعراوي', + }, + 'playlist_mincount': 27, + } + + def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + show_id, season_id = re.match(self._VALID_URL, url).groups() + + data = {} + if season_id: + data['season'] = season_id + show_id = smuggled_data.get('show_id') + if show_id is None: + season = self._download_json( + 'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id=%s' % season_id, + season_id, headers={'Origin': 'http://awaan.ae'}) + show_id = season['id'] + data['show_id'] = show_id + show = self._download_json( + 'http://admin.mangomolo.com/analytics/index.php/plus/show', + show_id, data=urlencode_postdata(data), headers={ + 'Origin': 'http://awaan.ae', + 'Content-Type': 'application/x-www-form-urlencoded' + }) + if not season_id: + season_id = show['default_season'] + for season in show['seasons']: + if season['id'] == season_id: + title = season.get('title_en') or season['title_ar'] + + entries = [] + for video in show['videos']: + video_id = compat_str(video['id']) + entries.append(self.url_result( + 'http://awaan.ae/media/%s' % video_id, 'AWAANVideo', video_id)) + + return self.playlist_result(entries, season_id, title) diff -Nru youtube-dl-2016.08.22/youtube_dl/extractor/bravotv.py youtube-dl-2016.08.24.1/youtube_dl/extractor/bravotv.py --- youtube-dl-2016.08.22/youtube_dl/extractor/bravotv.py 2016-08-21 21:17:02.000000000 +0000 +++ youtube-dl-2016.08.24.1/youtube_dl/extractor/bravotv.py 2016-08-24 03:05:40.000000000 +0000 @@ -1,31 +1,74 @@ # coding: utf-8 from __future__ import unicode_literals -from .common import InfoExtractor -from ..utils import smuggle_url +from .adobepass import AdobePassIE +from ..utils import ( + smuggle_url, + update_url_query, + int_or_none, +) -class BravoTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+videos/(?P[^/?]+)' - _TEST = { +class BravoTVIE(AdobePassIE): + _VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+(?P[^/?#]+)' + _TESTS = [{ 'url': 'http://www.bravotv.com/last-chance-kitchen/season-5/videos/lck-ep-12-fishy-finale', - 'md5': 'd60cdf68904e854fac669bd26cccf801', + 'md5': '9086d0b7ef0ea2aabc4781d75f4e5863', 'info_dict': { - 'id': 'LitrBdX64qLn', + 'id': 'zHyk1_HU_mPy', 'ext': 'mp4', - 'title': 'Last Chance Kitchen Returns', - 'description': 'S13: Last Chance Kitchen Returns for Top Chef Season 13', - 'timestamp': 1448926740, - 'upload_date': '20151130', + 'title': 'LCK Ep 12: Fishy Finale', + 'description': 'S13/E12: Two eliminated chefs have just 12 minutes to cook up a delicious fish dish.', 'uploader': 'NBCU-BRAV', + 'upload_date': '20160302', + 'timestamp': 1456945320, } - } + }, { + 'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1', + 'only_matching': True, + }] def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - account_pid = self._search_regex(r'"account_pid"\s*:\s*"([^"]+)"', webpage, 'account pid') - release_pid = self._search_regex(r'"release_pid"\s*:\s*"([^"]+)"', webpage, 'release pid') - return self.url_result(smuggle_url( - 'http://link.theplatform.com/s/%s/%s?mbr=true&switch=progressive' % (account_pid, release_pid), - {'force_smil_url': True}), 'ThePlatform', release_pid) + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + settings = self._parse_json(self._search_regex( + r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', webpage, 'drupal settings'), + display_id) + info = {} + query = { + 'mbr': 'true', + } + account_pid, release_pid = [None] * 2 + tve = settings.get('sharedTVE') + if tve: + query['manifest'] = 'm3u' + account_pid = 'HNK2IC' + release_pid = tve['release_pid'] + if tve.get('entitlement') == 'auth': + adobe_pass = settings.get('adobePass', {}) + resource = self._get_mvpd_resource( + adobe_pass.get('adobePassResourceId', 'bravo'), + tve['title'], release_pid, tve.get('rating')) + query['auth'] = self._extract_mvpd_auth( + url, release_pid, adobe_pass.get('adobePassRequestorId', 'bravo'), resource) + else: + shared_playlist = settings['shared_playlist'] + account_pid = shared_playlist['account_pid'] + metadata = shared_playlist['video_metadata'][shared_playlist['default_clip']] + release_pid = metadata['release_pid'] + info.update({ + 'title': metadata['title'], + 'description': metadata.get('description'), + 'season_number': int_or_none(metadata.get('season_num')), + 'episode_number': int_or_none(metadata.get('episode_num')), + }) + query['switch'] = 'progressive' + info.update({ + '_type': 'url_transparent', + 'id': release_pid, + 'url': smuggle_url(update_url_query( + 'http://link.theplatform.com/s/%s/%s' % (account_pid, release_pid), + query), {'force_smil_url': True}), + 'ie_key': 'ThePlatform', + }) + return info diff -Nru youtube-dl-2016.08.22/youtube_dl/extractor/common.py youtube-dl-2016.08.24.1/youtube_dl/extractor/common.py --- youtube-dl-2016.08.22/youtube_dl/extractor/common.py 2016-08-21 21:17:21.000000000 +0000 +++ youtube-dl-2016.08.24.1/youtube_dl/extractor/common.py 2016-08-24 03:05:40.000000000 +0000 @@ -1765,6 +1765,18 @@ entries.append(media_info) return entries + def _extract_akamai_formats(self, manifest_url, video_id): + formats = [] + f4m_url = re.sub(r'(https?://.+?)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m') + formats.extend(self._extract_f4m_formats( + update_url_query(f4m_url, {'hdcore': '3.7.0'}), + video_id, f4m_id='hds', fatal=False)) + m3u8_url = re.sub(r'(https?://.+?)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8') + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + return formats + def _live_title(self, name): """ Generate the title for a live video """ now = datetime.datetime.now() diff -Nru youtube-dl-2016.08.22/youtube_dl/extractor/dcn.py youtube-dl-2016.08.24.1/youtube_dl/extractor/dcn.py --- youtube-dl-2016.08.22/youtube_dl/extractor/dcn.py 2016-08-21 21:17:03.000000000 +0000 +++ youtube-dl-2016.08.24.1/youtube_dl/extractor/dcn.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,200 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re -import base64 - -from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_urlencode, - compat_str, -) -from ..utils import ( - int_or_none, - parse_iso8601, - sanitized_Request, - smuggle_url, - unsmuggle_url, - urlencode_postdata, -) - - -class DCNIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P\d+)/[^/]+(?:/(?P\d+)/(?P\d+))?' - - def _real_extract(self, url): - show_id, video_id, season_id = re.match(self._VALID_URL, url).groups() - if video_id and int(video_id) > 0: - return self.url_result( - 'http://www.dcndigital.ae/media/%s' % video_id, 'DCNVideo') - elif season_id and int(season_id) > 0: - return self.url_result(smuggle_url( - 'http://www.dcndigital.ae/program/season/%s' % season_id, - {'show_id': show_id}), 'DCNSeason') - else: - return self.url_result( - 'http://www.dcndigital.ae/program/%s' % show_id, 'DCNSeason') - - -class DCNBaseIE(InfoExtractor): - def _extract_video_info(self, video_data, video_id, is_live): - title = video_data.get('title_en') or video_data['title_ar'] - img = video_data.get('img') - thumbnail = 'http://admin.mangomolo.com/analytics/%s' % img if img else None - duration = int_or_none(video_data.get('duration')) - description = video_data.get('description_en') or video_data.get('description_ar') - timestamp = parse_iso8601(video_data.get('create_time'), ' ') - - return { - 'id': video_id, - 'title': self._live_title(title) if is_live else title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'timestamp': timestamp, - 'is_live': is_live, - } - - def _extract_video_formats(self, webpage, video_id, m3u8_entry_protocol): - formats = [] - format_url_base = 'http' + self._html_search_regex( - [ - r'file\s*:\s*"https?(://[^"]+)/playlist.m3u8', - r']+href="rtsp(://[^"]+)"' - ], webpage, 'format url') - formats.extend(self._extract_mpd_formats( - format_url_base + '/manifest.mpd', - video_id, mpd_id='dash', fatal=False)) - formats.extend(self._extract_m3u8_formats( - format_url_base + '/playlist.m3u8', video_id, 'mp4', - m3u8_entry_protocol, m3u8_id='hls', fatal=False)) - formats.extend(self._extract_f4m_formats( - format_url_base + '/manifest.f4m', - video_id, f4m_id='hds', fatal=False)) - self._sort_formats(formats) - return formats - - -class DCNVideoIE(DCNBaseIE): - IE_NAME = 'dcn:video' - _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?(?:video(?:/[^/]+)?|media|catchup/[^/]+/[^/]+)/(?P\d+)' - _TESTS = [{ - 'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375', - 'info_dict': - { - 'id': '17375', - 'ext': 'mp4', - 'title': 'رحلة العمر : الحلقة 1', - 'description': 'md5:0156e935d870acb8ef0a66d24070c6d6', - 'duration': 2041, - 'timestamp': 1227504126, - 'upload_date': '20081124', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - 'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - request = sanitized_Request( - 'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id, - headers={'Origin': 'http://www.dcndigital.ae'}) - video_data = self._download_json(request, video_id) - info = self._extract_video_info(video_data, video_id, False) - - webpage = self._download_webpage( - 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + - compat_urllib_parse_urlencode({ - 'id': video_data['id'], - 'user_id': video_data['user_id'], - 'signature': video_data['signature'], - 'countries': 'Q0M=', - 'filter': 'DENY', - }), video_id) - info['formats'] = self._extract_video_formats(webpage, video_id, 'm3u8_native') - return info - - -class DCNLiveIE(DCNBaseIE): - IE_NAME = 'dcn:live' - _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?live/(?P\d+)' - - def _real_extract(self, url): - channel_id = self._match_id(url) - - request = sanitized_Request( - 'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id=%s' % channel_id, - headers={'Origin': 'http://www.dcndigital.ae'}) - - channel_data = self._download_json(request, channel_id) - info = self._extract_video_info(channel_data, channel_id, True) - - webpage = self._download_webpage( - 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + - compat_urllib_parse_urlencode({ - 'id': base64.b64encode(channel_data['user_id'].encode()).decode(), - 'channelid': base64.b64encode(channel_data['id'].encode()).decode(), - 'signature': channel_data['signature'], - 'countries': 'Q0M=', - 'filter': 'DENY', - }), channel_id) - info['formats'] = self._extract_video_formats(webpage, channel_id, 'm3u8') - return info - - -class DCNSeasonIE(InfoExtractor): - IE_NAME = 'dcn:season' - _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?program/(?:(?P\d+)|season/(?P\d+))' - _TEST = { - 'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A', - 'info_dict': - { - 'id': '7910', - 'title': 'محاضرات الشيخ الشعراوي', - }, - 'playlist_mincount': 27, - } - - def _real_extract(self, url): - url, smuggled_data = unsmuggle_url(url, {}) - show_id, season_id = re.match(self._VALID_URL, url).groups() - - data = {} - if season_id: - data['season'] = season_id - show_id = smuggled_data.get('show_id') - if show_id is None: - request = sanitized_Request( - 'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id=%s' % season_id, - headers={'Origin': 'http://www.dcndigital.ae'}) - season = self._download_json(request, season_id) - show_id = season['id'] - data['show_id'] = show_id - request = sanitized_Request( - 'http://admin.mangomolo.com/analytics/index.php/plus/show', - urlencode_postdata(data), - { - 'Origin': 'http://www.dcndigital.ae', - 'Content-Type': 'application/x-www-form-urlencoded' - }) - - show = self._download_json(request, show_id) - if not season_id: - season_id = show['default_season'] - for season in show['seasons']: - if season['id'] == season_id: - title = season.get('title_en') or season['title_ar'] - - entries = [] - for video in show['videos']: - video_id = compat_str(video['id']) - entries.append(self.url_result( - 'http://www.dcndigital.ae/media/%s' % video_id, 'DCNVideo', video_id)) - - return self.playlist_result(entries, season_id, title) diff -Nru youtube-dl-2016.08.22/youtube_dl/extractor/eagleplatform.py youtube-dl-2016.08.24.1/youtube_dl/extractor/eagleplatform.py --- youtube-dl-2016.08.22/youtube_dl/extractor/eagleplatform.py 2016-08-21 21:17:03.000000000 +0000 +++ youtube-dl-2016.08.24.1/youtube_dl/extractor/eagleplatform.py 2016-08-24 03:05:40.000000000 +0000 @@ -52,11 +52,24 @@ @staticmethod def _extract_url(webpage): + # Regular iframe embedding mobj = re.search( r']+src=(["\'])(?P(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1', webpage) if mobj is not None: return mobj.group('url') + # Basic usage embedding (see http://dultonmedia.github.io/eplayer/) + mobj = re.search( + r'''(?xs) + ]+ + src=(?P["\'])(?:https?:)?//(?P.+?\.media\.eagleplatform\.com)/player/player\.js(?P=q1) + .+? + ]+ + class=(?P["\'])eagleplayer(?P=q2)[^>]+ + data-id=["\'](?P\d+) + ''', webpage) + if mobj is not None: + return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict() @staticmethod def _handle_error(response): diff -Nru youtube-dl-2016.08.22/youtube_dl/extractor/extractors.py youtube-dl-2016.08.24.1/youtube_dl/extractor/extractors.py --- youtube-dl-2016.08.22/youtube_dl/extractor/extractors.py 2016-08-21 21:17:21.000000000 +0000 +++ youtube-dl-2016.08.24.1/youtube_dl/extractor/extractors.py 2016-08-24 03:05:40.000000000 +0000 @@ -1,7 +1,10 @@ # flake8: noqa from __future__ import unicode_literals -from .abc import ABCIE +from .abc import ( + ABCIE, + ABCIViewIE, +) from .abc7news import Abc7NewsIE from .abcnews import ( AbcNewsIE, @@ -68,6 +71,12 @@ from .audimedia import AudiMediaIE from .audioboom import AudioBoomIE from .audiomack import AudiomackIE, AudiomackAlbumIE +from .awaan import ( + AWAANIE, + AWAANVideoIE, + AWAANLiveIE, + AWAANSeasonIE, +) from .azubu import AzubuIE, AzubuLiveIE from .baidu import BaiduVideoIE from .bambuser import BambuserIE, BambuserChannelIE @@ -197,12 +206,6 @@ DaumUserIE, ) from .dbtv import DBTVIE -from .dcn import ( - DCNIE, - DCNVideoIE, - DCNLiveIE, - DCNSeasonIE, -) from .dctp import DctpTvIE from .deezer import DeezerPlaylistIE from .democracynow import DemocracynowIE diff -Nru youtube-dl-2016.08.22/youtube_dl/extractor/mtv.py youtube-dl-2016.08.24.1/youtube_dl/extractor/mtv.py --- youtube-dl-2016.08.22/youtube_dl/extractor/mtv.py 2016-08-21 21:17:03.000000000 +0000 +++ youtube-dl-2016.08.24.1/youtube_dl/extractor/mtv.py 2016-08-24 03:05:41.000000000 +0000 @@ -257,8 +257,8 @@ def _get_feed_url(self, uri): video_id = self._id_from_uri(uri) site_id = uri.replace(video_id, '') - config_url = ('http://media.mtvnservices.com/pmt/e1/players/{0}/' - 'context4/context5/config.xml'.format(site_id)) + config_url = ('http://media.mtvnservices.com/pmt-arc/e1/players/{0}/' + 'context52/config.xml'.format(site_id)) config_doc = self._download_xml(config_url, video_id) feed_node = config_doc.find('.//feed') feed_url = feed_node.text.strip().split('?')[0] diff -Nru youtube-dl-2016.08.22/youtube_dl/extractor/nrk.py youtube-dl-2016.08.24.1/youtube_dl/extractor/nrk.py --- youtube-dl-2016.08.22/youtube_dl/extractor/nrk.py 2016-08-21 21:17:03.000000000 +0000 +++ youtube-dl-2016.08.24.1/youtube_dl/extractor/nrk.py 2016-08-24 03:05:41.000000000 +0000 @@ -14,16 +14,6 @@ class NRKBaseIE(InfoExtractor): - def _extract_formats(self, manifest_url, video_id, fatal=True): - formats = [] - formats.extend(self._extract_f4m_formats( - manifest_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', - video_id, f4m_id='hds', fatal=fatal)) - formats.extend(self._extract_m3u8_formats(manifest_url.replace( - 'akamaihd.net/z/', 'akamaihd.net/i/').replace('/manifest.f4m', '/master.m3u8'), - video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=fatal)) - return formats - def _real_extract(self, url): video_id = self._match_id(url) @@ -45,7 +35,7 @@ asset_url = asset.get('url') if not asset_url: continue - formats = self._extract_formats(asset_url, video_id, fatal=False) + formats = self._extract_akamai_formats(asset_url, video_id) if not formats: continue self._sort_formats(formats) @@ -69,7 +59,7 @@ if not entries: media_url = data.get('mediaUrl') if media_url: - formats = self._extract_formats(media_url, video_id) + formats = self._extract_akamai_formats(media_url, video_id) self._sort_formats(formats) duration = parse_duration(data.get('duration')) entries = [{ diff -Nru youtube-dl-2016.08.22/youtube_dl/extractor/openload.py youtube-dl-2016.08.24.1/youtube_dl/extractor/openload.py --- youtube-dl-2016.08.22/youtube_dl/extractor/openload.py 2016-08-21 21:17:03.000000000 +0000 +++ youtube-dl-2016.08.24.1/youtube_dl/extractor/openload.py 2016-08-24 03:05:41.000000000 +0000 @@ -1,12 +1,12 @@ # coding: utf-8 from __future__ import unicode_literals, division -import math - from .common import InfoExtractor -from ..compat import compat_chr +from ..compat import ( + compat_chr, + compat_ord, +) from ..utils import ( - decode_png, determine_ext, ExtractorError, ) @@ -42,71 +42,26 @@ def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id) - if 'File not found' in webpage: + if 'File not found' in webpage or 'deleted by the owner' in webpage: raise ExtractorError('File not found', expected=True) - # The following extraction logic is proposed by @Belderak and @gdkchan - # and declared to be used freely in youtube-dl - # See https://github.com/rg3/youtube-dl/issues/9706 - - numbers_js = self._download_webpage( - 'https://openload.co/assets/js/obfuscator/n.js', video_id, - note='Downloading signature numbers') - signums = self._search_regex( - r'window\.signatureNumbers\s*=\s*[\'"](?P[a-z]+)[\'"]', - numbers_js, 'signature numbers', group='data') - - linkimg_uri = self._search_regex( - r']+id="linkimg"[^>]+src="([^"]+)"', webpage, 'link image') - linkimg = self._request_webpage( - linkimg_uri, video_id, note=False).read() - - width, height, pixels = decode_png(linkimg) - - output = '' - for y in range(height): - for x in range(width): - r, g, b = pixels[y][3 * x:3 * x + 3] - if r == 0 and g == 0 and b == 0: - break - else: - output += compat_chr(r) - output += compat_chr(g) - output += compat_chr(b) - - img_str_length = len(output) // 200 - img_str = [[0 for x in range(img_str_length)] for y in range(10)] - - sig_str_length = len(signums) // 260 - sig_str = [[0 for x in range(sig_str_length)] for y in range(10)] - - for i in range(10): - for j in range(img_str_length): - begin = i * img_str_length * 20 + j * 20 - img_str[i][j] = output[begin:begin + 20] - for j in range(sig_str_length): - begin = i * sig_str_length * 26 + j * 26 - sig_str[i][j] = signums[begin:begin + 26] - - parts = [] - # TODO: find better names for str_, chr_ and sum_ - str_ = '' - for i in [2, 3, 5, 7]: - str_ = '' - sum_ = float(99) - for j in range(len(sig_str[i])): - for chr_idx in range(len(img_str[i][j])): - if sum_ > float(122): - sum_ = float(98) - chr_ = compat_chr(int(math.floor(sum_))) - if sig_str[i][j][chr_idx] == chr_ and j >= len(str_): - sum_ += float(2.5) - str_ += img_str[i][j][chr_idx] - parts.append(str_.replace(',', '')) + # The following decryption algorithm is written by @yokrysty and + # declared to be freely used in youtube-dl + # See https://github.com/rg3/youtube-dl/issues/10408 + enc_data = self._html_search_regex( + r']+id="hiddenurl"[^>]*>([^<]+)', webpage, 'encrypted data') + + video_url_chars = [] + + for c in enc_data: + j = compat_ord(c) + if j >= 33 and j <= 126: + j = ((j + 14) % 94) + 33 + video_url_chars += compat_chr(j) - video_url = 'https://openload.co/stream/%s~%s~%s~%s' % (parts[3], parts[1], parts[2], parts[0]) + video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars) title = self._og_search_title(webpage, default=None) or self._search_regex( r']+class=["\']title["\'][^>]*>([^<]+)', webpage, diff -Nru youtube-dl-2016.08.22/youtube_dl/extractor/pluralsight.py youtube-dl-2016.08.24.1/youtube_dl/extractor/pluralsight.py --- youtube-dl-2016.08.22/youtube_dl/extractor/pluralsight.py 2016-08-21 21:17:03.000000000 +0000 +++ youtube-dl-2016.08.24.1/youtube_dl/extractor/pluralsight.py 2016-08-24 03:10:35.000000000 +0000 @@ -1,9 +1,10 @@ from __future__ import unicode_literals -import re +import collections import json +import os import random -import collections +import re from .common import InfoExtractor from ..compat import ( @@ -12,10 +13,11 @@ ) from ..utils import ( ExtractorError, + float_or_none, int_or_none, parse_duration, qualities, - sanitized_Request, + srt_subtitles_timecode, urlencode_postdata, ) @@ -75,12 +77,10 @@ if not post_url.startswith('http'): post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) - request = sanitized_Request( - post_url, urlencode_postdata(login_form)) - request.add_header('Content-Type', 'application/x-www-form-urlencoded') - response = self._download_webpage( - request, None, 'Logging in as %s' % username) + post_url, None, 'Logging in as %s' % username, + data=urlencode_postdata(login_form), + headers={'Content-Type': 'application/x-www-form-urlencoded'}) error = self._search_regex( r']+class="field-validation-error"[^>]*>([^<]+)', @@ -91,6 +91,53 @@ if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')): raise ExtractorError('Unable to log in') + def _get_subtitles(self, author, clip_id, lang, name, duration, video_id): + captions_post = { + 'a': author, + 'cn': clip_id, + 'lc': lang, + 'm': name, + } + captions = self._download_json( + '%s/training/Player/Captions' % self._API_BASE, video_id, + 'Downloading captions JSON', 'Unable to download captions JSON', + fatal=False, data=json.dumps(captions_post).encode('utf-8'), + headers={'Content-Type': 'application/json;charset=utf-8'}) + if captions: + return { + lang: [{ + 'ext': 'json', + 'data': json.dumps(captions), + }, { + 'ext': 'srt', + 'data': self._convert_subtitles(duration, captions), + }] + } + + @staticmethod + def _convert_subtitles(duration, subs): + srt = '' + for num, current in enumerate(subs): + current = subs[num] + start, text = float_or_none( + current.get('DisplayTimeOffset')), current.get('Text') + if start is None or text is None: + continue + end = duration if num == len(subs) - 1 else float_or_none( + subs[num + 1].get('DisplayTimeOffset')) + if end is None: + continue + srt += os.linesep.join( + ( + '%d' % num, + '%s --> %s' % ( + srt_subtitles_timecode(start), + srt_subtitles_timecode(end)), + text, + os.linesep, + )) + return srt + def _real_extract(self, url): qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) @@ -138,6 +185,8 @@ if not clip: raise ExtractorError('Unable to resolve clip') + title = '%s - %s' % (module['title'], clip['title']) + QUALITIES = { 'low': {'width': 640, 'height': 480}, 'medium': {'width': 848, 'height': 640}, @@ -196,13 +245,12 @@ 'mt': ext, 'q': '%dx%d' % (f['width'], f['height']), } - request = sanitized_Request( - '%s/training/Player/ViewClip' % self._API_BASE, - json.dumps(clip_post).encode('utf-8')) - request.add_header('Content-Type', 'application/json;charset=utf-8') format_id = '%s-%s' % (ext, quality) clip_url = self._download_webpage( - request, display_id, 'Downloading %s URL' % format_id, fatal=False) + '%s/training/Player/ViewClip' % self._API_BASE, display_id, + 'Downloading %s URL' % format_id, fatal=False, + data=json.dumps(clip_post).encode('utf-8'), + headers={'Content-Type': 'application/json;charset=utf-8'}) # Pluralsight tracks multiple sequential calls to ViewClip API and start # to return 429 HTTP errors after some time (see @@ -225,18 +273,20 @@ formats.append(f) self._sort_formats(formats) - # TODO: captions - # http://www.pluralsight.com/training/Player/ViewClip + cap = true - # or - # http://www.pluralsight.com/training/Player/Captions - # { a = author, cn = clip_id, lc = end, m = name } + duration = int_or_none( + clip.get('duration')) or parse_duration(clip.get('formattedDuration')) + + # TODO: other languages? + subtitles = self.extract_subtitles( + author, clip_id, 'en', name, duration, display_id) return { 'id': clip.get('clipName') or clip['name'], - 'title': '%s - %s' % (module['title'], clip['title']), - 'duration': int_or_none(clip.get('duration')) or parse_duration(clip.get('formattedDuration')), + 'title': title, + 'duration': duration, 'creator': author, - 'formats': formats + 'formats': formats, + 'subtitles': subtitles, } diff -Nru youtube-dl-2016.08.22/youtube_dl/extractor/syfy.py youtube-dl-2016.08.24.1/youtube_dl/extractor/syfy.py --- youtube-dl-2016.08.22/youtube_dl/extractor/syfy.py 2016-08-21 21:17:04.000000000 +0000 +++ youtube-dl-2016.08.24.1/youtube_dl/extractor/syfy.py 2016-08-24 03:05:41.000000000 +0000 @@ -31,7 +31,7 @@ display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) syfy_mpx = list(self._parse_json(self._search_regex( - r'jQuery\.extend\([^,]+,\s*({.+})\);', webpage, 'drupal settings'), + r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', webpage, 'drupal settings'), display_id)['syfy']['syfy_mpx'].values())[0] video_id = syfy_mpx['mpxGUID'] title = syfy_mpx['episodeTitle'] diff -Nru youtube-dl-2016.08.22/youtube_dl/extractor/youtube.py youtube-dl-2016.08.24.1/youtube_dl/extractor/youtube.py --- youtube-dl-2016.08.22/youtube_dl/extractor/youtube.py 2016-08-21 21:17:04.000000000 +0000 +++ youtube-dl-2016.08.24.1/youtube_dl/extractor/youtube.py 2016-08-24 03:05:42.000000000 +0000 @@ -91,36 +91,18 @@ if login_page is False: return - galx = self._search_regex(r'(?s)