From 81b4008b87d8d5fa2ad188ad469990b8b913037d Mon Sep 17 00:00:00 2001 From: blindlight86 Date: Wed, 25 Oct 2023 15:12:19 +0800 Subject: [PATCH 1/4] Add kankannews epgs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 看看新闻网,上海6个频道 --- crawl/spiders/__init__.py | 5 +++ crawl/spiders/kankan.py | 86 +++++++++++++++++++++++++++++++++++++++ web/models.py | 1 + 3 files changed, 92 insertions(+) create mode 100644 crawl/spiders/kankan.py diff --git a/crawl/spiders/__init__.py b/crawl/spiders/__init__.py index 4a4c007..8f86471 100644 --- a/crawl/spiders/__init__.py +++ b/crawl/spiders/__init__.py @@ -21,6 +21,7 @@ from crawl.spiders.gxntv import get_epgs_gxntv,get_channels_gxntv from utils.general import chuanliu_Authorization from crawl.spiders.sdtv import get_epgs_sdtv,get_channels_sdtv +from crawl.spiders.kankan import get_epgs_kankan,get_channels_kankan epg_funcs = { 'tvmao':get_epgs_tvmao2, 'tbc':get_epgs_tbc, @@ -41,6 +42,7 @@ 'mytvsuper':get_epgs_mytvsuper, 'gxntv':get_epgs_gxntv, 'sdtv':get_epgs_sdtv, + 'kankn':get_epgs_kankan, } #所有EPG的接口 epg_source = { 'tvmao':get_channels_tvmao, @@ -62,6 +64,7 @@ 'mytvsuper':get_channels_mytvsuper, 'gxntv':get_channels_gxntv, 'sdtv':get_channels_sdtv, + 'kankn':get_channels_kankan, } func_args = { 'tvmao':0, @@ -83,6 +86,7 @@ 'mytvsuper':0, 'gxntv':0, 'sdtv':0, + 'kankn':0, } def epg_func(channel,id,dt,func_arg=0,source = 0): if source: @@ -111,6 +115,7 @@ def epg_func(channel,id,dt,func_arg=0,source = 0): 'get_epgs_mytvsuper', 'get_epgs_gxntv', 'get_epgs_sdtv', + 'get_epgs_kankan', 'epg_funcs', 'func_args', 'epg_func', diff --git a/crawl/spiders/kankan.py b/crawl/spiders/kankan.py new file mode 100644 index 0000000..035db0b --- /dev/null +++ b/crawl/spiders/kankan.py @@ -0,0 +1,86 @@ +# -*- coding:utf-8 -*- +# 看看新闻网-官网来源,上海6个频道 +import requests, datetime, os, re, time, json, hashlib +from utils.general import headers +from bs4 import BeautifulSoup as bs + +def get_epgs_kankan(channel, channel_id, dt, func_arg): + epgs = [] + msg = '' + success = 1 + timestamp = int(time.time()) + date = dt.strftime("%Y-%m-%d") + payload = {} + params = { + 'Api-Version': 'v1', + 'channel_id': channel_id, + 'date': date, + 'nonce': '1', + 'platform': 'pc', + 'timestamp': timestamp, + 'version': 'v2.0.0', + } + s = '&'.join([f'{key}={params[key]}' for key in sorted(params.keys())]) + s = f'{s}&28c8edde3d61a0411511d3b1866f0636' + hashed_s = hashlib.md5(s.encode()).hexdigest() + sign = hashlib.md5(hashed_s.encode()).hexdigest() + headers = { + 'Api-version': 'v1', + 'nonce': '1', + 'platform': 'pc', + 'version': 'v2.0.0', + 'sign': sign, + 'timestamp': str(timestamp), + } + try: + url = f"https://kapi.kankanews.com/content/pc/tv/programs?channel_id={channel_id}&date={date}" + res = requests.request("GET", url, headers=headers, data=payload, timeout=8) + res.encoding = 'utf-8' + re_json = json.loads(res) + contents = re_json['result']['programs'] + for content in contents: + starttime = content['start_time_string'] + endtime = content['end_time_string'] + title = content['name'] + epg = {'channel_id': channel_id, + 'starttime': starttime, + 'endtime': endtime, + 'title': title, + 'desc': '', + 'program_date': dt, + } + epgs.append(epg) + except Exception as e: + success = 0 + spidername = os.path.basename(__file__).split('.')[0] + msg = f'spider-{spidername}- {e}' + return { + 'success': success, + 'epgs': epgs, + 'msg': msg, + 'last_program_date': dt, + 'ban': 0, + } + +def get_channels_kankan(): # sourcery skip: avoid-builtin-shadow + channels = [] + url = 'https://live.kankanews.com/huikan/' + res = requests.get(url) + res.encoding = 'utf-8' + soup = bs(res.text,'html.parser') + div_channels = soup.select('div.channel.item.cur > li') + for i, div_channel in enumerate(div_channels): + name = div_channel.p.text.strip() + id = str(i+1) + channel = { + 'name': name, + 'id': [id], + 'url': url, + 'source': 'kankan', + 'logo': '', + 'desc': '', + 'sort': '上海', + } + channels.append(channel) + print(f'共有:{len(channels)}个频道') + return channels \ No newline at end of file diff --git a/web/models.py b/web/models.py index 0a829b5..0ab87aa 100644 --- a/web/models.py +++ b/web/models.py @@ -28,6 +28,7 @@ class Channel(models.Model): ('mytvsuper','myTVSUPER'), ('gxntv','广西网络广播电视'), ('sdtv','山东齐鲁'), + ('kankan','看看新闻网'), ] need_get = [ (1,'是'), From 5f10350acfad76c0722755b38d24208c31f5be54 Mon Sep 17 00:00:00 2001 From: blindlight86 Date: Wed, 25 Oct 2023 15:51:13 +0800 Subject: [PATCH 2/4] Update __init__.py --- crawl/spiders/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crawl/spiders/__init__.py b/crawl/spiders/__init__.py index 8f86471..a3fd9b5 100644 --- a/crawl/spiders/__init__.py +++ b/crawl/spiders/__init__.py @@ -42,7 +42,7 @@ 'mytvsuper':get_epgs_mytvsuper, 'gxntv':get_epgs_gxntv, 'sdtv':get_epgs_sdtv, - 'kankn':get_epgs_kankan, + 'kankan':get_epgs_kankan, } #所有EPG的接口 epg_source = { 'tvmao':get_channels_tvmao, @@ -64,7 +64,7 @@ 'mytvsuper':get_channels_mytvsuper, 'gxntv':get_channels_gxntv, 'sdtv':get_channels_sdtv, - 'kankn':get_channels_kankan, + 'kankan':get_channels_kankan, } func_args = { 'tvmao':0, @@ -86,7 +86,7 @@ 'mytvsuper':0, 'gxntv':0, 'sdtv':0, - 'kankn':0, + 'kankan':0, } def epg_func(channel,id,dt,func_arg=0,source = 0): if source: From bc78aedee44f19b8581f937aa45f7d3afe54d4c3 Mon Sep 17 00:00:00 2001 From: blindlight86 Date: Wed, 25 Oct 2023 16:22:02 +0800 Subject: [PATCH 3/4] Update kankan.py --- crawl/spiders/kankan.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/crawl/spiders/kankan.py b/crawl/spiders/kankan.py index 035db0b..e205c89 100644 --- a/crawl/spiders/kankan.py +++ b/crawl/spiders/kankan.py @@ -36,11 +36,11 @@ def get_epgs_kankan(channel, channel_id, dt, func_arg): url = f"https://kapi.kankanews.com/content/pc/tv/programs?channel_id={channel_id}&date={date}" res = requests.request("GET", url, headers=headers, data=payload, timeout=8) res.encoding = 'utf-8' - re_json = json.loads(res) + re_json = json.loads(res.text) contents = re_json['result']['programs'] for content in contents: - starttime = content['start_time_string'] - endtime = content['end_time_string'] + starttime = datetime.datetime.fromtimestamp(content['start_time']) + endtime = datetime.datetime.fromtimestamp(content['end_time']) title = content['name'] epg = {'channel_id': channel_id, 'starttime': starttime, @@ -63,6 +63,12 @@ def get_epgs_kankan(channel, channel_id, dt, func_arg): } def get_channels_kankan(): # sourcery skip: avoid-builtin-shadow + ids = {'东方卫视': '1', + '新闻综合': '2', + '第一财经': '5', + '纪实人文': '6', + '都市频道': '4', + '哈哈炫动': '9'} channels = [] url = 'https://live.kankanews.com/huikan/' res = requests.get(url) @@ -71,7 +77,7 @@ def get_channels_kankan(): # sourcery skip: avoid-builtin-shadow div_channels = soup.select('div.channel.item.cur > li') for i, div_channel in enumerate(div_channels): name = div_channel.p.text.strip() - id = str(i+1) + id = ids[name] channel = { 'name': name, 'id': [id], From a0b7d48231b86fd3bc4e7c16e05bad2be412f9b6 Mon Sep 17 00:00:00 2001 From: blindlight86 Date: Thu, 26 Oct 2023 09:21:16 +0800 Subject: [PATCH 4/4] Update kankan.py --- crawl/spiders/kankan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crawl/spiders/kankan.py b/crawl/spiders/kankan.py index e205c89..dc237c0 100644 --- a/crawl/spiders/kankan.py +++ b/crawl/spiders/kankan.py @@ -75,7 +75,7 @@ def get_channels_kankan(): # sourcery skip: avoid-builtin-shadow res.encoding = 'utf-8' soup = bs(res.text,'html.parser') div_channels = soup.select('div.channel.item.cur > li') - for i, div_channel in enumerate(div_channels): + for div_channel in div_channels: name = div_channel.p.text.strip() id = ids[name] channel = {