| 123456789101112131415161718192021222324252627282930313233343536373839404142 |
import logging
import re

import requests

from plugins.common import settings
# HTTP session shared by every search issued from this module.
session = requests.Session()

# Regexes that extract result titles, summaries and links from the Bing
# results HTML. A bare `.` stands in for each quote / punctuation character
# of the markup, so none of them has to be escaped.
title_pattern = re.compile(r'<a.target=..blank..target..(.*?)</a>')
brief_pattern = re.compile(r'K=.SERP(.*?)</p>')
# findall() on this pattern yields 3-tuples, one element per capture group:
# (look-behind prefix, URL, look-ahead suffix). The URL is element [1].
link_pattern = re.compile(
    r'(?<=(a.target=._blank..target=._blank..href=.))(.*?)(?=(..h=))')

# Browser-like User-Agent header sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36 Edg/94.0.992.31'}

# NOTE(review): mapping both schemes to None presumably disables proxying for
# these requests -- confirm against the requests proxy documentation.
proxies = {"http": None, "https": None}
def _squash(text):
    """Remove newlines from *text* and trim surrounding whitespace."""
    return text.replace('\n', '').strip()


def _clean_brief(raw):
    """Reduce a raw summary capture to plain text."""
    # Strip HTML tags.
    text = _squash(re.sub('<[^<]+?>', '', raw))
    # NOTE(review): this greedy match discards everything up to the LAST
    # space in the summary. It looks intended to drop a leading date /
    # separator prefix -- confirm the separator character is the right one.
    text = _squash(re.sub('^.* ', '', text))
    # Drop whatever still precedes a leftover '>'.
    return _squash(re.sub('^.*>', '', text))


def _clean_title(raw):
    """Reduce a raw title capture to plain text."""
    # The capture starts inside the opening <a ...> tag; cut through its '>'.
    text = _squash(re.sub('^.*?>', '', raw))
    # Strip any remaining inline tags.
    return _squash(re.sub('<[^<]+?>', '', text))


def find(search_query, step=0):
    """Search cn.bing.com for *search_query* and return the parsed hits.

    Args:
        search_query: Text to search for. It is passed through ``params`` so
            that characters such as ``&``, ``#`` or ``+`` are URL-encoded
            instead of corrupting the query string.
        step: Unused; kept so existing callers that pass it keep working.

    Returns:
        A list of dicts with two keys: ``'title'``, a Markdown link of the
        form ``[title](url)``, and ``'content'``, the cleaned summary text.
        At most ``settings.librarys.bing.count`` entries are returned. The
        list is empty when the request or the parsing fails.
    """
    try:
        response = session.get(
            'https://cn.bing.com/search',
            params={'q': search_query},
            headers=headers,
            proxies=proxies,
            # Without a timeout a stalled connection would block forever.
            timeout=10,
        )
        html = response.text

        # Clamp so a negative configured count yields no results.
        limit = max(0, int(settings.librarys.bing.count))

        # The three patterns are matched independently and may return
        # different numbers of hits. zip() stops at the shortest list, so a
        # mismatch no longer raises IndexError and discards every result.
        hits = list(zip(
            title_pattern.findall(html),
            brief_pattern.findall(html),
            link_pattern.findall(html),
        ))

        return [
            {
                # link_match is a 3-tuple of capture groups; [1] is the URL.
                'title': '[' + _clean_title(raw_title) + ']('
                         + link_match[1] + ')',
                'content': _clean_brief(raw_brief),
            }
            for raw_title, raw_brief, link_match in hits[:limit]
        ]
    except Exception:
        # Best-effort lookup: report the failure but never break the caller.
        logging.getLogger(__name__).exception(
            'Bing search failed for query %r', search_query)
        return []
|