zhishiku_bingsite.py 1.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243
  1. import requests
  2. import re,os
  3. from plugins.common import settings
  4. cunnrent_setting=settings.librarys.bingsite
  5. session = requests.Session()
  6. # 正则提取摘要和链接
  7. title_pattern = re.compile('<a.target=..blank..target..(.*?)</a>')
  8. brief_pattern = re.compile('K=.SERP(.*?)</p>')
  9. link_pattern = re.compile(
  10. '(?<=(a.target=._blank..target=._blank..href=.))(.*?)(?=(..h=))')
  11. headers = {
  12. 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36 Edg/94.0.992.31'}
  13. proxies = {"http": None,"https": None,}
  14. def find(search_query,step):
  15. try:
  16. url = 'https://cn.bing.com/search?q={}'.format(search_query)+" site:"+cunnrent_setting.site
  17. res = session.get(url, headers=headers, proxies=proxies)
  18. r = res.text
  19. title = title_pattern.findall(r)
  20. brief = brief_pattern.findall(r)
  21. link = link_pattern.findall(r)
  22. # 数据清洗
  23. clear_brief = []
  24. for i in brief:
  25. tmp = re.sub('<[^<]+?>', '', i).replace('\n', '').strip()
  26. tmp1 = re.sub('^.*&ensp;', '', tmp).replace('\n', '').strip()
  27. tmp2 = re.sub('^.*>', '', tmp1).replace('\n', '').strip()
  28. clear_brief.append(tmp2)
  29. clear_title = []
  30. for i in title:
  31. tmp = re.sub('^.*?>', '', i).replace('\n', '').strip()
  32. tmp2 = re.sub('<[^<]+?>', '', tmp).replace('\n', '').strip()
  33. clear_title.append(tmp2)
  34. return [{'title': "["+clear_title[i]+"]("+link[i][1]+")", 'content':clear_brief[i]}
  35. for i in range(min(int(cunnrent_setting.count), len(brief)))]
  36. except:
  37. return []
粤ICP备19079148号