zhishiku_univeralsearch.py 825 B

123456789101112131415161718192021
  1. import requests
  2. from gerapy_auto_extractor import extract_list,extract_detail
  3. #from selenium import webdriver # 加载浏览器的库
  4. session = requests.Session()
  5. headers = {
  6. 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36 Edg/94.0.992.31'
  7. }
  8. def find(urlpath,search_query):
  9. url = urlpath+'{}'.format(search_query)
  10. #url = 'https://cn.bing.com/search?q={}'.format(search_query)
  11. #url = 'https://www.baidu.com/s?wd={}'.format(search_query)
  12. #url = 'https://cn.bing.com/academic?mkt=zh-CN'
  13. #url = 'https://www.google.com/search?q={}'.format(search_query)
  14. res=requests.get(url, headers=headers).text
  15. # 使用gerapy_auto_extractor提取列表数据
  16. extracted_data = extract_list(res)
  17. return extracted_data
粤ICP备19079148号