zhishiku_fess.py 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. import requests
  2. import re, json
  3. from plugins.common import settings
  4. # encoding=utf-8
  5. import jieba
  6. with open("plugins/stopwords_txt", encoding="utf-8") as f:
  7. stopwords = f.read().split('\n')
  8. session = requests.Session()
  9. headers = {
  10. 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36 Edg/94.0.992.31',
  11. 'Content-Type': 'application/json;charset=utf-8'}
  12. proxies = {"http": None, "https": None, }
  13. def replace_strong(s):
  14. s = re.sub(r'<strong>', "", s)
  15. s = re.sub(r'</strong>', "", s)
  16. return s
  17. def remove_stopwords(search_query):
  18. search_query_without_stopwords = []
  19. for i in search_query:
  20. try:
  21. stopwords.index(i)
  22. search_query_without_stopwords.append("########")
  23. except:
  24. search_query_without_stopwords.append(i)
  25. return search_query_without_stopwords
  26. def removeduplicate(list1):
  27. """
  28. 列表套字典去重复
  29. :param list1: 输入一个有重复值的列表
  30. :return: 返回一个去掉重复的列表
  31. """
  32. newlist = []
  33. for i in list1: # 先遍历原始字典
  34. flag = True
  35. if newlist == []: # 如果是空的列表就不会有重复,直接往里添加
  36. pass
  37. else:
  38. for j in newlist:
  39. for key in i.keys():
  40. if i['_id'] == j['_id']:
  41. flag = False
  42. if flag:
  43. newlist.append(i)
  44. return newlist
  45. def find(search_query, step=0):
  46. try:
  47. search_query = jieba.cut(search_query)
  48. search_query = remove_stopwords(search_query)
  49. search_query = " ".join(search_query)
  50. print("关键词:", search_query)
  51. rest = []
  52. for i in search_query.split("########"):
  53. if len(i.strip()) > 0:
  54. if settings.librarys.fess.version is not None and settings.librarys.fess.version < 14.8:
  55. url = 'http://' + settings.librarys.fess.fess_host + '/json/?q={}&num=10&sort=score.desc&lang=zh_CN'.format(i)
  56. else:
  57. # adapt for >fess14.8
  58. url = 'http://' + settings.librarys.fess.fess_host + '/api/v1/documents/?q={}&num=10&sort=score.desc&lang=zh_CN'.format(
  59. i)
  60. res = session.get(url, headers=headers, proxies=proxies)
  61. r = res.json()
  62. if settings.librarys.fess.version is not None and settings.librarys.fess.version < 14.8:
  63. r = r["response"]['result']
  64. else:
  65. r = r["data"]
  66. # print('rrrrrrrrrrrrrrr',r)
  67. rest.extend(r)
  68. else:
  69. continue
  70. r = removeduplicate(rest)
  71. # print('restrestrestrestrestrest', r)
  72. # "<strong>""</strong>"
  73. return [{'title': r[i]['title'], 'content': replace_strong(r[i]['content_description'])}
  74. for i in range(min(int(settings.librarys.fess.count), len(r)))]
  75. except Exception as e:
  76. print("fess读取失败", e)
  77. return []
  78. from bottle import route, response, request, static_file, hook
  79. import bottle
  80. @route('/find_fess_zhishiku', method=("POST", "OPTIONS"))
  81. def upload_zhishiku():
  82. data = request.json
  83. prompt = data.get('prompt')
  84. try:
  85. if settings.librarys.fess.version is not None and settings.librarys.fess.version < 14.8:
  86. url = 'http://' + settings.librarys.fess.fess_host + '/json/?q={}&num=10&sort=score.desc&lang=zh_CN'.format(
  87. i)
  88. else:
  89. # adapt for >fess14.8
  90. url = 'http://' + settings.librarys.fess.fess_host + '/api/v1/documents/?q={}&num=10&sort=score.desc&lang=zh_CN'.format(
  91. i)
  92. res = session.get(url, headers=headers, proxies=proxies)
  93. r = res.json()
  94. if settings.librarys.fess.version is not None and settings.librarys.fess.version < 14.8:
  95. r = r["response"]['result']
  96. else:
  97. r = r["data"]
  98. # "<strong>""</strong>"
  99. return json.dumps([{'title': r[i]['title'], 'content': replace_strong(r[i]['content_description'])}
  100. for i in range(min(int(settings.librarys.fess.count), len(r)))])
  101. except Exception as e:
  102. print("fess读取失败", e)
  103. return json.dumps([])
粤ICP备19079148号