forked from zuckerberg/dailybot
216 lines
5.8 KiB
Python
216 lines
5.8 KiB
Python
#!/usr/bin/env python3
|
|
# coding=utf-8
|
|
|
|
# Search Module for Drastikbot
|
|
#
|
|
# Provides the results of various search engines like:
|
|
# Google, Bing, Duckduckgo, Searx, Startpage
|
|
#
|
|
# Depends:
|
|
# - requests :: $ pip3 install requests
|
|
# - beautifulsoup :: $ pip3 install beautifulsoup4
|
|
# - url :: included with drastikbot_modules, should be loaded.
|
|
|
|
'''
|
|
Copyright (C) 2018, 2021 drastik.org
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
'''
|
|
|
|
import urllib.parse
|
|
import json
|
|
import requests
|
|
import bs4
|
|
import url # drastikbot_modules: url.py
|
|
|
|
|
|
class Module:
|
|
def __init__(self):
|
|
self.commands = ['g', 'bing', 'ddg', 'searx', 'sp']
|
|
self.manual = {
|
|
"desc": ("Get search results from Duckduckgo, Google, Bing"
|
|
", Searx and Startpage."),
|
|
"bot_commands": {
|
|
"g": {"usage": lambda x: f"{x}g <query>"},
|
|
"bing": {"usage": lambda x: f"{x}bing <query>"},
|
|
"ddg": {"usage": lambda x: f"{x}ddg <query>"},
|
|
"searx": {"usage": lambda x: f"{x}searx <query>"},
|
|
"sp": {"usage": lambda x: f"{x}sp <query>"}
|
|
}
|
|
}
|
|
|
|
|
|
# ----- Constants ----- #
|
|
opt_title_tag = True
|
|
parser = 'html.parser'
|
|
lang = "en-US"
|
|
|
|
ua_chrome_90 = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
|
|
" (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36")
|
|
|
|
|
|
# --------------------- #
|
|
|
|
|
|
# --- Helper Functions --- #
|
|
def url2str(query):
|
|
return urllib.parse.unquote_plus(query)
|
|
|
|
|
|
def url_extract(url):
|
|
u = urllib.parse.urlparse(url)
|
|
u = urllib.parse.parse_qs(u.query)
|
|
try:
|
|
u = u['v'][0]
|
|
except Exception:
|
|
u = u['video_id'][0]
|
|
return ''.join(u.split())
|
|
|
|
|
|
def urlfix(url):
|
|
url = url.replace(' ', '')
|
|
if not (url.startswith('http://') or url.startswith('https://')):
|
|
url = 'http://' + url
|
|
return url
|
|
|
|
|
|
# ====================================================================
|
|
# Google
|
|
# ====================================================================
|
|
|
|
def google(query):
|
|
search = f'https://www.google.com/search?q={query}'
|
|
return "google", "This search engine is not supported yet."
|
|
|
|
|
|
# ====================================================================
|
|
# Bing
|
|
# ====================================================================
|
|
|
|
def bing(args):
|
|
query = urllib.parse.quote(args, safe="")
|
|
u = f"https://bing.com/search?q={query}"
|
|
h = headers={
|
|
"Accept-Language": lang,
|
|
"user-agent": ua_chrome_90,
|
|
}
|
|
r = requests.get(u, headers=h, timeout=10)
|
|
soup = bs4.BeautifulSoup(r.text, parser)
|
|
|
|
results_l = soup.find_all("li", {"class": "b_algo"})
|
|
result = results_l[0].find("a").get("href")
|
|
|
|
return "bing", result
|
|
|
|
|
|
# ====================================================================
|
|
# Duckduckgo
|
|
# ====================================================================
|
|
|
|
def duckduckgo(args):
|
|
query = urllib.parse.quote(args, safe="")
|
|
|
|
if args[0] == '!':
|
|
return "duckduckgo", duckduckgo_bang(query)
|
|
|
|
return "duckduckgo", duckduckgo_search(query)
|
|
|
|
|
|
def duckduckgo_bang(query):
|
|
u = f"https://api.duckduckgo.com/?q={query}&format=json&no_redirect=1"
|
|
h = {
|
|
"Accept-Language": lang
|
|
}
|
|
r = requests.get(u, headers=h, timeout=10)
|
|
return r.json()["Redirect"]
|
|
|
|
|
|
def duckduckgo_search(query):
|
|
u = ("https://html.duckduckgo.com/html/"
|
|
f"?q={query}&kl=wt-wt&kp=-2&kaf=1&kh=1&k1=-1&kd=-1")
|
|
h = {
|
|
"user-agent": ua_chrome_90,
|
|
"Accept-Language": lang
|
|
}
|
|
r = requests.get(u, headers=h, timeout=10)
|
|
soup = bs4.BeautifulSoup(r.text, parser)
|
|
|
|
result = soup.find("a", {"class": ["result__url"]})
|
|
return result.get("href")
|
|
|
|
|
|
# ====================================================================
|
|
# Searx
|
|
# ====================================================================
|
|
|
|
def searx(query):
|
|
search = f'https://searx.me/?q={query}'
|
|
return "searx", "This search engine is not supported yet."
|
|
|
|
|
|
# ====================================================================
|
|
# Startpage
|
|
# ====================================================================
|
|
|
|
def startpage(query):
|
|
search = f'https://www.startpage.com/do/asearch?q={query}'
|
|
return "startpage", "This search engine is not supported yet."
|
|
|
|
|
|
# ====================================================================
|
|
# Main
|
|
# ====================================================================
|
|
|
|
dispatch = {
|
|
"g": google,
|
|
"bing": bing,
|
|
"ddg": duckduckgo,
|
|
"searx": searx,
|
|
"sp": startpage
|
|
}
|
|
|
|
logo_d = {
|
|
"google": "\x0302G\x0304o\x0308o\x0302g\x0309l\x0304e\x0F",
|
|
"bing": "\x0315Bing\x0F",
|
|
"duckduckgo": "\x0315DuckDuckGo\x0F",
|
|
"searx": "\x0315sear\x0311X\x0F",
|
|
"startpage": "\x0304start\x0302page\x0F"
|
|
}
|
|
|
|
# err = f'{logo}: \x0308Sorry, i could not find any results for:\x0F {query}'
|
|
|
|
|
|
def main(i, irc):
|
|
args = i.msg_nocmd
|
|
botcmd = i.cmd
|
|
receiver = i.channel
|
|
|
|
engine, result = dispatch[botcmd](args)
|
|
|
|
title = None
|
|
if opt_title_tag:
|
|
title = url.get_title(result)
|
|
|
|
logo = logo_d[engine]
|
|
|
|
if title:
|
|
m = f"{logo}: {result} | < title: {title}"
|
|
else:
|
|
m = f"{logo}: {result}"
|
|
|
|
# Truncate the output just in case. We can't send 512 bytes anyway.
|
|
m = m[:512]
|
|
|
|
irc.privmsg(receiver, m)
|