#!/usr/bin/env python3
# coding=utf-8

# Search Module for Drastikbot
#
# Provides the results of various search engines like:
# Google, Bing, Duckduckgo, Searx, Startpage
#
# Depends:
#   - requests      :: $ pip3 install requests
#   - beautifulsoup :: $ pip3 install beautifulsoup4
#   - url           :: included with drastikbot_modules, should be loaded.

'''
Copyright (C) 2018, 2021 drastik.org

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <https://www.gnu.org/licenses/>.
'''

import urllib.parse
import json

import requests
import bs4

import url  # drastikbot_modules: url.py


class Module:
    def __init__(self):
        self.commands = ['g', 'bing', 'ddg', 'searx', 'sp']
        self.manual = {
            "desc": ("Get search results from Duckduckgo, Google, Bing"
                     ", Searx and Startpage."),
            "bot_commands": {
                "g": {"usage": lambda x: f"{x}g <query>"},
                "bing": {"usage": lambda x: f"{x}bing <query>"},
                "ddg": {"usage": lambda x: f"{x}ddg <query>"},
                "searx": {"usage": lambda x: f"{x}searx <query>"},
                "sp": {"usage": lambda x: f"{x}sp <query>"}
            }
        }


# ----- Constants ----- #
opt_title_tag = True
parser = 'html.parser'
lang = "en-US"
ua_chrome_90 = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
                " (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36")
# --------------------- #


# --- Helper Functions --- #

def url2str(query):
    # Decode a URL-encoded query back to plain text.
    return urllib.parse.unquote_plus(query)


def url_extract(url):
    # Return the video id from a YouTube-style URL ("?v=..." or "?video_id=...").
    u = urllib.parse.urlparse(url)
    u = urllib.parse.parse_qs(u.query)
    try:
        u = u['v'][0]
    except Exception:
        u = u['video_id'][0]
    return ''.join(u.split())


def urlfix(url):
    # Strip spaces and make sure the URL has a scheme.
    url = url.replace(' ', '')
    if not (url.startswith('http://') or url.startswith('https://')):
        url = 'http://' + url
    return url


# ====================================================================
# Google
# ====================================================================

def google(query):
    search = f'https://www.google.com/search?q={query}'
    return "google", "This search engine is not supported yet."
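
# Every engine function (google above; bing, duckduckgo, searx and startpage
# below) follows the same contract used by main(): it takes the raw query
# string and returns an (engine_name, result) tuple, where engine_name is a
# key of logo_d and result is either a URL or an informational message.
# Illustrative only, not executed by the module:
#
#     engine, result = bing("irc bot python")
#     # engine == "bing", result == the first organic result URL
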
# ====================================================================
# Bing
# ====================================================================

def bing(args):
    query = urllib.parse.quote(args, safe="")
    u = f"https://bing.com/search?q={query}"
    h = {
        "Accept-Language": lang,
        "user-agent": ua_chrome_90,
    }
    r = requests.get(u, headers=h, timeout=10)
    soup = bs4.BeautifulSoup(r.text, parser)
    results_l = soup.find_all("li", {"class": "b_algo"})
    result = results_l[0].find("a").get("href")
    return "bing", result


# ====================================================================
# Duckduckgo
# ====================================================================

def duckduckgo(args):
    query = urllib.parse.quote(args, safe="")
    if args[0] == '!':
        return "duckduckgo", duckduckgo_bang(query)
    return "duckduckgo", duckduckgo_search(query)


def duckduckgo_bang(query):
    # Resolve a !bang query using the DuckDuckGo Instant Answer API.
    u = f"https://api.duckduckgo.com/?q={query}&format=json&no_redirect=1"
    h = {
        "Accept-Language": lang
    }
    r = requests.get(u, headers=h, timeout=10)
    return r.json()["Redirect"]


def duckduckgo_search(query):
    # Scrape the first result URL from the HTML (non-JavaScript) frontend.
    u = ("https://html.duckduckgo.com/html/"
         f"?q={query}&kl=wt-wt&kp=-2&kaf=1&kh=1&k1=-1&kd=-1")
    h = {
        "user-agent": ua_chrome_90,
        "Accept-Language": lang
    }
    r = requests.get(u, headers=h, timeout=10)
    soup = bs4.BeautifulSoup(r.text, parser)
    result = soup.find("a", {"class": ["result__url"]})
    return result.get("href")


# ====================================================================
# Searx
# ====================================================================

def searx(query):
    search = f'https://searx.me/?q={query}'
    return "searx", "This search engine is not supported yet."


# ====================================================================
# Startpage
# ====================================================================

def startpage(query):
    search = f'https://www.startpage.com/do/asearch?q={query}'
    return "startpage", "This search engine is not supported yet."


# ====================================================================
# Main
# ====================================================================

dispatch = {
    "g": google,
    "bing": bing,
    "ddg": duckduckgo,
    "searx": searx,
    "sp": startpage
}

logo_d = {
    "google": "\x0302G\x0304o\x0308o\x0302g\x0309l\x0304e\x0F",
    "bing": "\x0315Bing\x0F",
    "duckduckgo": "\x0315DuckDuckGo\x0F",
    "searx": "\x0315sear\x0311X\x0F",
    "startpage": "\x0304start\x0302page\x0F"
}

# err = f'{logo}: \x0308Sorry, i could not find any results for:\x0F {query}'


def main(i, irc):
    args = i.msg_nocmd
    botcmd = i.cmd
    receiver = i.channel

    engine, result = dispatch[botcmd](args)

    title = None
    if opt_title_tag:
        title = url.get_title(result)

    logo = logo_d[engine]
    if title:
        m = f"{logo}: {result} | < title: {title}"
    else:
        m = f"{logo}: {result}"

    # Truncate the output just in case. We can't send 512 bytes anyway.
    m = m[:512]
    irc.privmsg(receiver, m)
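

# Minimal standalone check (an assumption, not part of the original module):
# running this file directly performs a DuckDuckGo search for the arguments
# given on the command line and prints the first result, which helps when
# debugging the HTML scraping without a running IRC bot. It still needs
# url.py from drastikbot_modules to be importable.
if __name__ == "__main__":
    import sys

    q = " ".join(sys.argv[1:]) or "drastikbot"  # hypothetical default query
    engine, link = duckduckgo(q)
    print(f"[{engine}] {link}")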