174 lines
4.8 KiB
Python
174 lines
4.8 KiB
Python
#!/usr/bin/env python3
|
|
# coding=utf-8
|
|
|
|
# Wiktionary Module for Drastikbot
|
|
#
|
|
# Depends:
|
|
# - requests :: $ pip3 install requests
|
|
# - beautifulsoup4 :: $ pip3 install beautifulsoup4
|
|
|
|
'''
|
|
Copyright (C) 2018 drastik.org
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
'''
|
|
|
|
import requests
|
|
import bs4
|
|
import re
|
|
from dbot_tools import p_truncate
|
|
|
|
|
|
class Module:
    """Describe the Wiktionary module: its commands and their manual."""

    def __init__(self):
        # Command names this module answers to.
        self.commands = ['wiktionary', 'wt']

        def usage(prefix, command):
            """Build the usage line for ``command`` behind ``prefix``."""
            return f"{prefix}{command} <word> [-e <num>]"

        # Help text shared by both command names.
        info = ("The -e option allows you to choose other definitions."
                " The number of definitions is listed in parentheses in the"
                " result. In a query, the bot will post the full definitions"
                " without truncating the text.")

        # Manual entries; each "usage" value is a callable that takes the
        # command prefix and returns the usage line.
        self.manual = {
            "desc": "Search https://en.wiktionary.org/ for word definitions.",
            "bot_commands": {
                "wiktionary": {"usage": lambda x: usage(x, "wiktionary"),
                               "info": info,
                               "alias": ["wt"]},
                "wt": {"usage": lambda x: usage(x, "wt"),
                       "info": info,
                       "alias": ["wiktionary"]},
            },
        }
|
|
|
|
|
|
# ----- Global Constants ----- #
# Value passed as ``timeout`` to requests.get() when fetching a page.
r_timeout = 10
# Parser name handed to bs4.BeautifulSoup().
bs4_parser = "html.parser"
# ---------------------------- #
|
|
|
|
|
|
def get_text(html, etymology):
    """Collect the etymology text and definition lists of one section.

    Args:
        html: HTML markup to search.
        etymology: ``id`` of the ``<span>`` used as the anchor, for
            example "Etymology" or "Etymology_2".

    Returns:
        ``{etymology: {"Etymology": text, "<part of speech>": text, ...}}``
        with one entry per piece of text that was found. An empty dict is
        returned when the anchor span is missing or nothing could be
        extracted, so callers never receive an empty section.
    """
    soup = bs4.BeautifulSoup(html, bs4_parser)

    anchor = soup.find('span', id=etymology)
    if anchor is None:
        # The id may be present in the markup on a non-<span> element.
        return {}

    section = {}

    # The first paragraph after the anchor is taken as the etymology text.
    paragraph = anchor.find_next('p')
    if paragraph is not None:
        section["Etymology"] = paragraph.text

    parts_of_speech = ("Noun", "Verb", "Adjective", "Adverb",
                       "Interjection", "Particle", "Preposition")
    for part in parts_of_speech:
        # NOTE(review): find_next() scans everything after the anchor, so
        # this can match a heading that belongs to a later etymology
        # section -- confirm whether that is intended.
        heading = anchor.find_next('span', string=part)
        if heading is None:
            continue
        definitions = heading.find_next('ol')
        if definitions is None:
            continue
        section[part] = definitions.text

    return {etymology: section} if section else {}
|
|
|
|
|
|
def extract_etymologies(html):
    """Gather the text of every etymology section found in ``html``.

    When the markup contains ``id="Etymology"`` only that single section
    is read. Otherwise the numbered ids ``Etymology_1``, ``Etymology_2``,
    ... are read in order until the next number is absent from ``html``.

    Returns:
        A dict merged from the ``get_text`` result of each section; empty
        when no etymology id is present.
    """
    sections = {}

    # Single, unnumbered section.
    if 'id="Etymology"' in html:
        sections.update(get_text(html, "Etymology"))
        return sections

    # Numbered sections, consecutive from 1.
    number = 1
    while f'id="Etymology_{number}"' in html:
        sections.update(get_text(html, f"Etymology_{number}"))
        number += 1

    return sections
|
|
|
|
|
|
def wiktionary(url, res):
    """Download ``url`` and return its etymology sections.

    Args:
        url: Address of the page to fetch.
        res: Accepted for the caller's convenience; not used here.

    Returns:
        The dict produced by ``extract_etymologies`` for the first part of
        the page.
    """
    response = requests.get(url, timeout=r_timeout)

    # Keep the lines up to and including the first one containing '<hr',
    # i.e. the html of a single language section. Lines are concatenated
    # without a separator.
    kept_lines = []
    for line in response.text.splitlines():
        kept_lines.append(line)
        if '<hr' in line:
            break
    html = "".join(kept_lines)

    # Remove quotations so that beautifulsoup doesn't catch them.
    unwanted = (
        r'(?ims)<ul>.*?</ul>',
        r'(?ims)<dl>.*?</dl>',
        r'(?ims)<span class="defdate">.*?</span>',
    )
    for pattern in unwanted:
        html = re.sub(pattern, '', html)

    return extract_etymologies(html)
|
|
|
|
|
|
def query(args):
    """Return the search term with the ``-e <num>`` option stripped.

    Args:
        args: The whitespace-split command arguments. Not modified.

    Returns:
        The remaining arguments joined by single spaces. The first ``-e``
        and the argument following it (if any) are dropped.
    """
    words = list(args)
    try:
        idx = words.index('-e')
    except ValueError:
        # No -e option given; every argument is part of the search term.
        return ' '.join(words)

    # Slice deletion also copes with a trailing '-e' that has no value,
    # where deleting two items one by one would raise IndexError.
    del words[idx:idx + 2]
    return ' '.join(words)
|
|
|
|
|
|
def main(i, irc):
|
|
if not i.msg_nocmd:
|
|
msg = (f'Usage: {i.cmd_prefix}{i.cmd} <Word> [-e <NUM>]')
|
|
return irc.privmsg(i.channel, msg)
|
|
|
|
args = i.msg_nocmd.split()
|
|
|
|
if '-e' in args:
|
|
idx = args.index('-e')
|
|
res = int(args[idx + 1])
|
|
else:
|
|
res = 1
|
|
|
|
q = query(args)
|
|
q_web = q.replace(" ", "_")
|
|
url = f"https://en.wiktionary.org/wiki/{q_web}"
|
|
result = wiktionary(url, res)
|
|
result_length = len(result)
|
|
|
|
if res not in range(1, result_length + 1):
|
|
msg = f'Wiktionary: No definition was found for "{q}" | {url}'
|
|
return irc.privmsg(i.channel, msg)
|
|
|
|
if res == 1:
|
|
try:
|
|
result = result["Etymology"]
|
|
except KeyError:
|
|
result = result["Etymology_1"]
|
|
else:
|
|
result = result[f"Etymology_{res}"]
|
|
|
|
msg_len = (irc.var.msg_len - 9 - 8 - len(str(result_length))
|
|
- (len(result) * 5) - len(url))
|
|
p_tr_percent = 100 / len(result)
|
|
|
|
txt = ""
|
|
for part, cont in result.items():
|
|
rslt = f"{part}: {cont}"
|
|
if not i.nickname == i.channel:
|
|
rslt = p_truncate(rslt, msg_len, p_tr_percent, True)
|
|
txt += f"{rslt} | "
|
|
|
|
rpl = f"{q} | {txt}({result_length}) | {url}"
|
|
irc.privmsg(i.channel, rpl)
|