# encoding: utf-8
# api: pagetranslate
# type: classes
# category: language
# title: via_* translation backends
# description: Implements the alternative services (google, deepl, ...)
# version: 1.2
# state: beta
# depends: python:requests (>= 2.5)
# config: -
#
# Different online service backends and http interfaces are now coalesced here.
#
# modules
import re
import urllib
from urllib.parse import urlencode, quote, quote_plus
from httprequests import http
# Module-wide logger handle used by the backends below (`log.debug`, `log.warning`).
# It starts out as None in this file — presumably the importing application
# assigns a real logger before any backend method runs; TODO confirm.
log = None
# translation backend/service
class google:
    """Translation backend that scrapes the mobile Google Translate page.

    `params` is the config/argparse dict; this class reads `params["lang"]`
    (default target language) and `params["crlf"]` (paragraph handling mode).
    """

    # regex
    # translated text as it appears in the result page's `t0` <div>
    rx_gtrans = re.compile(r'class="t0">(.+?)</div>', re.S)
    # chunking of long input: prefer a cut right after a period, else after
    # whitespace, else take whatever remains
    rx_splitpara = re.compile(r"(.{1,1895}\.|.{1,1900}\s|.*$)", re.S)
    # NOTE(review): `ยง` here and in the `/#ยง/` placeholder looks like a
    # mis-decoded `§` — confirm the file encoding; kept as-is so that the
    # pattern and the placeholder stay in sync.
    rx_empty = re.compile(r"^[\s\d,.:;ยง():-]+$")
    rx_letters = re.compile(r"\w\w+", re.UNICODE)
    rx_breakln = re.compile(r"\s?/\s?#\s?ยง\s?/\s?")

    def __init__(self, params=None):
        # a fresh dict per instance; a `{}` default would be shared by all instances
        self.params = {} if params is None else params  # config+argparse

    def _extract(self, page):
        """Return the entity-decoded translation found in *page*, or None."""
        # local import: the bare name `html` would be too easy to shadow
        from html import unescape
        found = self.rx_gtrans.search(page)
        if found is None:
            return None
        return unescape(found.group(1))

    def _segments(self, text):
        """Split *text* with `rx_splitpara`, dropping empty matches.

        `findall` reports a zero-length match at the end of the string
        (via the `.*$` alternative); that one must not be sent out as a request.
        """
        return [part for part in self.rx_splitpara.findall(text) if part]

    # request text translation from google
    def askgoogle(self, text, dst_lang="en", src_lang='auto'):
        """Fetch the translation of *text*; return *text* unchanged if none is found.

        Performs one HTTP GET through the module's `http` object.
        """
        # fetch translation page
        url = "http://translate.google.com/m?hl=%s&sl=%s&q=%s" % (
            dst_lang, src_lang, quote_plus(text)
        )
        page = http.get(url).content.decode("utf-8")
        # extract content from text <div>
        translated = self._extract(page)
        if translated is None:
            log.warning("NO TRANSLATION RESULT EXTRACTED: " + page)
            log.debug("ORIG TEXT: " + repr(text))
            return text
        return translated

    # iterate over text segments (1900 char limit)
    def translate(self, text, lang="auto"):
        """Translate *text* into *lang* (`"auto"` means `params["lang"]`).

        Text is returned untouched when it is shorter than two characters,
        matches `rx_empty`, or contains no run of two word characters.
        Text of 1900+ characters is translated chunk by chunk and the
        results are joined with single spaces.
        """
        if lang == "auto":
            lang = self.params["lang"]
        if len(text) < 2:
            log.debug("skipping/len<2")
            return text
        if self.rx_empty.match(text):
            log.debug("skipping/empty")
            return text
        if not self.rx_letters.search(text):
            log.debug("skipping/noletters")
            return text
        if len(text) >= 1900:
            log.debug("spliterate/1900+")
            return " ".join(
                self.askgoogle(segment, lang) for segment in self._segments(text)
            )
        return self.askgoogle(text, lang)

    # translate w/ preserving paragraph breaks (meant for table cell content)
    def linebreakwise(self, text, lang="auto"):
        """Translate *text* while keeping its blank-line paragraph breaks.

        Unless `params["crlf"]` is `"quick"`, every paragraph is translated
        on its own. In quick mode the breaks are swapped for a placeholder,
        the text is translated in one go, and the placeholder (possibly
        respaced by the service) is turned back into breaks.
        """
        if self.params["crlf"] != "quick":
            # split on linebreaks and translate each individually
            return "\n\n".join(
                self.translate(paragraph, lang) for paragraph in text.split("\n\n")
            )
        # use temporary placeholder `/#ยง/`
        flattened = self.translate(text.replace("\n\n", "/#ยง/"), lang)
        return self.rx_breakln.sub("\n\n", flattened)
class deepl_web(google):
    """Stub for a DeepL web-interface backend.

    Nothing is overridden here, so instances behave exactly like `google`.
    """
    # Protocol notes kept for a future implementation:
    #  < https://www2.deepl.com/jsonrpc
    #    cookies: LMTBID: GUID...
    #    referer: https://www.deepl.com/translator
    #    body: {"jsonrpc":"2.0","method": "LMT_handle_jobs","params":{"jobs":[{"kind":"default","raw_en_sentence":"...","raw_en_context_before":[],"raw_en_context_after":[],"preferred_num_beams":4,"quality":"fast"}],"lang":{"user_preferred_langs":["DE","EN"],"source_lang_user_selected":"auto","target_lang":"DE"},"priority":-1,"commonJobParams":{},"timestamp":1590258680854},"id":700000000}
    #  > result.translations[0].beams[0].postprocessed_sentence
class deepl_api(deepl_web):
    """Stub for a DeepL API backend; inherits everything from `deepl_web` unchanged."""
# requires `pip install translate`
class translate_python:
    """Backend that delegates to the third-party `translate` package.

    `params` must provide `"lang"` (target language) and `"api_key"`; both
    are handed to `translate.Translator` with the "microsoft" provider.
    Exposes the same `translate` / `linebreakwise` call signatures as the
    `google` backend so the two can be used interchangeably.
    """

    def __init__(self, params=None):
        """Build the `Translator`.

        Raises ImportError (after reporting through `self.error`) when the
        `translate` package is missing, and KeyError when a required
        `params` entry is absent.
        """
        self.params = {} if params is None else params  # config+argparse
        # `pagetranslate` is not imported by this module; use its MessageBox
        # only if the host has placed it in this module's namespace,
        # otherwise fall back to the module logger.
        host = globals().get("pagetranslate")
        self.error = getattr(host, "MessageBox", None) or self._log_error
        try:
            from translate import Translator
        except ImportError:
            self.error("Use `pip install translate` to use this module.")
            raise
        self._translator = Translator(
            provider="microsoft",
            to_lang=self.params["lang"],
            secret_access=self.params["api_key"],
        )

    @staticmethod
    def _log_error(message):
        """Report *message* through the module-level `log`, if one is set."""
        logger = globals().get("log")
        if logger is not None:
            logger.error(message)

    def translate(self, text, lang="auto"):
        """Translate *text* into the language chosen at construction time.

        *lang* is accepted only for signature compatibility with the other
        backends; the target language is fixed by `params["lang"]`.
        """
        return self._translator.translate(text)

    def linebreakwise(self, text, lang="auto"):
        """Same as `translate`; no separate paragraph handling is applied."""
        return self.translate(text, lang)
# maps a translation backend object for config dict {"goog":1, "deepl":0}
def assign_service(params):
    """Instantiate the backend selected by the truthy flags in *params*.

    Flags are checked in the order `deepl_web`, `deepl_api`,
    `translate_python`; when none is set the `google` backend is used.
    The same *params* dict is passed on to the chosen backend.
    """
    if params.get("deepl_web"):
        # was `deepl_web(self.params)`: there is no `self` in a module-level function
        return deepl_web(params)
    if params.get("deepl_api"):
        return deepl_api(params)
    if params.get("translate_python"):
        return translate_python(params)
    return google(params)