Check-in [9287fd7b5e]
Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Extract via_* classes into pythonpath/translationbackends. As well as requests/http/urllib-fallback code into separate module. Apply new nodepath for ConfigurationUpdateAccess (but keep using .json config file for now). |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | 1.2 |
Files: | files | file ages | folders |
SHA1: |
9287fd7b5e74fb9baf3b7113c5eadbe9 |
User & Date: | mario 2020-05-24 08:51:50 |
Context
2020-05-24
| ||
18:59 | Introduce more options (microsoft, mymemory, cli) check-in: 4232826ef2 user: mario tags: trunk | |
08:51 | Extract via_* classes into pythonpath/translationbackends. As well as requests/http/urllib-fallback code into separate module. Apply new nodepath for ConfigurationUpdateAccess (but keep using .json config file for now). check-in: 9287fd7b5e user: mario tags: trunk, 1.2 | |
08:48 | Add dialog option for python-translate (deepl_web might be too difficult after all). Add schema.xcs in manifest.xml (So, that's why it never took effect! *surprised_pikachu_face*) check-in: 99cd577149 user: mario tags: trunk | |
Changes
Changes to pagetranslate.py.
1 2 3 4 5 6 7 | #!/usr/bin/python # encoding: utf-8 # api: uno # type: callback # category: language # title: PageTranslate # description: Action button to get whole Writer document translated | | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 | #!/usr/bin/python # encoding: utf-8 # api: uno # type: callback # category: language # title: PageTranslate # description: Action button to get whole Writer document translated # version: 1.2 # state: beta # author: mario # url: https://fossil.include-once.org/pagetranslate/ # depends: python:requests (>= 2.5) # pack: *.py, pythonpath/*.py, META-INF/*, pkg-desc, *.x*, icons/* # license: GNU LGPL 2.1 # forked-from: TradutorLibreText (Claudemir de Almeida Rosa) # config: - # # LibreOffice plugin for translating documents that's supposed to retain formatting. # Per default does not require a text selection to operate, but works on the whole # page. |
︙ | ︙ | |||
46 47 48 49 50 51 52 | import os, sys import string, json, re from traceback import format_exc from tempfile import gettempdir # log file import logging as log log.basicConfig(filename='%s/pagetranslate-libreoffice.log'%gettempdir(), level=log.DEBUG) | | < < < < < | < < | < < < < < < < < < | < | < < | 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 | import os, sys import string, json, re from traceback import format_exc from tempfile import gettempdir # log file import logging as log log.basicConfig(filename='%s/pagetranslate-libreoffice.log'%gettempdir(), level=log.DEBUG) # pythonpath/*.py modules import httprequests httprequests.log = log import translationbackends translationbackends.log = log # config file env = os.environ.get class config: fn = env("XDG_CONFIG_HOME", env("APPDATA", env("HOME", "")+"/.config"))+"/libreoffice/pagetranslate.json" @staticmethod |
︙ | ︙ | |||
94 95 96 97 98 99 100 | except Exception as e: log.error("couldn't write to config file") @staticmethod def map(params): c = config.read() if not c: return | | | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | > > | | | < | | | 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 | except Exception as e: log.error("couldn't write to config file") @staticmethod def map(params): c = config.read() if not c: return params["crlf"] = "quick" if c.get("quick") else "iterate" if c.get("debug"): params["log"] = "debug" # Office plugin class pagetranslate(unohelper.Base, XJobExecutor): # defaults + config + command args params = dict( mode = "page", # "trigger"/"page", or "tradutor" lang = "en", # target language, or "paragraph", or "locale" crlf = "iterate", # split paragraph sentences? 
or "quick" for temporary placeholder log = "debug", # logging level google = 1, # backend to use deepl_web = 0, deepl_api = 0, deepl_key = "", translate_python = 0, ) t = None #= translationbackends.google(self.params) # gets instantiated as XJobExecutor by LibreOffice def __init__(self, ctx): log.info("init") self.ctx = ctx desktop = self.ctx.ServiceManager.createInstanceWithContext( "com.sun.star.frame.Desktop", self.ctx ) self.document = desktop.getCurrentComponent() #self.dispatcher = self.ctx.ServiceManager.createInstanceWithContext("com.sun.star.frame.DispatchHelper", self.ctx) # invoked from toolbar button def trigger(self, args): log.debug(".trigger(args=%s) invoked" % repr(args)) try: # merge defaults from `.config/libreoffice/pagetranslate.json` + params from args config.map(self.params) self.argparse(args) log.info(repr(self.params)) # map self.t.translate() implementation according to settings self.t = translationbackends.assign_service(self.params) log.info(self.t) # Draw/Impress? log.debug(dir(self.document)) if self.document.supportsService("com.sun.star.drawing.DrawingDocument") or self.document.supportsService("com.sun.star.presentation.PresentationDocument"): log.info(self.document) self.drawtranslate(self.document.getDrawPages()) |
︙ | ︙ | |||
345 346 347 348 349 350 351 352 353 | return myBox.execute() # @src https://forum.openoffice.org/en/forum/viewtopic.php?f=20&t=96509 โ the __init__(,args) parameter was invalid # @src https://github.com/p--q/OptionsDialog/blob/master/OptionsDialog/src/pythonpath/optionsdialog/component.py # class pagetranslate_opts(unohelper.Base, XContainerWindowEventHandler, XServiceInfo): impl_id = "vnd.include-once.OptionsPageTranslate" | > | | > > > < | | > > | | < | | | | | 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 | return myBox.execute() # @src https://forum.openoffice.org/en/forum/viewtopic.php?f=20&t=96509 โ the __init__(,args) parameter was invalid # @src https://github.com/p--q/OptionsDialog/blob/master/OptionsDialog/src/pythonpath/optionsdialog/component.py # # DialogOptions.xdl handler class pagetranslate_opts(unohelper.Base, XContainerWindowEventHandler, XServiceInfo): impl_id = "vnd.include-once.OptionsPageTranslate" fields = ["google" , "deepl_web", "deepl_api", "api_key", "translate_python", "debug", "quick"] def __init__(self, ctx, *kargs): log.info("pt_opts.init") self.ctx = ctx try: # "path not found": seems the OptionsSchema.xcs is faulty, no idea where it should end up nodepath = PropertyValue(Name="nodepath", Value="/vnd.include-once.pagetranslate.OptionsSchema/Leaves/Settings") self.config = ctx.ServiceManager.createInstanceWithContext("com.sun.star.configuration.ConfigurationProvider", ctx) log.debug(self.config) self.access = self.config.createInstanceWithArguments("com.sun.star.configuration.ConfigurationUpdateAccess", (nodepath,)) log.debug(self.access) log.debug(dir(self.access)) except Exception as e: log.error(format_exc()) log.info("done_init") # invoked on dialog initialization or for saving def callHandlerMethod(self, window=".UnoDialogControl", 
action="initialize|ok|back", name="external_event"): try: #log.info("{}:{}, win={}, funcs={}".format(name, action, window, dir(window))) params = config.read() for name in self.fields: log.info(name) if action == "initialize": self.setValue(window.getControl(name), params.get(name, "")) elif action == "ok": params[name] = self.getValue(window.getControl(name)) config.write(params) log.info(repr(params)) except Exception as e: log.error(format_exc()) return True # deal with CheckBox/TextEdit control differences def getValue(self, cntrl): return cntrl.State if "State" in dir(cntrl) else cntrl.Text def setValue(self, cntrl, value): if "State" in dir(cntrl): cntrl.State = value else: cntrl.Text = str(value) # XContainerWindowEventHandler def getSupportedMethodNames(self): return ("external_event",) # XServiceInfo def supportsService(self, name): return (name == self.impl_id) def getImplementationName(self): return self.impl_id |
︙ | ︙ |
Added pythonpath/httprequests.py.
# encoding: utf-8
# api: python
# type: classes
# category: http
# title: request/fallback
# description: loads requests, or simulates API via urllib
# version: 0.5
# state: beta
# depends: python:requests (>= 2.5)
# config: -
#
# Wraps requests or fakes a http.get() implementation.
#
# Exposes a module-level `http` object that offers `.headers` (a mutable
# mapping) and `.get(url)` returning an object with a bytes `.content`
# attribute, regardless of whether `requests` is installed.
#

__all__ = ["http", "urllib", "urlencode", "quote", "quote_plus"]

# http preparations
import logging
import sys
import urllib
from urllib.parse import urlencode, quote, quote_plus

# Default logger, so that the import-time error message below can be emitted.
# The importing module may still override it afterwards (`httprequests.log = log`).
log = logging.getLogger(__name__)

try:
    import requests
    http = requests.Session()
except Exception:
    log.error("Missing library: `pip install requests` (either system-wide, or in your libreoffice program/ folder)")

    from urllib.request import urlopen, Request

    class fake_requests:
        """Minimal urllib-based stand-in providing `.headers`, `.get()` and `.content`."""

        # body (bytes) of the most recent response; "" until the first get()
        content = ""

        def __init__(self):
            # per-instance state instead of shared class-level dicts
            self.headers = {}
            self.ssl_args = {}
            if sys.platform != 'win32':
                return
            # NOTE(review): certificate and hostname verification are switched
            # off on Windows only -- presumably to work around a missing CA
            # store in the bundled Python; confirm this is still required,
            # since it permits man-in-the-middle interception.
            import ssl
            myssl = ssl.create_default_context()
            myssl.check_hostname = False
            myssl.verify_mode = ssl.CERT_NONE
            self.ssl_args["context"] = myssl

        def get(self, url):
            """Fetch `url` with the configured headers; return self with `.content` set."""
            request = Request(url, headers=self.headers)
            # context manager makes sure the connection gets closed again
            with urlopen(request, **self.ssl_args) as response:
                self.content = response.read()
            return self

    http = fake_requests()

# headers
http.headers.update({
    "User-Agent": "Mozilla/5.0 (X11; Linux; LibreOffice/6.3), TradutorLibreText/1.3+PageTranslate/1.2",
    "Accept-Language": "*; q=1.0",
    "Accept-Encoding": "utf-8"
})
Added pythonpath/translationbackends.py.
# encoding: utf-8
# api: pagetranslate
# type: classes
# category: language
# title: via_* translation backends
# description: Implements the alternative services (google, deepl, ...)
# version: 1.2
# state: beta
# depends: python:requests (>= 2.5)
# config: -
#
# Different online service backends and http interfaces are now coalesced here.
#
# Each backend class takes the merged params dict and provides
# `.translate(text, lang)` and `.linebreakwise(text, lang)`.
#

# modules
import logging
import re
import urllib
from html import unescape
from urllib.parse import urlencode, quote, quote_plus
from httprequests import http

# Default logger; the importing module may override it
# (`translationbackends.log = log`). Previously `None`, which made every
# log call fail unless the override had happened.
log = logging.getLogger(__name__)


# translation backend/service
class google:
    """Backend scraping the mobile Google Translate page."""

    # regex
    rx_gtrans = re.compile(r'class="t0">(.+?)</div>', re.S)
    # segments of at most ~1900 chars: ending on a period, else on whitespace, else the remainder
    rx_splitpara = re.compile(r"(.{1,1895}\.|.{1,1900}\s|.*$)", re.S)
    rx_empty = re.compile(r"^[\s\d,.:;§():-]+$")
    rx_letters = re.compile(r"\w\w+", re.UNICODE)
    # matches the `/#§/` placeholder, tolerating whitespace inserted around its characters
    rx_breakln = re.compile(r"\s?/\s?#\s?§\s?/\s?")

    def __init__(self, params=None):
        # config+argparse; keeps a reference to the caller's dict when one is given
        self.params = {} if params is None else params

    # request text translation from google
    def askgoogle(self, text, dst_lang="en", src_lang='auto'):
        """Return the translation of `text`, or `text` unchanged if none could be extracted."""
        # fetch translation page
        url = "http://translate.google.com/m?hl=%s&sl=%s&q=%s" % (
            dst_lang, src_lang, quote_plus(text)
        )
        page = http.get(url).content.decode("utf-8")
        # extract content from text <div>
        m = self.rx_gtrans.search(page)
        if m:
            # decode all HTML entities (&#39; &amp; &lt; &gt; &quot; ...)
            text = unescape(m.group(1))
        else:
            log.warning("NO TRANSLATION RESULT EXTRACTED: " + page)
            log.debug("ORIG TEXT: " + repr(text))
        return text

    # iterate over text segments (1900 char limit)
    def translate(self, text, lang="auto"):
        """Translate `text` to `lang` ("auto" = configured params["lang"]), skipping non-text input."""
        if lang == "auto":
            lang = self.params["lang"]
        #log.debug("translate %d chars" % len(text))
        if len(text) < 2:
            log.debug("skipping/len<2")
            return text
        elif self.rx_empty.match(text):
            log.debug("skipping/empty")
            return text
        elif not self.rx_letters.search(text):
            log.debug("skipping/noletters")
            return text
        elif len(text) >= 1900:
            log.debug("spliterate/1900+")
            # findall() also yields a trailing empty match; don't send that out
            return " ".join(
                self.askgoogle(segment, lang)
                for segment in self.rx_splitpara.findall(text)
                if segment
            )
        else:
            return self.askgoogle(text, lang)

    # translate w/ preserving paragraph breaks (meant for table cell content)
    def linebreakwise(self, text, lang="auto"):
        """Translate `text` while keeping its blank-line paragraph breaks."""
        if self.params["crlf"] != "quick":
            # split on linebreaks and translate each individually
            text = "\n\n".join(self.translate(para, lang) for para in text.split("\n\n"))
        else:
            # use temporary placeholder `/#§/`
            text = self.translate(text.replace("\n\n", "/#§/"), lang)
            text = self.rx_breakln.sub("\n\n", text)
        return text


# not implemented yet: currently behaves exactly like the google backend
class deepl_web(google):
    # < https://www2.deepl.com/jsonrpc
    # cookies: LMTBID: GUID...
    # referer: https://www.deepl.com/translator
    # body: {"jsonrpc":"2.0","method": "LMT_handle_jobs","params":{"jobs":[{"kind":"default","raw_en_sentence":"...","raw_en_context_before":[],"raw_en_context_after":[],"preferred_num_beams":4,"quality":"fast"}],"lang":{"user_preferred_langs":["DE","EN"],"source_lang_user_selected":"auto","target_lang":"DE"},"priority":-1,"commonJobParams":{},"timestamp":1590258680854},"id":700000000}
    # > result.translations[0].beams[0].postprocessed_sentence
    pass


# not implemented yet: inherits everything from deepl_web
class deepl_api(deepl_web):
    pass


# requires `pip install translate`
class translate_python:
    """Backend delegating to the third-party `translate` package (microsoft provider)."""

    def __init__(self, params=None):
        self.params = {} if params is None else params  # config+argparse
        try:
            from translate import Translator
        except ImportError as err:
            # no GUI access from this module; report via log and a clear exception
            message = "Use `pip install translate` to use this module."
            log.error(message)
            raise ImportError(message) from err
        self.translator = Translator(
            provider="microsoft",
            to_lang=self.params.get("lang", "en"),
            secret_access_key=self.params.get("api_key", ""),
        )

    def translate(self, text, lang="auto"):
        """Translate `text`; `lang` is accepted for interface parity only."""
        # NOTE(review): the target language is fixed at construction time
        # (params["lang"]); a differing per-call `lang` is not honoured here.
        return self.translator.translate(text)

    def linebreakwise(self, text, lang="auto"):
        """Same as translate(); no separate paragraph handling for this backend."""
        return self.translate(text, lang)


# maps a t. object for config dict {"goog":1, "deepl":0}
def assign_service(params):
    """Return the backend instance selected by the truthy flags in `params` (default: google)."""
    if params.get("deepl_web"):
        return deepl_web(params)
    elif params.get("deepl_api"):
        return deepl_api(params)
    elif params.get("translate_python"):
        return translate_python(params)
    else:
        return google(params)