LibreOffice plugin to pipe whole Writer documents through Google Translate, that ought to keep most of the page formatting.

⌈⌋ branch:  PageTranslate


Artifact [500cbd9eec]

Artifact 500cbd9eec67c09846226bfc4fbd4a52a58ccccb:

  • File pagetranslate.py — part of check-in [3f945d5495] at 2021-06-10 14:50:23 on branch trunk — Move MessageBox() to unocompat (not actually used anymore, doesn't work in LO-dev-7.2 anyway), sys.excepthook doesn't suffice for dialog hookup. Add config btn_map{} for external tools from settings dialog. (user: mario size: 17523)

#!/usr/bin/python
# encoding: utf-8
# api: uno
# type: callback
# category: language
# title: PageTranslate
# description: Action button to get whole Writer document translated
# version: 1.9.74
# state: beta
# author: mario
# url: https://fossil.include-once.org/pagetranslate/
# depends: python:requests (>= 2.5), python:uno
# pack: *.py, pythonpath/*.py, META-INF/*, pkg-desc, *.x*, icons/*
# config:
#    { name: frames, type: bool, value: 0, description: traverse TextFrames }
#    { name: quick, type: bool, value: 0, description: newline placeholders }
#    { name: slow, type: bool, value: 0, description: traverse TextPortions }
#    { name: debug, type: bool, value: 1, description: default logging level }
#    { name: flag, type: str, value: "locale", description: second btn action }
# license: GNU LGPL 2.1
# forked-from: TradutorLibreText (Claudemir de Almeida Rosa)
# 
# LibreOffice plugin for translating documents that's supposed to retain formatting.
# Per default does not require a text selection to operate, but works on the whole
# page.
# The original mode (TradutorLibreText) is still supported and used whenever a text
# portion is selected. It also uses the default target language (English) then.
# Unless a different mode/language from the Tools>PageTranslate menu is requested.
#
# Beware that Writer freezes during the dozens of translation calls to Google.
# In particular long documents might take ages, because each paragraph/line or
# text longer than 1900 chars causes another roundtrip.
#
# Basic support for Draw/Impress documents is now provided. (No text selection
# mode there however).
#
# There's a configuration dialog now, under Tools→Options→[Language→PageTranslate].
# Where you can switch the translation service, and set a few options. You'll
# need an API key for DeepL API or Microsoft Translator. Or set an email for
# MyMemory, or a command for using a CLI translation program. Other services
# are provided by deep-translator or translate-python. (Might require a `pip
# install` each, unless you install a bundled 20MB extension release.)
#
# Always creates a log file: /tmp/pagetranslate-libreoffice.log
#
# Without pythonpath/ populated, this plugin won't work on Windows installations
# fully (only the Google Translate option is likely to).
#


# OpenOffice UNO bridge
import uno, unohelper
from com.sun.star.task import XJobExecutor
from unocompat import PropertyValue, XNamedAsEnumeration, MessageBox
from com.sun.star.awt import XActionListener, XContainerWindowEventHandler
from com.sun.star.lang import Locale, XServiceInfo, XInitialization
# core modules
import os, sys
import string, json, re
from traceback import format_exc
from tempfile import gettempdir
# log setup
import logging as log
log.basicConfig(filename='%s/pagetranslate-libreoffice.log'%gettempdir(), level=log.DEBUG)
# The hook is handed (type, value, traceback) as arguments. No exception is being
# handled anymore when it runs, so format_exc() would merely log "NoneType: None";
# pass the received triple on to the logger instead.
sys.excepthook = lambda *exc: log.critical("Uncaught exception", exc_info=exc)
# pythonpath/*.py modules
import httprequests
import translationbackends
import pt_dialogs


# Office plugin
class pagetranslate(unohelper.Base, XJobExecutor):
    """
    Job executor behind the PageTranslate toolbar/menu actions.

    `trigger()` merges the defaults below with the stored settings and the
    arguments of the invoking URL, then either translates the current text
    selection, the DrawPages of a Draw/Impress document, or walks over all
    text content of the Writer document.
    """

    # defaults + config + command args
    # (class-level template; every instance works on its own copy, see __init__)
    params = dict(
        mode = "page",      # "trigger"/"page", or "tradutor"
        lang = "en",        # target language, or "flag", or "paragraph", "locale", "select", "mri-debug"
        frames = 0,         # also process TextFrames (subdocuments)
        quick = 0,          # use temporary newline placeholders, or split/iterate over text sections
        slow = 0,           # further split over paragraph segments/formatting (super slow mode)
        debug = 1,          # logging level
        backend = "Google", # backend to use, (string name replaces old flags)
        api_key = "",       # API key
        email = "",         # MyMemory email
        cmd = "translate-cli -o -f auto -t {lang} {text}",  # cli tool
        flag = "locale",    # default lang for secondary 🏴 button
    )
    t = None   #= translationbackends.google(self.params)

    # gets instantiated as XJobExecutor by LibreOffice
    def __init__(self, ctx):
        """Keep the component context, and look up the desktop + current document."""
        log.info("init")
        self.ctx = ctx
        # private copy: trigger() rewrites entries, which must not leak into the
        # class-level defaults shared by all other instances
        self.params = dict(self.params)
        self.desktop = self.ctx.ServiceManager.createInstanceWithContext( "com.sun.star.frame.Desktop", self.ctx )
        self.document = self.desktop.getCurrentComponent()
        #self.dispatcher = self.ctx.ServiceManager.createInstanceWithContext("com.sun.star.frame.DispatchHelper", self.ctx)

    # invoked from toolbar button
    def trigger(self, args):
        """
        Entry point: run the translation action described by `args`.

        `args` is the argument string handed over by the office; it gets
        dissected by argparse(). Any exception is logged and forwarded to
        the error dialog via exc(), it does not propagate to the caller.
        """
        log.info(".trigger(args='%s') invoked" % repr(args))
        try:
            # merge defaults from registry + params from args
            self.params["from"] = "auto"
            self.params.update(settings(self.ctx).read())
            self.params.update(self.argparse(args))
            # guard the index access, in case no log handler got installed
            if self.params.get("debug") and log.root.handlers:
                log.root.handlers[0].setLevel(log.DEBUG)
            # mode=mri inspects the plugin object itself, and then carries on
            if self.params.get("mode") == "mri":
                self.mri(self)
            if self.params.get("lang") == "flag":
                self.params["lang"] = self.params.get("flag", "locale")
            # argparse() only resolves "locale" when it came in per args; the stored
            # `flag` setting (default "locale") bypasses that, so resolve it here too
            if self.params.get("lang") == "locale":
                self.params["lang"] = self.getOoLocale()
            if self.params.get("lang") == "select" or self.params.get("from") == "select":
                self.params["from"], self.params["lang"] = pt_dialogs.langselect()
            if self.params.get("lang") in ("mri-debug", "mri", "debug"):
                return self.mri(self.document)
            log.info(repr(self.params))

            # Draw/Impress?
            log.debug(dir(self.document))
            if self.document.supportsService("com.sun.star.drawing.DrawingDocument") or self.document.supportsService("com.sun.star.presentation.PresentationDocument"):
                self.assign_t()
                self.drawtranslate(self.document.getDrawPages())
                return

            # check for text selection, and switch to TradutorLibreText method then
            selection = self.document.getCurrentController().getSelection().getByIndex(0)
            if selection.getString():
                self.rewrite_selection(selection)
                return

            # else iterate over text snippets
            self.assign_t()
            tree = self.document.getText().createEnumeration()
            self.traverse(tree, slow=self.params.get("slow"))
            if self.params.get("frames"):
                self.traverse(XNamedAsEnumeration(self.document.getTextFrames()))
        # show message box for errors from wherever
        except Exception as exc:
            self.exc(exc)
        finally:
            log.info("----")

    # central handler for errors
    def exc(self, exc, *a, **kw):
        """Log the traceback of the exception being handled, and show it in the error dialog."""
        dump = format_exc()
        log.error(dump)
        pt_dialogs.exception(err=str(exc), exc=dump)
        #except:
        #    MessageBox(self, dump)

    # map self.t.translate() implementation according to settings
    def assign_t(self):
        """(Re)create the translation backend `self.t` from the current params."""
        self.t = translationbackends.assign_service(self.params)
        log.info(self.t)

    # break up UNO service: url query string `.pagetranslate?page&lang=en`
    def argparse(self, args):
        """
        Turn the argument string into a dict of key=value pairs.

        The leading word is treated as `mode`. A `lang=locale` entry gets
        replaced by the office locale from getOoLocale().
        """
        # parameterize leading ?action&
        args = "mode=" + (args or "")
        # key=value pairs (raw string, `\w` is not a valid str escape)
        params = dict(re.findall(r"(\w+)=([\w-]+)", args))
        # replace default locale
        if params.get("lang", "-") == "locale":
            params["lang"] = self.getOoLocale()
        return params

    # debugging/introspection
    def mri(self, obj):
        """Hand `obj` to the "mytools.Mri" inspection service."""
        mri = self.ctx.ServiceManager.createInstanceWithContext("mytools.Mri", self.ctx)
        mri.inspect(obj)

    #-- iterate over TextContent/TextTable nodes
    def traverse(self, tree, slow=0):
        """
        Translate every element of the enumeration `tree` in place.

        Tables are processed cell by cell, TextFrames are descended into.
        With `slow` set, paragraphs are split into their text portions first
        (one level only); otherwise each element's string is translated and
        written back as a whole.
        """
        log.info("TextDocument.Enumeration…")
        while tree.hasMoreElements():
            para = tree.nextElement()
            log.info(para)
            # table/cells
            if para.supportsService("com.sun.star.text.TextTable"):
                for cellname in para.getCellNames():
                    log.debug(cellname)
                    text = para.getCellByName(cellname).getText()
                    #self.traverse(text.createEnumeration())
                    text.setString(self.t.linebreakwise(text.getString())) # or .translate #linebreakwise
            # subdocuments?
            elif para.supportsService("com.sun.star.text.TextFrame"):
                log.debug("TextFrame.Enumeration…")
                self.traverse(para.getText().createEnumeration())
            # a paragraph can be further enumerated for text portions (same character/style attributes),
            # but that will obviously slow things down further / also complicate coherent translations
            elif slow and para.supportsService("com.sun.star.text.Paragraph"): # doesn't work with com.sun.star.container.XEnumerationAccess?
                self.traverse(para.createEnumeration(), slow=0)  # list of TextPortion`s
            # normal flow text / paragraph
            elif para.supportsService("com.sun.star.text.TextContent") or para.supportsService("com.sun.star.text.TextPortion"):
                para.setString(self.t.translate(para.getString()))
            else:
                log.warning("Unsupported document element.")
                #self.mri(para)

    #-- iterate over DrawPages and TextShapes
    def drawtranslate(self, pages):
        """Translate the text of every TextShape on all of the given draw pages."""
        for pi in range(pages.getCount()):
            page = pages.getByIndex(pi)
            for si in range(page.getCount()):
                shape = page.getByIndex(si)
                if shape.supportsService("com.sun.star.drawing.TextShape"):
                    log.info(shape)
                    shape.Text.setString(self.t.translate(shape.Text.getString()))

    #-- TradutorLibreText (selection rewrite)
    def rewrite_selection(self, xTextRange):
        """
        Replace the selected text range with its translation.

        For lang="paragraph" or mode="tradutor" the target language is taken
        from the character locale of the selection.
        """
        log.info("rewrite text selection")

        # Get selected text and language
        source = xTextRange.getString()
        if self.params["lang"] == "paragraph":
            self.params["lang"] = xTextRange.CharLocale.Language
        elif self.params["mode"] == "tradutor":
            self.params["lang"] = self.getParaLang(xTextRange).Language
        log.debug("paragraph.lang="+self.params["lang"])

        # we kinda have to reinstantiate the backend late, because params` lang= might be hard-applied to handler (e.g. translate-python)
        self.assign_t()

        # translate/replace (plain text) with linebreaks intact
        trans = self.t.linebreakwise(source)
        # literal backslash-n / backslash-r sequences in the result become real newlines
        trans = trans.replace('\\n',"\n").replace('\\r',"\n")
        log.info(".setString from '"+source+"' to ("+self.params["lang"]+")='"+trans+"'")
        xTextRange.setString(trans)

    # Query system locale
    def getOoLocale(self):
        """Return the `ooLocale` value from the /org.openoffice.Setup/L10N configuration node."""
        self.LocaleData = self.ctx.ServiceManager.createInstanceWithContext("com.sun.star.i18n.LocaleData", self.ctx)
        L10Ncfg = self.ctx.ServiceManager.createInstanceWithContext("com.sun.star.configuration.ConfigurationProvider", self.ctx)
        nodepath = PropertyValue(Name="nodepath", Value="/org.openoffice.Setup/L10N")
        code = L10Ncfg.createInstanceWithArguments("com.sun.star.configuration.ConfigurationAccess", (nodepath,)).getByName("ooLocale")
        log.info("ooLocale="+repr(code))
        return code

    # Langinfo=(com.sun.star.i18n.LanguageCountryInfo){ Language = (string)"de", LanguageDefaultName = (string)"German", Country = (string)"DE", CountryDefaultName = (string)"Germany", Variant = (string)"" }
    def getParaLang(self, xTextRange):
        """Return the LanguageCountryInfo for the character locale of `xTextRange`."""
        # create the LocaleData service on demand, rather than relying on an
        # earlier getOoLocale() call to have set it up
        if getattr(self, "LocaleData", None) is None:
            self.LocaleData = self.ctx.ServiceManager.createInstanceWithContext("com.sun.star.i18n.LocaleData", self.ctx)
        Langinfo = self.LocaleData.getLanguageCountryInfo(xTextRange.CharLocale)
        log.info("Langinfo="+repr(Langinfo))
        return Langinfo


# XActionListener for callbacks
class action_listener(unohelper.Base, XActionListener):
    """Minimal XActionListener that forwards actionPerformed() to the callable `cb`."""

    def __init__(self, cb):
        # the callback is invoked with the arguments of actionPerformed()
        self.actionPerformed = cb

    def disposing(self, event):
        """XEventListener part of the interface; there is nothing to release here."""
        pass

# Handler for settings-embedded DialogOptions.xdl window, and read/write access to our leaf in the office registry.
# (This is fairly generic/reusable, because it directly maps a dict to/from the dialog widgets.)
#
class settings(unohelper.Base, XContainerWindowEventHandler, XServiceInfo):
    """
    Event handler for the options dialog page, plus read/write access to the
    plugin's configuration node.

    Dialog controls are mapped by their model name onto the entries of the
    config dict; controls named in `btn_map` run an external command instead.
    """
    impl_id = "vnd.include-once.OptionsPageTranslate"
    # control name → shell command to run when that button is pressed
    btn_map = {
        "cfg_argos": "PYTHONPATH= argos-translate-gui &",
        "cfg_deps": "x-terminal-emulator -c 'pip install -U requests translate deep-translator argos-translate' &",
    }

    def __init__(self, ctx, *kargs):
        """Open the configuration node; `self.access` is None if that failed."""
        log.info("OptionsPageTranslate:settings.__init__()")
        self.access = self.updatemgr(ctx)
        log.debug(dir(self.access)) #→ the config access methods, plus our option names (api_key, debug, …)

    # get handle on OpenOffice registry (read/write)
    def updatemgr(self, ctx, registry="/vnd.include-once.pagetranslate.Options/Leaves/Flags"):
        """Return a ConfigurationUpdateAccess for `registry`, or None (error is logged)."""
        try:
            nodepath = PropertyValue(Name="nodepath", Value=registry)
            config = ctx.ServiceManager.createInstanceWithContext("com.sun.star.configuration.ConfigurationProvider", ctx)
            return config.createInstanceWithArguments("com.sun.star.configuration.ConfigurationUpdateAccess", (nodepath,))
        except Exception:
            log.error(format_exc())
            return None

    # read/store config dict
    def read(self):
        """Return all entries of the config node as dict; {} if it can't be read."""
        log.debug("OptonsPageTranslate:settings.read()")
        try:
            return dict((name, self.access.getByName(name)) for name in self.access.getElementNames())
        except Exception:
            log.error(format_exc())
            return {}

    def write(self, cfg):
        """Store those entries of `cfg` that exist in the config node, then commit."""
        for name, value in cfg.items():
            if self.access.hasByName(name):
                self.access.setPropertyValue(name, value)
        self.access.commitChanges()

    # builds the button callback in its own scope, so each button keeps its own command
    @staticmethod
    def _command_runner(cmd):
        return lambda *x: os.system(cmd)

    # invoked on dialog initialization or for saving
    def callHandlerMethod(self, window=".UnoDialogControl", action="initialize|ok|back", name="external_event"):
        """
        Fill the dialog controls from the config ("initialize"), or collect
        and store their values ("ok"). Errors are logged; always returns True.
        """
        log.debug("OptonsPageTranslate:settings.callHandlerMethod({}, {}, {})".format(repr(window), action, name))
        try:
            params = self.read()
            log.info(repr(params))
            # iterate over all dialog controls by name, and assign from/to config dict
            for cntrl in window.getControls():
                cname = cntrl.Model.Name
                log.info(cname)
                if cname in self.btn_map:
                    # hook up once per dialog; re-adding on "ok"/"back" would stack up
                    # listeners. The command is bound now, not looked up via the loop
                    # variable at click time (which would hold the last control name).
                    if action == "initialize":
                        cntrl.addActionListener(action_listener(self._command_runner(self.btn_map[cname])))
                elif action == "initialize":
                    self.setControlValue(cntrl, params.get(cname))
                elif action == "ok":
                    params[cname] = self.getControlValue(cntrl)
            if action == "ok":
                self.write(params)
        except Exception:
            log.error(format_exc())
        return True

    # deal with CheckBox/TextEdit control differences
    def getControlValue(self, c):
        """Read a control: 0/1 for `State`, str for `Text` or the selected item; else None."""
        if hasattr(c, "State"):
            return int(1 if c.State else 0)
        elif hasattr(c, "Text"):
            return str(c.Text)
        elif hasattr(c, "getSelectedItem"):
            return str(c.getSelectedItem())

    def setControlValue(self, c, value):
        """Assign `value` to a control; empty/missing values become 0 or ""."""
        if hasattr(c, "State"):
            c.State = int(value if value else 0)
        elif hasattr(c, "Text"):
            c.Text = str(value if value else "")
        elif hasattr(c, "selectItem"):
            c.selectItem(str(value if value else ""), True)
        #else: log.debug([c, dir(c)])

    # XContainerWindowEventHandler
    def getSupportedMethodNames(self): return ("external_event",)
    # XServiceInfo
    def supportsService(self, name): return (name == self.impl_id)
    def getImplementationName(self): return self.impl_id
    def getSupportedServiceNames(self): return (self.impl_id,)
    def getServiceNames(self): return (self.impl_id,)


# register with LibreOffice
g_ImplementationHelper = unohelper.ImplementationHelper()
# the toolbar/menu job
g_ImplementationHelper.addImplementation(
    pagetranslate,
    "org.openoffice.comp.pyuno.pagetranslate",
    ("com.sun.star.task.Job",),
)
# the handler for the options dialog page (declares no service names)
g_ImplementationHelper.addImplementation(
    settings,
    settings.impl_id,
    (),
)