LibreOffice plugin that pipes whole Writer documents through Google Translate while trying to keep most of the page formatting.

⌈⌋ ⎇ branch:  PageTranslate


Artifact [67dd9c3bf1]

Artifact 67dd9c3bf1793be7d3c09b0839dce99a550cfacc:

  • File pagetranslate.py — part of check-in [10276ceb8e] at 2020-05-25 17:22:37 on branch trunk — Remove some logging, fix setLevel on debug mode. Move config dialog into Language settings leave. Remove `crlf` in favour of `quick` mode. (user: mario size: 14334)

#!/usr/bin/python
# encoding: utf-8
# api: uno
# type: callback
# category: language
# title: PageTranslate
# description: Action button to get whole Writer document translated
# version: 1.3
# state: beta
# author: mario
# url: https://fossil.include-once.org/pagetranslate/
# depends: python:requests (>= 2.5), python:translate
# pack: *.py, pythonpath/*.py, META-INF/*, pkg-desc, *.x*, icons/*
# license: GNU LGPL 2.1
# forked-from: TradutorLibreText (Claudemir de Almeida Rosa)
# config: -
# 
# LibreOffice plugin for translating documents while retaining their formatting.
# By default it does not require a text selection to operate, but works on the
# whole page.
# The original mode (TradutorLibreText) is still supported and used whenever a text
# portion is selected. It then also uses the default target language (English),
# unless a different mode/language is requested from the Tools>PageTranslate menu.
#
# Beware that Writer freezes during the dozens of translation calls to Google.
# In particular long documents might take ages, because each paragraph/line or
# text longer than 1900 chars causes another roundtrip.
#
# Basic support for Draw/Impress documents is now provided. (No text selection
# mode there however).
#
# There's a configuration dialog now, under Tools→Options→[Writer→PageTranslate].
# Where you can switch the translation service, and set a few options. You'll
# need an API key for DeepL API or Microsoft Translate. Or set an email for
# MyMemory, or a command for using a CLI translation program.
#
# Always creates a log file: /tmp/pagetranslate-libreoffice.log
#
# Without pythonpath/ populated, this plugin won't fully work on Windows
# installations (only the Google Translate option is likely to).
#


# OpenOffice UNO bridge
import uno, unohelper
from com.sun.star.task import XJobExecutor
from com.sun.star.awt.MessageBoxButtons import BUTTONS_OK, BUTTONS_OK_CANCEL, BUTTONS_YES_NO, BUTTONS_YES_NO_CANCEL, BUTTONS_RETRY_CANCEL, BUTTONS_ABORT_IGNORE_RETRY
from com.sun.star.awt.MessageBoxButtons import DEFAULT_BUTTON_OK, DEFAULT_BUTTON_CANCEL, DEFAULT_BUTTON_RETRY, DEFAULT_BUTTON_YES, DEFAULT_BUTTON_NO, DEFAULT_BUTTON_IGNORE
from com.sun.star.awt.MessageBoxType import MESSAGEBOX, INFOBOX, WARNINGBOX, ERRORBOX, QUERYBOX
from com.sun.star.beans import PropertyValue
from com.sun.star.awt import XActionListener, XContainerWindowEventHandler
from com.sun.star.lang import Locale, XServiceInfo, XInitialization
# sys modules
import os, sys
import string, json, re
from traceback import format_exc
from tempfile import gettempdir
# log file
# `log` is the stdlib logging module itself (not a Logger instance). basicConfig()
# attaches a file handler to the root logger, writing into the system temp
# directory, and sets the root logger level to WARNING.
import logging as log
log.basicConfig(filename='%s/pagetranslate-libreoffice.log'%gettempdir(), level=log.WARNING)
# pythonpath/*.py modules
# Each bundled module gets the same `log` object assigned as its module-level
# `log` attribute, right after it is imported.
import httprequests
httprequests.log = log
import translationbackends
translationbackends.log = log



# Office plugin
class pagetranslate(unohelper.Base, XJobExecutor):
    # UNO job behind the PageTranslate toolbar button / menu entries.
    # LibreOffice instantiates it with the component context and then calls
    # .trigger() with the argument part of the service URL.

    # defaults + config + command args
    # (class-level defaults only; every instance works on its own copy, see __init__)
    params = dict(
        mode = "page",      # "trigger"/"page", or "tradutor"
        lang = "en",        # target language, or "paragraph", or "locale"
        quick = 0,          # use temporary newline placeholders, or split/iterate over text sections
        debug = 1,          # logging level
        google = 1,         # backend to use
        deepl_web = 0,
        deepl_api = 0,
        deepl_key = "",
        microsoft = 0,
        mymemory = 0,
        cli = 0,
    )
    t = None   #= translationbackends.google(self.params)


    # gets instantiated as XJobExecutor by LibreOffice
    #  · ctx: UNO component context, used for all service lookups
    def __init__(self, ctx):
        log.info("init")
        self.ctx = ctx
        # Per-instance copy: trigger() and rewrite_selection() modify the dict in
        # place, which would otherwise rewrite the shared class defaults and leak
        # e.g. a one-off `lang` into every later invocation.
        self.params = dict(self.params)
        desktop = self.ctx.ServiceManager.createInstanceWithContext( "com.sun.star.frame.Desktop", self.ctx )
        self.document = desktop.getCurrentComponent()
        #self.dispatcher = self.ctx.ServiceManager.createInstanceWithContext("com.sun.star.frame.DispatchHelper", self.ctx)


    # invoked from toolbar button
    #  · args: query string part of the service URL, e.g. `page&lang=en`
    # Any exception is logged and shown in a message box instead of being raised.
    def trigger(self, args):
        log.debug(".trigger(args=%s) invoked" % repr(args))
        try:
            # merge defaults from registry + params from args
            self.params.update(settings(self.ctx).read())
            self.params.update(self.argparse(args))
            # basicConfig() put the WARNING threshold on the root logger, so the
            # level has to be changed there; lowering only the handler level never
            # lets DEBUG/INFO records through.
            log.root.setLevel(log.DEBUG if self.params.get("debug") else log.WARNING)
            log.info(repr(self.params))

            # map self.t.translate() implementation according to settings
            self.t = translationbackends.assign_service(self.params)
            log.info(self.t)

            # Draw/Impress?
            log.debug(dir(self.document))
            if self.document.supportsService("com.sun.star.drawing.DrawingDocument") or self.document.supportsService("com.sun.star.presentation.PresentationDocument"):
                log.info(self.document)
                self.drawtranslate(self.document.getDrawPages())
                return

            # check for text selection, and switch to TradutorLibreText method then
            selection = self.document.getCurrentController().getSelection().getByIndex(0)
            if selection.getString():
                return self.rewrite_selection(selection)

            # else iterate over text snippets
            tree = self.document.getText().createEnumeration()
            self.traverse(tree)

        except Exception:
            trace = format_exc()
            log.error(trace)
            self.MessageBox(trace)
        finally:
            # separator between invocations, also written on the early-return paths
            log.info("----")


    # break up UNO service: url query string `.pagetranslate?page&lang=en`
    #  · args: string such as `page&lang=en`
    #  · returns a dict like {"mode": "page", "lang": "en"}; all values are strings
    def argparse(self, args):
        # parameterize leading ?action&
        args = "mode=" + args
        # key=value pairs (raw string, since "\w" is not a valid string escape)
        params = dict(re.findall(r"(\w+)=([\w-]+)", args))
        # replace default locale
        if params.get("lang","-") == "locale":
            params["lang"] = self.getOoLocale()
        return params


    #-- iterate over TextContent/TextTable nodes
    #  · tree: enumeration as returned by document.getText().createEnumeration()
    # Each element's text is replaced in place with its translation.
    def traverse(self, tree):
        log.info("TextDocument.Enumeration…")
        while tree.hasMoreElements():
            para = tree.nextElement()
            log.info(para)
            # table/cells
            if para.supportsService("com.sun.star.text.TextTable"):
                for cellname in para.getCellNames():
                    log.debug(cellname)
                    text = para.getCellByName(cellname).getText()
                    #self.traverse(text.createEnumeration())
                    text.setString(self.t.linebreakwise(text.getString())) # or .translate #linebreakwise
            # normal flow text
            elif para.supportsService("com.sun.star.text.TextContent"):
                para.setString(self.t.translate(para.getString()))
                # the paragraph itself can be enumerated for text portions,
                # but for now it's really slow enough
            else:
                log.warning("Unsupported document element.")

    #-- iterate over DrawPages and TextShapes
    #  · pages: index-accessible collection from document.getDrawPages()
    # Only shapes supporting the TextShape service are translated.
    def drawtranslate(self, pages):
        for pi in range(pages.getCount()):
            page = pages.getByIndex(pi)
            for si in range(page.getCount()):
                shape = page.getByIndex(si)
                if shape.supportsService("com.sun.star.drawing.TextShape"):
                    log.info(shape)
                    shape.Text.setString(self.t.translate(shape.Text.getString()))


    #-- TradutorLibreText (selection rewrite)
    #  · xTextRange: the selected text range; its string is replaced by the translation
    # Translation errors are shown in a message box, not raised.
    def rewrite_selection(self, xTextRange):
        log.info("rewrite text selection")

        # Get selected text
        text = xTextRange.getString()
        if self.params["lang"] == "paragraph":
            self.params["lang"] = xTextRange.CharLocale.Language
        elif self.params["mode"] == "tradutor":
            self.params["lang"] = self.getParaLang(xTextRange).Language

        try:
            trans = self.t.linebreakwise(text)
            # turn literal backslash-n / backslash-r sequences into real newlines
            trans = trans.replace('\\n',"\n").replace('\\r',"\n")
            xTextRange.setString(trans)
        except Exception:
            self.MessageBox(format_exc())

    # Query system locale
    #  · returns the `ooLocale` value from the /org.openoffice.Setup/L10N registry node
    # Also stores the LocaleData service in self.language and the
    # ConfigurationProvider in self.lang.
    def getOoLocale(self):
        self.language = self.ctx.ServiceManager.createInstanceWithContext("com.sun.star.i18n.LocaleData", self.ctx)
        self.lang = self.ctx.ServiceManager.createInstanceWithContext("com.sun.star.configuration.ConfigurationProvider", self.ctx)
        nodepath = PropertyValue()
        nodepath.Name = "nodepath"
        nodepath.Value = "/org.openoffice.Setup/L10N"
        access = self.lang.createInstanceWithArguments("com.sun.star.configuration.ConfigurationAccess", (nodepath,))
        code = access.getByName("ooLocale")
        log.info("ooLocale="+repr(code))
        return code

    # Langinfo=(com.sun.star.i18n.LanguageCountryInfo){ Language = (string)"de", LanguageDefaultName = (string)"German", Country = (string)"DE", CountryDefaultName = (string)"Germany", Variant = (string)"" }
    #  · xTextRange: text range whose CharLocale gets looked up
    #  · returns the LanguageCountryInfo struct for that locale
    def getParaLang(self, xTextRange):
        # The LocaleData service is otherwise only created by getOoLocale();
        # set it up on demand so this lookup doesn't depend on call order.
        if getattr(self, "language", None) is None:
            self.language = self.ctx.ServiceManager.createInstanceWithContext("com.sun.star.i18n.LocaleData", self.ctx)
        Langinfo = self.language.getLanguageCountryInfo(xTextRange.CharLocale)
        log.info("Langinfo="+repr(Langinfo))
        return Langinfo

    # user notifications
    #  · MsgText/MsgTitle: message body and window title
    #  · MsgType/MsgButtons: com.sun.star.awt MessageBoxType / MessageBoxButtons constants
    #  · returns the result of the message box's execute() call
    def MessageBox(self,MsgText, MsgTitle="", MsgType=MESSAGEBOX, MsgButtons=BUTTONS_OK):
        ParentWin = self.document.getCurrentController().Frame.ContainerWindow
        ctx = uno.getComponentContext()
        toolkit = ctx.ServiceManager.createInstanceWithContext("com.sun.star.awt.Toolkit", ctx)
        myBox = toolkit.createMessageBox(ParentWin, MsgType, MsgButtons, MsgTitle, MsgText)
        return myBox.execute()



# handler for settings-embedded DialogOptions.xdl window,
# and read/write access to our leaf in the office registry
#
class settings(unohelper.Base, XContainerWindowEventHandler, XServiceInfo):
    impl_id = "vnd.include-once.OptionsPageTranslate"

    #  · ctx: UNO component context, used to open the registry node
    def __init__(self, ctx, *kargs):
        # None if the registry node could not be opened (updatemgr logs the reason)
        self.access = self.updatemgr(ctx)
        #log.debug(dir(self.access)) → [..., 'commitChanges', 'getByName', 'getElementNames', 'hasByName', 'setPropertyValue', ...]
        # plus one attribute per registry entry ('api_key', 'debug', 'deepl_api', 'deepl_web', 'google')

    # get handle on OpenOffice registry (read/write)
    #  · returns a ConfigurationUpdateAccess for `registry`, or None on failure
    def updatemgr(self, ctx, registry="/vnd.include-once.pagetranslate.Options/Leaves/Flags"):
        try:
            nodepath = PropertyValue(Name="nodepath", Value=registry)
            config = ctx.ServiceManager.createInstanceWithContext("com.sun.star.configuration.ConfigurationProvider", ctx)
            return config.createInstanceWithArguments("com.sun.star.configuration.ConfigurationUpdateAccess", (nodepath,))
        except Exception:
            log.error(format_exc())
            return None

    # read/store config dict
    #  · read() returns {entry name: value}; empty if the registry is not accessible
    def read(self):
        if self.access is None:
            return {}
        return {name: self.access.getByName(name) for name in self.access.getElementNames()}
    #  · write() stores only the keys that already exist in the registry node
    def write(self, cfg):
        if self.access is None:
            log.error("settings.write(): registry not accessible, options not saved")
            return
        for name, value in cfg.items():
            if self.access.hasByName(name):
                self.access.setPropertyValue(name, value)
        self.access.commitChanges()

    # invoked on dialog initialization or for saving
    #  · window: dialog whose controls are named like the registry entries
    #  · action: "initialize" fills the controls, "ok" stores them; anything else is ignored
    #  · name: handler method name; not evaluated here
    # Always returns True; errors are only logged.
    def callHandlerMethod(self, window=".UnoDialogControl", action="initialize|ok|back", name="external_event"):
        try:
            params = self.read()
            # iterate over all dialog controls by name, and assign from/to config dict
            for cntrl in window.getControls():
                key = cntrl.Model.Name
                if action == "initialize":
                    # leave controls without a registry entry untouched (otherwise
                    # they'd be set to "", and int("") fails for State controls)
                    if key in params:
                        self.setControlValue(cntrl, params[key])
                elif action == "ok":
                    value = self.getControlValue(cntrl)
                    # None = control type has neither State nor Text; nothing to store
                    if value is not None:
                        params[key] = value
            if action == "ok":
                self.write(params)
        except Exception:
            log.error(format_exc())
        return True

    # deal with CheckBox/TextEdit control differences
    #  · controls with .State yield 0/1, controls with .Text a string, others None
    def getControlValue(self, c):
        if hasattr(c, "State"):
            return 1 if c.State else 0
        if hasattr(c, "Text"):
            return str(c.Text)
        return None
    def setControlValue(self, c, value):
        if hasattr(c, "State"):
            c.State = int(value)
        elif hasattr(c, "Text"):
            c.Text = str(value)


    # XContainerWindowEventHandler
    def getSupportedMethodNames(self): return ("external_event",)
    # XServiceInfo
    def supportsService(self, name): return (name == self.impl_id)
    def getImplementationName(self): return self.impl_id
    def getSupportedServiceNames(self): return (self.impl_id,)
    def getServiceNames(self): return (self.impl_id,)
    


# register both components with LibreOffice
g_ImplementationHelper = unohelper.ImplementationHelper()
# the translation job itself
g_ImplementationHelper.addImplementation(
    pagetranslate,
    "org.openoffice.comp.pyuno.pagetranslate",
    ("com.sun.star.task.Job",),
)
# the options dialog handler (registered without service names)
g_ImplementationHelper.addImplementation(
    settings,
    settings.impl_id,
    (),
)