LibreOffice plugin to pipe whole Writer documents through Google Translate, that ought to keep most of the page formatting.

⌈⌋ branch:  PageTranslate


Artifact Content

Artifact 93a4fd1fcf2ec62cf17ef188a5d804aad86d25e7:

  • File pagetranslate.py — part of check-in [5a0ee83349] at 2020-06-10 05:08:32 on branch trunk — Add default flags `-o -t {lang}` for cli backend. (user: mario size: 15679)

#!/usr/bin/python
# encoding: utf-8
# api: uno
# type: callback
# category: language
# title: PageTranslate
# description: Action button to get whole Writer document translated
# version: 1.5.66
# state: beta
# author: mario
# url: https://fossil.include-once.org/pagetranslate/
# depends: python:requests (>= 2.5), python:translate
# pack: *.py, pythonpath/*.py, META-INF/*, pkg-desc, *.x*, icons/*
# license: GNU LGPL 2.1
# forked-from: TradutorLibreText (Claudemir de Almeida Rosa)
# config: -
# 
# LibreOffice plugin for translating documents, which is supposed to retain formatting.
# By default it does not require a text selection to operate, but works on the
# whole page.
# The original mode (TradutorLibreText) is still supported and used whenever a text
# portion is selected. It then also uses the default target language (English),
# unless a different mode/language is requested from the Tools>PageTranslate menu.
#
# Beware that Writer freezes during the dozens of translation calls to Google.
# In particular, long documents might take ages, because each paragraph/line or
# text longer than 1900 chars causes another roundtrip.
#
# Basic support for Draw/Impress documents is now provided. (No text selection
# mode there however).
#
# There's a configuration dialog now, under Tools→Options→[Language→PageTranslate].
# Where you can switch the translation service, and set a few options. You'll
# need an API key for DeepL API or Microsoft Translate. Or set an email for
# MyMemory, or a command for using a CLI translation program.
#
# Always creates a log file in the system temp directory, e.g. /tmp/pagetranslate-libreoffice.log
#
# Without pythonpath/ populated, this plugin won't work on Windows installations
# fully (only the Google Translate option is likely to).
#


# OpenOffice UNO bridge
import uno, unohelper
from com.sun.star.task import XJobExecutor
from com.sun.star.awt.MessageBoxButtons import BUTTONS_OK, BUTTONS_OK_CANCEL, BUTTONS_YES_NO, BUTTONS_YES_NO_CANCEL, BUTTONS_RETRY_CANCEL, BUTTONS_ABORT_IGNORE_RETRY
from com.sun.star.awt.MessageBoxButtons import DEFAULT_BUTTON_OK, DEFAULT_BUTTON_CANCEL, DEFAULT_BUTTON_RETRY, DEFAULT_BUTTON_YES, DEFAULT_BUTTON_NO, DEFAULT_BUTTON_IGNORE
from com.sun.star.awt.MessageBoxType import MESSAGEBOX, INFOBOX, WARNINGBOX, ERRORBOX, QUERYBOX
from unocompat import PropertyValue, XNamedAsEnumeration
from com.sun.star.awt import XActionListener, XContainerWindowEventHandler
from com.sun.star.lang import Locale, XServiceInfo, XInitialization
# sys modules
import os, sys
import string, json, re
from traceback import format_exc
from tempfile import gettempdir
# log file: messages from DEBUG level upwards are written to
# "pagetranslate-libreoffice.log" inside the directory returned by gettempdir()
import logging as log
log.basicConfig(filename='%s/pagetranslate-libreoffice.log'%gettempdir(), level=log.DEBUG)
# pythonpath/*.py modules
import httprequests
import translationbackends



# Office plugin
class pagetranslate(unohelper.Base, XJobExecutor):
    """
    Job component that translates the current document with the configured backend.

    Gets instantiated with the component context, and `trigger()` is then called
    with the argument string (e.g. `page&lang=en`). Writer documents are walked
    paragraph-wise, Draw/Impress documents shape-wise, and a text selection is
    rewritten directly (TradutorLibreText mode).
    """

    # defaults + config + command args
    params = dict(
        mode = "page",      # "trigger"/"page", or "tradutor"
        lang = "en",        # target language, or "paragraph", or "locale"
        frames = 0,         # also process TextFrames (subdocuments)
        quick = 0,          # use temporary newline placeholders, or split/iterate over text sections
        slow = 0,           # further split over paragraph segments/formatting (super slow mode)
        debug = 1,          # logging level
        google = 1,         # backend to use
        deepl_web = 0,
        deepl_api = 0,
        api_key = "",
        microsoft = 0,
        mymemory = 0,
        email = "",
        cli = 0,
        cmd = "translate-cli -o -f auto -t {lang} {text}",
    )
    t = None   #= translationbackends.google(self.params)


    # gets instantiated as XJobExecutor by LibreOffice
    def __init__(self, ctx):
        """
        Remember the component context and the currently active document.

        :param ctx: UNO component context handed in by the office.
        """
        log.info("init")
        self.ctx = ctx
        # Per-instance copy: trigger() and rewrite_selection() modify the dict, and
        # without the copy those changes would end up in the class-level defaults
        # that every later instance starts from.
        self.params = dict(self.params)
        desktop = self.ctx.ServiceManager.createInstanceWithContext( "com.sun.star.frame.Desktop", self.ctx )
        self.document = desktop.getCurrentComponent()
        #self.dispatcher = self.ctx.ServiceManager.createInstanceWithContext("com.sun.star.frame.DispatchHelper", self.ctx)


    # invoked from toolbar button
    def trigger(self, args):
        """
        Entry point: merge settings, pick the document type, and translate.

        Any exception is logged and shown in an error message box rather than
        being propagated to the caller.

        :param args: argument string, e.g. `page&lang=en`.
        """
        log.info(".trigger(args=%s) invoked" % repr(args))
        try:
            # merge defaults from registry + params from args
            self.params.update(settings(self.ctx).read())
            self.params.update(self.argparse(args))
            if self.params.get("debug"):
                log.root.handlers[0].setLevel(log.DEBUG)
            log.info(repr(self.params))

            # Draw/Impress?
            log.debug(dir(self.document))
            if self.document.supportsService("com.sun.star.drawing.DrawingDocument") or self.document.supportsService("com.sun.star.presentation.PresentationDocument"):
                self.assign_t()
                self.drawtranslate(self.document.getDrawPages())
                return

            # check for text selection, and switch to TradutorLibreText method then
            selection = self.document.getCurrentController().getSelection().getByIndex(0)
            if selection.getString():
                self.rewrite_selection(selection)
                return

            # else iterate over text snippets
            self.assign_t()
            tree = self.document.getText().createEnumeration()
            self.traverse(tree, slow=self.params.get("slow"))
            if self.params.get("frames"):
                self.traverse(XNamedAsEnumeration(self.document.getTextFrames()))

        except Exception:
            trace = format_exc()
            log.error(trace)
            # The message box needs the document window; if that is what failed in
            # the first place, do not let a second exception escape the handler.
            try:
                self.MessageBox(trace, MsgType=ERRORBOX)
            except Exception:
                log.error(format_exc())
        log.info("----")

    # map self.t.translate() implementation according to settings
    def assign_t(self):
        """Instantiate the translation backend selected in `self.params` as `self.t`."""
        self.t = translationbackends.assign_service(self.params)
        log.info(self.t)

    # break up UNO service: url query string `.pagetranslate?page&lang=en`
    def argparse(self, args):
        """
        Split the argument string into a dict of options.

        The leading action word becomes `mode`. Values for options whose current
        setting is an integer are cast to int, so that `frames=0` is really off
        instead of the truthy string "0". `lang=locale` is replaced by the
        office locale.

        :param args: argument string such as `page&lang=en`.
        :return: dict of the options present in `args`.
        """
        # parameterize leading ?action&
        args = "mode=" + args
        # key=value pairs
        params = dict(re.findall(r"(\w+)=([\w-]+)", args))
        # numeric flags arrive as strings; convert them where an int is expected
        for key, value in list(params.items()):
            if isinstance(self.params.get(key), int) and re.match(r"-?[0-9]+$", value):
                params[key] = int(value)
        # replace default locale
        if params.get("lang","-") == "locale":
            params["lang"] = self.getOoLocale()
        return params


    #-- iterate over TextContent/TextTable nodes
    def traverse(self, tree, slow=0):
        """
        Translate each element of an enumeration in place.

        Table cells are translated linebreak-wise, text frames are descended
        into, and plain paragraphs/text portions are translated as a whole.

        :param tree: enumeration offering hasMoreElements()/nextElement().
        :param slow: if truthy, paragraphs are further enumerated into their
            text portions, and each portion is translated on its own.
        """
        log.info("TextDocument.Enumeration…")
        while tree.hasMoreElements():
            para = tree.nextElement()
            log.info(para)
            # table/cells
            if para.supportsService("com.sun.star.text.TextTable"):
                for cellname in para.getCellNames():
                    log.debug(cellname)
                    text = para.getCellByName(cellname).getText()
                    #self.traverse(text.createEnumeration())
                    text.setString(self.t.linebreakwise(text.getString())) # or .translate #linebreakwise
            # subdocuments?
            elif para.supportsService("com.sun.star.text.TextFrame"):
                log.debug("TextFrame.Enumeration…")
                self.traverse(para.getText().createEnumeration())
            # a paragraph can be further enumerated for text portions (same character/style attributes),
            # but that will obviously slow things down further / also complicate coherent translations
            elif slow and para.supportsService("com.sun.star.text.Paragraph"): # doesn't work with com.sun.star.container.XEnumerationAccess?
                self.traverse(para.createEnumeration(), slow=0)  # list of TextPortion`s
            # normal flow text / paragraph
            elif para.supportsService("com.sun.star.text.TextContent") or para.supportsService("com.sun.star.text.TextPortion"):
                text = para.getString()
                text = self.t.translate(text)
                para.setString(text)
            else:
                log.warning("Unsupported document element.")

    #-- iterate over DrawPages and TextShapes
    def drawtranslate(self, pages):
        """
        Translate the text of every TextShape on every draw page.

        :param pages: indexed container of draw pages (getCount()/getByIndex()).
        """
        for pi in range(pages.getCount()):
            page = pages.getByIndex(pi)
            for si in range(page.getCount()):
                shape = page.getByIndex(si)
                if shape.supportsService("com.sun.star.drawing.TextShape"):
                    log.info(shape)
                    shape.Text.setString(self.t.translate(shape.Text.getString()))


    #-- TradutorLibreText (selection rewrite)
    def rewrite_selection(self, xTextRange):
        """
        Replace the selected text with its translation.

        With `lang=paragraph`, or in `tradutor` mode, the language is taken from
        the character locale of the selection.

        :param xTextRange: the selected text range.
        """
        log.info("rewrite text selection")

        # Get selected text and language
        text = xTextRange.getString()
        if self.params["lang"] == "paragraph":
            self.params["lang"] = xTextRange.CharLocale.Language
        elif self.params["mode"] == "tradutor":
            # return value is not needed here; the call is kept for its log entry
            self.getOoLocale()
            self.params["lang"] = self.getParaLang(xTextRange).Language
        log.debug("paragraph.lang="+self.params["lang"])

        # we kinda have to reinstantiate the backend late, because params` lang= might be hard-applied to handler (e.g. translate-python)
        self.assign_t()

        # translate/replace (plain text) with linebreaks intact
        trans = self.t.linebreakwise(text)
        # literal backslash sequences in the result are turned into real newlines
        trans = trans.replace('\\n',"\n").replace('\\r',"\n")
        xTextRange.setString(trans)

    # Query system locale
    def getOoLocale(self):
        """
        Read `ooLocale` from the `/org.openoffice.Setup/L10N` configuration node.

        Also stores a LocaleData service instance in `self.LocaleData`.

        :return: the configured `ooLocale` value.
        """
        self.LocaleData = self.ctx.ServiceManager.createInstanceWithContext("com.sun.star.i18n.LocaleData", self.ctx)
        L10Ncfg = self.ctx.ServiceManager.createInstanceWithContext("com.sun.star.configuration.ConfigurationProvider", self.ctx)
        nodepath = PropertyValue(Name="nodepath", Value="/org.openoffice.Setup/L10N")
        code = L10Ncfg.createInstanceWithArguments("com.sun.star.configuration.ConfigurationAccess", (nodepath,)).getByName("ooLocale")
        log.info("ooLocale="+repr(code))
        return code

    # Langinfo=(com.sun.star.i18n.LanguageCountryInfo){ Language = (string)"de", LanguageDefaultName = (string)"German", Country = (string)"DE", CountryDefaultName = (string)"Germany", Variant = (string)"" }
    def getParaLang(self, xTextRange):
        """
        Look up language/country info for the character locale of a text range.

        :param xTextRange: text range whose `CharLocale` is inspected.
        :return: LanguageCountryInfo struct (see the example above).
        """
        # Create the LocaleData service on demand, so this method no longer
        # depends on getOoLocale() having been called beforehand.
        locale_data = getattr(self, "LocaleData", None)
        if locale_data is None:
            locale_data = self.ctx.ServiceManager.createInstanceWithContext("com.sun.star.i18n.LocaleData", self.ctx)
            self.LocaleData = locale_data
        Langinfo = locale_data.getLanguageCountryInfo(xTextRange.CharLocale)
        log.info("Langinfo="+repr(Langinfo))
        return Langinfo

    # user notifications
    def MessageBox(self, MsgText, MsgTitle="", MsgType=MESSAGEBOX, MsgButtons=BUTTONS_OK):
        """
        Show a modal message box on top of the document window.

        :param MsgText: message body.
        :param MsgTitle: window title.
        :param MsgType: one of the MessageBoxType constants.
        :param MsgButtons: MessageBoxButtons combination.
        :return: result of the message box's execute() call.
        """
        ParentWin = self.document.getCurrentController().Frame.ContainerWindow
        ctx = uno.getComponentContext()
        sm = ctx.ServiceManager
        sv = sm.createInstanceWithContext("com.sun.star.awt.Toolkit", ctx)
        myBox = sv.createMessageBox(ParentWin, MsgType, MsgButtons, MsgTitle, MsgText)
        return myBox.execute()



# Handler for settings-embedded DialogOptions.xdl window, and read/write access to our leaf in the office registry.
# (This is fairly generic/reusable, because it directly maps a dict to/from the dialog widgets.)
#
class settings(unohelper.Base, XContainerWindowEventHandler, XServiceInfo):
    """
    Options dialog handler, and read/write access to the plugin's registry leaf.

    The registry entries are mapped 1:1 onto a dict, and the dict onto the
    dialog controls of the same name.
    """
    impl_id = "vnd.include-once.OptionsPageTranslate"

    def __init__(self, ctx, *kargs):
        """
        Open the registry node.

        :param ctx: UNO component context.
        :param kargs: further arguments, ignored.
        """
        # None if the registry node could not be opened (error is logged)
        self.access = self.updatemgr(ctx)
        # dir(self.access) lists, among others: getByName, getElementNames, hasByName,
        # setPropertyValue, replaceByName, commitChanges, plus one attribute per
        # option (api_key, debug, deepl_api, deepl_web, google, …)

    # get handle on OpenOffice registry (read/write)
    def updatemgr(self, ctx, registry="/vnd.include-once.pagetranslate.Options/Leaves/Flags"):
        """
        Create a ConfigurationUpdateAccess for the given registry node.

        :param ctx: UNO component context.
        :param registry: node path of the options leaf.
        :return: the update access object, or None on failure (logged).
        """
        try:
            nodepath = PropertyValue(Name="nodepath", Value=registry)
            config = ctx.ServiceManager.createInstanceWithContext("com.sun.star.configuration.ConfigurationProvider", ctx)
            return config.createInstanceWithArguments("com.sun.star.configuration.ConfigurationUpdateAccess", (nodepath,))
        except Exception:
            log.error(format_exc())
            return None

    # read/store config dict
    def read(self):
        """
        Return all registry entries as a dict.

        :return: name→value dict; empty if the registry node is unavailable.
        """
        if self.access is None:
            return {}
        return dict((name, self.access.getByName(name)) for name in self.access.getElementNames())

    def write(self, cfg):
        """
        Store the entries of `cfg` that exist in the registry, and commit.

        :param cfg: name→value dict; unknown names are skipped.
        """
        if self.access is None:
            log.error("settings.write(): registry node unavailable, nothing stored")
            return
        for name, value in cfg.items():
            if self.access.hasByName(name):
                self.access.setPropertyValue(name, value)
        self.access.commitChanges()

    # invoked on dialog initialization or for saving
    def callHandlerMethod(self, window=".UnoDialogControl", action="initialize|ok|back", name="external_event"):
        """
        Fill the dialog from the registry ("initialize"), or save it ("ok").

        Errors are logged, never raised.

        :param window: dialog whose controls are matched by name to config keys.
        :param action: "initialize", "ok" or "back"; "back" changes nothing.
        :param name: event name, only used for logging.
        :return: always True.
        """
        try:
            log.debug("callHandlerMethod({}, {}, {})".format(repr(window), action, name))
            params = self.read()
            log.info(repr(params))
            # iterate over all dialog controls by name, and assign from/to config dict
            for cntrl in window.getControls():
                key = cntrl.Model.Name
                if action == "initialize":
                    self.setControlValue(cntrl, params.get(key))
                elif action == "ok":
                    params[key] = self.getControlValue(cntrl)
            if action == "ok":
                self.write(params)
        except Exception:
            log.error(format_exc())
        return True

    # deal with CheckBox/TextEdit control differences
    def getControlValue(self, c):
        """
        Read a control's value.

        :param c: dialog control.
        :return: 0/1 for controls with a `State`, str for controls with a
            `Text`, None for anything else.
        """
        if hasattr(c, "State"):
            return 1 if c.State else 0
        if hasattr(c, "Text"):
            return str(c.Text)
        return None

    def setControlValue(self, c, value):
        """
        Assign a value to a control; empty/None values become 0 or "".

        :param c: dialog control.
        :param value: value to show.
        """
        if hasattr(c, "State"):
            c.State = int(value if value else 0)
        elif hasattr(c, "Text"):
            c.Text = str(value if value else "")


    # XContainerWindowEventHandler
    def getSupportedMethodNames(self): return ("external_event",)
    # XServiceInfo
    def supportsService(self, name): return (name == self.impl_id)
    def getImplementationName(self): return self.impl_id
    def getSupportedServiceNames(self): return (self.impl_id,)
    def getServiceNames(self): return (self.impl_id,)



# register both components with LibreOffice:
# the translation job, and the handler for the options dialog
g_ImplementationHelper = unohelper.ImplementationHelper()
g_ImplementationHelper.addImplementation(
    pagetranslate,
    "org.openoffice.comp.pyuno.pagetranslate",
    ("com.sun.star.task.Job",),
)
g_ImplementationHelper.addImplementation(
    settings,
    settings.impl_id,
    (),
)