LibreOffice plugin that pipes whole Writer documents through Google Translate and is meant to keep most of the page formatting.

⌈⌋ branch:  PageTranslate


Artifact [276b173dd7]

Artifact 276b173dd72bbcf960ea17430cbb631241d0d94a:

  • File pagetranslate.py — part of check-in [b469e6efc0] at 2022-11-15 17:47:11 on branch trunk — prepare for language list updates (seems shorter than default list however) (user: mario size: 23095)

#!/usr/bin/python
# encoding: utf-8
# api: uno
##type: callback
# category: language
# title: PageTranslate
# description: Action button to get whole Writer document translated
# version: 2.1.0
# state: stable
# author: mario
# url: https://fossil.include-once.org/pagetranslate/
# depends: python:requests (>= 2.5), python:uno
# pack: *.py, pythonpath/*.py, META-INF/*, pkg-desc, *.x*, icons/*
# config:
#    { name: frames, type: bool, value: 0, description: traverse TextFrames }
#    { name: quick, type: bool, value: 0, description: newline placeholders }
#    { name: slow, type: bool, value: 0, description: traverse TextPortions }
#    { name: debug, type: bool, value: 1, description: default logging level }
#    { name: flag, type: str, value: "locale", description: second btn action }
#    { name: annotate, type: bool, value: 0, description: original into comment }
# license: GNU LGPL 2.1
# forked-from: TradutorLibreText (Claudemir de Almeida Rosa)
# pylint: disable=missing-module-docstring, missing-class-docstring, missing-function-docstring, line-too-long
# pylint: disable=import-error, invalid-name, broad-except, super-init-not-called
#
# LibreOffice plugin for translating documents that's supposed to retain formatting.
# Per default does not require a text selection to operate, but works on the whole
# page.
# The original mode (TradutorLibreText) is still supported and used whenever a text
# portion is selected. It also uses the default target language (English) then.
# Unless a different mode/language from the Tools➜PageTranslate menu is requested.
#
# Beware that Writer freezes during the dozens of translation calls to Google.
# Long documents in particular might take ages, because each paragraph/line or
# text longer than 1900 chars causes another roundtrip.
# Draw/Impress documents have basic support (no text selection mode there).
#
# There's a configuration dialog under Tools➜Options➜[Language➜PageTranslate].
# Where you can switch the translation service, and set a few options. You'll
# need an API key for DeepL API or Microsoft Translator. Or set an email for
# MyMemory, or a command for using a CLI translation program. Other services
# are provided by deep-translator. (Use bundled 20MB extension release.)
#
#  · Always creates a log file: /tmp/pagetranslate-libreoffice.log
#  · Without pythonpath/ populated, this plugin won't fully work on Windows
#    installations (only the Google Translate option is likely to).
#  · Backends are defined in `pythonpath/translationbackends.py`
#  · Exception & FromTo dialogs are manually instantiated in pt_dialogs.py
#


# core modules
import os
import sys
import re
from traceback import format_exc
from tempfile import gettempdir
from datetime import datetime
import logging as log
# OpenOffice UNO bridge
import uno # pylint: disable=unused-import
import unohelper
from unocompat import PropertyValue, XNamedAsEnumeration, MessageBox, with_properties
from com.sun.star.task import XJobExecutor
from com.sun.star.awt import XActionListener, XContainerWindowEventHandler
from com.sun.star.lang import XServiceInfo, Locale
from com.sun.star.util import Date, DateTime
# pythonpath/*.py modules
import translationbackends
import pt_dialogs
# log setup
# Everything is logged into one file in the system temp directory (DEBUG and up).
log.basicConfig(filename='%s/pagetranslate-libreoffice.log'%gettempdir(), level=log.DEBUG)
# Log uncaught exceptions instead of printing them. The hook receives the
# (type, value, traceback) triple as arguments; format_exc() would only see an
# exception that is currently being handled, which is not the case inside the
# hook, so the triple is handed to the logging call via exc_info instead.
sys.excepthook = lambda *exc: log.critical("Uncaught exception", exc_info=exc)


# Office plugin
class PageTranslate(unohelper.Base, XJobExecutor):
    """
    Job component that translates the current document in place.

    `trigger()` is the entry point. Depending on the document type and the
    current selection it translates Draw/Impress text shapes, the selected
    text range, or every paragraph/table cell of a Writer document. The
    translation itself is delegated to the backend object returned by
    `translationbackends.assign_service()` (stored in `self.t`).
    """

    # defaults + config + command args
    # pylint: disable=bad-whitespace
    params = dict(
        mode = "page",      # "trigger"/"page", or "tradutor"
        lang = "en",        # target language, "flag", "paragraph", "locale", "select"
        frames = 0,         # also process TextFrames (subdocuments)
        quick = 0,          # temporary newline placeholders, or split/iterate over text sections
        slow = 0,           # further split over paragraph segments/formatting (super slow mode)
        selectonly = 0,     # Tradutor-mode (no document translation, only ever act on selection)
        debug = 1,          # logging level
        annotate = 1,       # inject annotation/comments with original text wherever replaced
        backend = "Google", # backend to use, (string name replaces old flags)
        api_key = "",       # API key
        email = "",         # MyMemory email
        cmd = "translate-cli -o -f auto -t {lang} {text}",  # cli tool
        flag = "locale",    # default lang for secondary 🏴 button
    )

    log = log.getLogger("PageTranslate")

    # "xx-YY" / "xx_YY" style codes: language subtag plus two-letter region
    _lang_region = re.compile(r"^([A-Za-z]{2,3})[-_]([A-Za-z]{2})$")

    # gets instantiated as XJobExecutor by LibreOffice
    def __init__(self, ctx):
        """
        Collect the handles needed later on.

        ctx: UNO component context; used to create the Desktop service, the
             settings reader and (later) the LocaleData service.
        """
        self.log.info("__init__()")

        self.ctx = ctx
        self.desktop = ctx.ServiceManager.createInstanceWithContext("com.sun.star.frame.Desktop", ctx)
        self.document = self.desktop.getCurrentComponent()
        self.config = settings(ctx)
        # Private copy of the defaults: update_params() and rewrite_selection()
        # modify the dict, which must not leak into the class-level defaults.
        self.params = dict(type(self).params)

        pt_dialogs.remote_ctx = ctx
        self.t = None # pylint: disable=invalid-name

    # merge defaults from registry + params from args
    def update_params(self, args):
        """
        Rebuild `self.params` from defaults, registry options and `args`.

        args: query-style string as passed to trigger(), e.g. `page&lang=en`;
              its first word becomes `mode`, `key=value` pairs override options.

        Afterwards resolves the symbolic `lang` values "flag", "locale" and
        "select" into an actual language (the latter via a dialog).
        """
        # start from the pristine defaults, so nothing from a previous call survives
        self.params = dict(type(self).params)

        # update from registry options
        self.params.update(self.config.read())
        self.params["office"] = self.config.get_office_version()

        # service query string `.pagetranslate?page&lang=en`
        self.params.update(
            dict(re.findall(r"(\w+)=([\w-]+)", "mode=" + (args or "")))
        )

        # prepare some flags
        self.params["from"] = "auto"
        if self.params.get("debug"):
            handlers = self.log.root.handlers
            if handlers:  # there is none if the log file could not be set up
                handlers[0].setLevel(log.DEBUG)

        if self.params.get("lang") == "flag":
            # an empty/unset option means the documented default
            flag = self.params.get("flag") or "locale"
            # compound flag option like `backend=mymemory&lang=it`
            if flag.find("=") > 0:
                self.params.update(
                    re.findall(r"(\w+)=([^&=]+)", flag)
                )
                # compound option without lang= must not leave "flag" as target language
                if self.params.get("lang") == "flag":
                    self.params["lang"] = "locale"
            else:
                self.params["lang"] = flag
        if self.params.get("lang") == "locale":
            self.params["lang"] = self.get_office_locale()
        if self.params.get("lang") == "select" or self.params.get("from") == "select":
            self.params["from"], self.params["lang"] = pt_dialogs.langselect()

        self.log.info("params = %r", self.params)

    # invoked from toolbar button
    def trigger(self, args):
        """
        XJobExecutor entry point; translate the document or the selection.

        args: query-style option string, see update_params().

        Order of checks: Draw/Impress documents get their text shapes
        translated; otherwise a non-empty text selection is rewritten; otherwise
        the whole text document is traversed (plus TextFrames if `frames` is set).
        Any exception is logged and shown in a dialog rather than propagated.
        """
        self.log.info(".trigger(args = %r) ---- invoked ----", args)
        try:
            self.update_params(args)

            # Draw/Impress?
            self.log.debug("document = <%s>", str(self.document))
            if self.document.supportsService("com.sun.star.drawing.DrawingDocument") or self.document.supportsService("com.sun.star.presentation.PresentationDocument"):
                self.assign_t()
                self.drawtranslate(self.document.getDrawPages())
                return

            # check for text selection, and switch to TradutorLibreText method then
            selection = self.document.getCurrentController().getSelection().getByIndex(0)
            if selection.getString():
                self.rewrite_selection(selection)
                return
            if self.params["selectonly"]:
                raise Warning("Select-only mode enabled; aborting for there was no text selection")

            # else iterate over paragraphs in TextDocument
            self.assign_t()
            tree = self.document.getText().createEnumeration()
            self.traverse(tree, slow=self.params.get("slow"))
            # plus TextFrames
            if self.params.get("frames"):
                self.traverse(XNamedAsEnumeration(self.document.getTextFrames()), section="DocumentFrames")

        # show message box for errors from wherever
        except Exception as exc:
            dump = format_exc()
            self.log.error(dump)
            try:
                pt_dialogs.exception(err=str(exc), exc=dump)
            except Exception:
                # the custom dialog itself failed, fall back to a plain message box
                MessageBox(self.desktop, dump)
        finally:
            self.log.info(".trigger: ---- done ----")

    # map self.t.translate() implementation according to settings
    def assign_t(self):
        """Instantiate the translation backend for the current params as `self.t`."""
        self.t = translationbackends.assign_service(self.params)
        self.log.info(".assign_t = %s %s", str(self.t), str(self.t.translate))

    #-- iterate over TextContent/TextTable nodes
    def traverse(self, tree, slow=0, section="TextDocument"):
        """
        Translate every element delivered by an enumeration.

        tree:    object with hasMoreElements()/nextElement()
        slow:    if set, paragraphs are enumerated further and each text
                 portion is translated on its own
        section: label for the log only

        Requires `self.t` to be assigned (see assign_t()).
        """
        self.log.info(".traverse: <%s>…", section)
        while tree.hasMoreElements():
            para = tree.nextElement()
            self.log.debug("para = <%s>", str(para))

            # table/cells
            if para.supportsService("com.sun.star.text.TextTable"):
                for cellname in para.getCellNames():
                    self.log.debug("table/cells:cellname = %r", cellname)
                    text = para.getCellByName(cellname).getText()
                    # not an enumeration, but simple linebreak-formatting in cells
                    orig = text.getString()
                    if self.t.skip(orig):  # ignore "empty" cells
                        continue
                    text.setString(self.t.linebreakwise(orig))  # translate table cell content block-wise
                    self.add_comment(text, orig)
            # subdocuments?
            elif para.supportsService("com.sun.star.text.TextFrame"):
                self.traverse(para.getText().createEnumeration(), section="TextFrame")
            # ignore existing textfields/comments?
            elif para.supportsService("com.sun.star.text.textfield.Annotation"):
                pass
            # NOTE(review): XTextField is an interface name; the service is presumably
            # "com.sun.star.text.TextField" - confirm before relying on this branch
            elif para.supportsService("com.sun.star.text.XTextField"):
                pass
            # a paragraph can be further enumerated for text portions (same character/style attributes),
            # but that will obviously slow things down further / also complicate coherent translations
            elif slow and para.supportsService("com.sun.star.text.Paragraph"): # doesn't work with com.sun.star.container.XEnumerationAccess?
                self.set_para_locale(para, self.params["lang"])
                self.traverse(para.createEnumeration(), slow=0, section="TextPortion")  # "slow mode" iterates TextPortions
            # normal flow text / paragraph
            elif para.supportsService("com.sun.star.text.TextContent") or para.supportsService("com.sun.star.text.TextPortion"):
                orig = para.getString()
                if self.t.skip(orig):  # preempt short/empty segments (incidentally preserves Annotations and TextFields)
                    continue
                text = self.t.translate(orig)  # translate whole paragraph content
                para.setString(text)
                self.add_comment(para, orig)  # inject annotation with previous text
                self.set_para_locale(para, self.params["lang"]) # CharLocale.Language=target
            else:
                self.log.warning(".traverse: Unsupported document element.")

    # inject comment for translated paragraphs / textportions / also works for xtextrange!
    def add_comment(self, para, text=""):
        """
        Attach an annotation holding `text` (the original wording) to `para`.

        Does nothing unless the `annotate` option is set. The annotation is
        attached at the start of the range; if that raises, it is inserted at
        the end of the range instead.
        """
        if not self.params.get("annotate"):
            return
        dt = datetime.now()
        comment = with_properties(
            self.document.createInstance("com.sun.star.text.textfield.Annotation"),
            Content = text,
            Author = u"[PageTranslate→" + self.params["lang"] + "]",
            #Resolved = False, # AttributeError in AOO
            Date = with_properties(Date(), Year=dt.year, Month=dt.month, Day=dt.day),
            DateTimeValue = with_properties(DateTime(), Year=dt.year, Month=dt.month, Day=dt.day, Hours=dt.hour, Minutes=dt.minute, Seconds=dt.second),
        )
        try:
            comment.attach(para.getStart())
        except Exception:
            para.getText().insertTextContent(para.getEnd(), comment, False)

    # set CharLocale for replaced text, if one was present (avoid overriding for TextPortions)
    def set_para_locale(self, para, lang="en"):
        """
        Mark `para` as being written in the target language.

        lang: language code; a region-tagged code such as "zh-CN" is split into
              Language "zh" and Country "CN", anything else is used as-is with
              the country looked up via country().

        Left untouched if the range reports no CharLocale or an empty Language.
        """
        current = para.CharLocale
        if current is None or not current.Language:
            return
        tagged = self._lang_region.match(lang or "")
        language = tagged.group(1).lower() if tagged else lang
        # kinda have to populate Country= to avert `Mixed Languages` in toolbar
        para.CharLocale = with_properties(
            Locale(),
            Language=language, Country=self.country(lang), Variant=""
        )
        #log.debug("changing=%s", para.CharLocale)

    @staticmethod
    def country(lang):
        """
        Return the primary country code for a language code, or "" if unknown.

        A region-tagged code ("en-US", "pt_br") yields its own region in upper
        case; a bare code yields the country of its first entry in the list below.
        """
        tagged = PageTranslate._lang_region.match(lang or "")
        if tagged:
            return tagged.group(2).upper()
        # Static text list to resolve prime country code from.
        # [f"{l.Language}-{l.Country}--{l.Variant}" for l in self.locale_data().getAllInstalledLocaleNames()]
        langs = """
        en-US-- en-AU-- en-BZ-- en-CA-- en-GB-- en-IE-- en-JM-- en-NZ-- en-PH-- en-TT-- en-ZA-- en-ZW-- en-NA-- en-GH--
        en-MW-- en-GM-- en-BW-- en-ZM-- en-LK-- en-NG-- en-KE-- en-DK-- en-MU-- es-ES-- es-AR-- es-BO-- es-CL-- es-CO--
        es-CR-- es-DO-- es-EC-- es-GT-- es-HN-- es-MX-- es-NI-- es-PA-- es-PE-- es-PR-- es-PY-- es-SV-- es-UY-- es-VE--
        gl-ES-- qlt-ES--oc-ES-aranes de-DE-- de-AT-- de-CH-- de-LI-- de-LU-- fr-FR-- fr-BE-- fr-CA-- fr-CH-- fr-LU--
        fr-MC-- fr-BF-- fr-CI-- fr-ML-- fr-SN-- fr-BJ-- fr-NE-- fr-TG-- it-IT-- it-CH-- sl-SI-- sv-SE-- sv-FI-- ca-ES--
        qlt-ES--ca-ES-valencia cs-CZ-- sk-SK-- da-DK-- el-GR-- fi-FI-- is-IS-- nl-BE-- nl-NL-- no-NO-- nn-NO-- nb-NO--
        nds-DE-- pl-PL-- pt-BR-- pt-PT-- ru-RU-- tr-TR-- tt-RU-- et-EE-- vro-EE-- lb-LU-- lt-LT-- lv-LV-- uk-UA--
        ro-RO-- cy-GB-- bg-BG-- qlt-ME--sr-Latn-ME qlt-RS--sr-Latn-RS qlt-CS--sr-Latn-CS sr-ME-- sr-RS-- sr-CS--
        hr-HR-- bs-BA-- eu-ES-- fo-FO-- ga-IE-- gd-GB-- ka-GE-- be-BY-- kl-GL-- mk-MK-- br-FR-- la-VA-- cv-RU-- wa-BE--
        fur-IT-- gsc-FR-- fy-NL-- qlt-FR--oc-FR-lengadoc mt-MT-- sc-IT-- ast-ES-- ltg-LV-- hsb-DE-- dsb-DE-- rue-SK--
        an-ES-- myv-RU-- lld-IT-- cu-RU-- vec-IT-- szl-PL-- lij-IT-- ja-JP-- ko-KR-- zh-CN-- zh-HK-- zh-SG-- zh-TW--
        zh-MO-- en-HK-- ar-EG-- ar-DZ-- ar-LB-- ar-SA-- ar-TN-- he-IL-- hi-IN-- kn-IN-- ta-IN-- te-IN-- gu-IN-- mr-IN--
        pa-IN-- bn-IN-- or-IN-- en-IN-- ml-IN-- bn-BD-- th-TH-- af-ZA-- hu-HU-- id-ID-- ms-MY-- en-MY-- ia---
        qlt-MN--mn-Cyrl-MN az-AZ-- sw-TZ-- km-KH-- lo-LA-- rw-RW-- eo--- dz-BT-- ne-NP-- zu-ZA-- nso-ZA-- vi-VN--
        tn-ZA-- xh-ZA-- st-ZA-- ss-ZA-- ve-ZA-- nr-ZA-- ts-ZA-- qlt-TR--kmr-Latn-TR ak-GH-- af-NA-- am-ET-- ti-ER--
        tg-TJ-- ky-KG-- kk-KZ-- fa-IR-- qlt-GH--ha-Latn-GH ee-GH-- sg-CF-- lg-UG-- uz-UZ-- ln-CD-- hy-AM-- hil-PH--
        so-SO-- gug-PY-- tk-TM-- my-MM-- shs-CA-- tpi-PG-- ar-OM-- ug-CN-- om-ET-- plt-MG-- mai-IN-- yi-US-- haw-US--
        lif-NP-- ur-PK-- ht-HT-- jbo--- kab-DZ-- pt-AO-- pjt-AU-- pap-BQ-- pap-CW-- ebo-CG-- tyx-CG-- axk-CG-- beq-CG--
        bkw-CG-- bvx-CG-- dde-CG-- iyx-CG-- kkw-CG-- kng-CG-- ldi-CG-- mdw-CG-- mkw-CG-- njx-CG-- ngz-CG-- njy-CG--
        puu-CG-- sdj-CG-- tek-CG-- tsa-CG-- vif-CG-- xku-CG-- yom-CG-- sid-ET-- bo-CN-- bo-IN-- ar-AE-- ar-KW-- bm-ML--
        pui-CO-- lgr-SB-- mos-BF-- ny-MW-- ar-BH-- ar-IQ-- ar-JO-- ar-LY-- ar-MA-- ar-QA-- ar-SY-- ar-YE-- ilo-PH--
        qlt-NG--ha-Latn-NG min-ID-- sun-ID-- en-IL-- pdc-US-- dv-MV--
        """
        # escape the code, it is caller-supplied text rather than a pattern
        found = re.search(r"(?:^|\s)" + re.escape(lang or "") + r"-(\w+)--", langs)
        if found:
            return found.group(1)
        return ""

    #-- iterate over DrawPages and TextShapes
    def drawtranslate(self, pages):
        """
        Translate the text of every TextShape found directly on the draw pages.

        pages: indexed container of draw pages (getCount()/getByIndex()).
        Shapes whose text the backend's skip() rejects are left alone.
        """
        for page_index in range(pages.getCount()):
            page = pages.getByIndex(page_index)
            for shape_index in range(page.getCount()):
                shape = page.getByIndex(shape_index)
                if shape.supportsService("com.sun.star.drawing.TextShape"):
                    self.log.debug(".drawtranslate: shape = <%s>", str(shape))
                    text = shape.Text.getString()
                    if self.t.skip(text):
                        continue
                    shape.Text.setString(self.t.translate(text))

    #-- TradutorLibreText (selection rewrite)
    def rewrite_selection(self, xTextRange):
        """
        Replace the selected text range with its translation.

        With lang="paragraph" the target language is taken from the range's
        CharLocale; in "tradutor" mode from the LocaleData lookup for it.
        The original text is added as annotation if it has 20 or more
        characters (and `annotate` is enabled, see add_comment()).
        """
        self.log.info(".rewrite_selection() ---- begin ----")

        # Get selected text and language
        string = xTextRange.getString()
        if self.params["lang"] == "paragraph":
            self.params["lang"] = xTextRange.CharLocale.Language
        elif self.params["mode"] == "tradutor":
            self.params["lang"] = self.get_para_locale(xTextRange).Language
        self.log.debug("paragraph.lang = %s", self.params["lang"])

        # instantiate the backend after we got the language now
        self.assign_t()

        # translate/replace (plain text) with linebreaks intact
        trans = self.t.linebreakwise(string)
        # turn literal backslash-n / backslash-r sequences into real newlines
        trans = trans.replace('\\n',"\n").replace('\\r',"\n")
        self.log.info(".setString from %r to (%s) = %r", string, self.params["lang"], trans)
        xTextRange.setString(trans)
        if len(string) >= 20:
            self.add_comment(xTextRange, string)  # comment original if of sufficient length

    # Query system locale
    def get_office_locale(self):
        """
        Return the `ooLocale` value from the office setup registry.

        Falls back to "en" (the default target language) if the registry node
        is not accessible or the value is empty.
        """
        locale_cfg = self.config.updatemgr(registry="/org.openoffice.Setup/L10N", update="")
        if locale_cfg is None:  # updatemgr() has logged the reason already
            self.log.warning("office_locale unavailable, using 'en'")
            return "en"
        code = locale_cfg.getByName("ooLocale")
        self.log.info("office_locale = %r", code)
        return code or "en"

    # Langinfo=(com.sun.star.i18n.LanguageCountryInfo){ Language = (string)"de", LanguageDefaultName = (string)"German", Country = (string)"DE", CountryDefaultName = (string)"Germany", Variant = (string)"" }
    def get_para_locale(self, xTextRange):
        """Return the LanguageCountryInfo that LocaleData reports for the range's CharLocale."""
        lang_info = self.locale_data().getLanguageCountryInfo(xTextRange.CharLocale)
        self.log.info("lang_info = <%s>", str(lang_info))
        return lang_info # just using .Language in rewrite_selection()

    def locale_data(self):
        """Create a com.sun.star.i18n.LocaleData service instance."""
        return self.ctx.ServiceManager.createInstanceWithContext("com.sun.star.i18n.LocaleData", self.ctx)


# XActionListener for callbacks
class CallbackListener(unohelper.Base, XActionListener):
    """Wraps a zero-argument callable so it can be registered as an action listener."""

    def __init__(self, callback):
        # callable that is invoked for every action event
        self.callback = callback

    def actionPerformed(self, *args): # pylint: disable=unused-argument
        """Log the wrapped callable, invoke it, and report True; event arguments are ignored."""
        handler = self.callback
        log.info(handler)
        handler()
        return True

    def disposing(self, *x):
        """Nothing to release."""
        return None

# Handler for settings-embedded DialogOptions.xdl window, and read/write access to our leaf in the office registry.
# (This is fairly generic/reusable, because it directly maps a dict to/from the dialog widgets.)
#
class settings(unohelper.Base, XContainerWindowEventHandler, XServiceInfo):
    """
    Options-dialog event handler plus read/write access to the plugin's
    registry leaf.

    Dialog controls are matched to registry entries by their model name, so
    the dict returned by read() maps directly onto the dialog widgets.
    """
    impl_id = "vnd.include-once.OptionsPageTranslate"
    # dialog buttons (by control name) and the shell command each one runs
    btn_map = {
        "cfg_argos": "PYTHONPATH= argos-translate-gui &",
        "cfg_deps": "x-terminal-emulator -e 'pip install -U requests deep-translator argos-translate' &",
        "cfg_log": "xdg-open /tmp/pagetranslate-libreoffice.log &",
    }

    def __init__(self, ctx, *args):
        """
        ctx:  UNO component context
        args: only logged
        """
        self.ctx = ctx
        self.log = log.getLogger("ConfigDialog")
        self.log.info(".__init__(%r)", args)
        # may be None if the registry node could not be opened, see updatemgr()
        self.access = self.updatemgr()
        self.log.debug("access = <%s>", str(dir(self.access)))

    # get handle on OpenOffice registry (read/write)
    def updatemgr(self, registry="/vnd.include-once.pagetranslate.Options/Leaves/Flags", update="Update"):
        """
        Open a configuration access object for a registry node.

        registry: node path
        update:   "Update" requests a ConfigurationUpdateAccess, "" a
                  ConfigurationAccess

        Returns the access object, or None after logging the error.
        """
        try:
            nodepath = PropertyValue(Name="nodepath", Value=registry)
            config = self.ctx.ServiceManager.createInstanceWithContext("com.sun.star.configuration.ConfigurationProvider", self.ctx)
            return config.createInstanceWithArguments("com.sun.star.configuration.Configuration"+update+"Access", (nodepath,))
        except Exception:
            self.log.error(".updatemgr(): %s", format_exc())
            return None

    # read/store config dict
    def read(self):
        """Return all entries of the registry leaf as a dict; {} if it cannot be read."""
        self.log.debug(".read()")
        try:
            return {name: self.access.getByName(name) for name in self.access.getElementNames()}
        except Exception:
            self.log.error(".read(): %s", format_exc())
            return {}

    def write(self, cfg):
        """
        Store those entries of `cfg` that exist in the registry leaf, then commit.

        Entries with a None value (e.g. from a control type that
        get_control_value() does not know) are skipped.
        """
        for name, value in cfg.items():
            if value is None:
                continue
            if self.access.hasByName(name):
                self.access.setPropertyValue(name, value)
        self.access.commitChanges()

    # invoked on dialog initialization or for saving
    def callHandlerMethod(self, window=".UnoDialogControl", action="initialize|ok|back", event="external_event"):
        """
        Move values between the dialog controls and the registry.

        window: dialog window whose controls are iterated
        action: "initialize" fills the controls from the registry and hooks up
                the command buttons; "ok" collects the control values and
                writes them back; anything else leaves the values alone
        event:  only logged

        Always returns True; errors are logged.
        """
        self.log.debug(".callHandlerMethod(%r, %s, %s)", type(window), action, event)
        try:
            params = self.read()
            self.log.info("params = %s", str(params))
            # iterate over all dialog controls by name, and assign from/to config dict
            for cntrl in window.getControls():
                name = cntrl.Model.Name
                #self.log.debug("widget="+name)
                if name in self.btn_map:
                    # hook up once per window; repeating this on "ok"/"back"
                    # would make a click run the command several times
                    if action == "initialize":
                        cntrl.addActionListener(CallbackListener(lambda cmd=self.btn_map[name]: os.system(cmd)))
                elif action == "initialize":
                    self.set_control_value(cntrl, params.get(name))
                elif action == "ok":
                    params[name] = self.get_control_value(cntrl)
            if action == "ok":
                self.write(params)
        except Exception:
            self.log.error(format_exc())
        return True

    # deal with CheckBox/TextEdit control differences
    @staticmethod
    def get_control_value(ctrl):
        """
        Read a control's value according to the attributes it has: `State` as
        0/1, `Text` as string, `getSelectedItem()` as string; None otherwise.
        """
        if hasattr(ctrl, "State"):
            return int(1 if ctrl.State else 0)
        if hasattr(ctrl, "Text"):
            return str(ctrl.Text)
        if hasattr(ctrl, "getSelectedItem"):
            return str(ctrl.getSelectedItem())
        return None

    @staticmethod
    def set_control_value(ctrl, value):
        """
        Counterpart of get_control_value(); a false/None value becomes 0
        respectively "". Controls with none of the known attributes are ignored.
        """
        if hasattr(ctrl, "State"):
            ctrl.State = int(value if value else 0)
        elif hasattr(ctrl, "Text"):
            ctrl.Text = str(value if value else "")
        elif hasattr(ctrl, "selectItem"):
            ctrl.selectItem(str(value if value else ""), True)

    # XContainerWindowEventHandler
    @staticmethod
    def getSupportedMethodNames():
        return ("external_event",)
    # XServiceInfo
    def supportsService(self, name):
        return name == self.impl_id
    def getImplementationName(self):
        return self.impl_id
    def getSupportedServiceNames(self):
        return (self.impl_id,)
    def getServiceNames(self):
        return (self.impl_id,)

    def get_office_version(self):
        """ return Open/LibreOffice version string, or "LibreOffice/7.x" if it cannot be read """
        try:
            access = self.updatemgr(registry="/org.openoffice.Setup/Product", update="")
            return access.getByName("ooName") + "/" + access.getByName("ooSetupVersion")
            # ooSetupVersionAboutBox: 7.4.2.3, ooVendor: The Document Foundation
        except Exception:
            return "LibreOffice/7.x"


# register both components with LibreOffice
g_ImplementationHelper = unohelper.ImplementationHelper()
# the translation job
g_ImplementationHelper.addImplementation(
    PageTranslate,
    "org.openoffice.comp.pyuno.pagetranslate",
    ("com.sun.star.task.Job",),
)
# the options dialog handler, registered without service names
g_ImplementationHelper.addImplementation(
    settings,
    settings.impl_id,
    (),
)