#!/usr/bin/python
# encoding: utf-8
# api: uno
##type: callback
# category: language
# title: PageTranslate
# description: Action button to get whole Writer document translated
# version: 2.1.0
# state: stable
# author: mario
# url: https://fossil.include-once.org/pagetranslate/
# depends: python:requests (>= 2.5), python:uno
# pack: *.py, pythonpath/*.py, META-INF/*, pkg-desc, *.x*, icons/*
# config:
# { name: frames, type: bool, value: 0, description: traverse TextFrames }
# { name: quick, type: bool, value: 0, description: newline placeholders }
# { name: slow, type: bool, value: 0, description: traverse TextPortions }
# { name: debug, type: bool, value: 1, description: default logging level }
# { name: flag, type: str, value: "locale", description: second btn action }
# { name: annotate, type: bool, value: 0, description: original into comment }
# license: GNU LGPL 2.1
# forked-from: TradutorLibreText (Claudemir de Almeida Rosa)
# pylint: disable=missing-module-docstring, missing-class-docstring, missing-function-docstring, line-too-long
# pylint: disable=import-error, invalid-name, broad-except, super-init-not-called
#
# LibreOffice plugin for translating documents that's supposed to retain formatting.
# Per default does not require a text selection to operate, but works on the whole
# page.
# The original mode (TradutorLibreText) is still supported and used whenever a text
# portion is selected. It also uses the default target language (English) then.
# Unless a different mode/language from the Tools➜PageTranslate menu is requested.
#
# Beware that Writer freezes during the dozens of translation calls to Google.
# In particular long documents might take ages, because each paragraph/line or
# text longer than 1900 chars causes another roundtrip.
# Draw/Impress documents have basic support (no text selection mode there).
#
# There's a configuration dialog under Tools➜Options➜[Language➜PageTranslate].
# Where you can switch the translation service, and set a few options. You'll
# need an API key for DeepL API or Microsoft Translator. Or set an email for
# MyMemory, or a command for using a CLI translation program. Other services
# are provided by deep-translator. (Use bundled 20MB extension release.)
#
# · Always creates a log file: /tmp/pagetranslate-libreoffice.log
# · Without pythonpath/ populated, this plugin won't fully work on Windows
# installations (only the Google Translate option is likely to).
# · Backends are defined in `pythonpath/translationbackends.py`
# · Exception & FromTo dialogs are manually instantiated in pt_dialogs.py
#
# core modules
import os
import sys
import re
from traceback import format_exc
from tempfile import gettempdir
from datetime import datetime
import logging as log
# OpenOffice UNO bridge
import uno # pylint: disable=unused-import
import unohelper
from unocompat import PropertyValue, XNamedAsEnumeration, MessageBox, with_properties
from com.sun.star.task import XJobExecutor
from com.sun.star.awt import XActionListener, XContainerWindowEventHandler
from com.sun.star.lang import XServiceInfo, Locale
from com.sun.star.util import Date, DateTime
# pythonpath/*.py modules
import translationbackends
import pt_dialogs
# log setup
log.basicConfig(filename='%s/pagetranslate-libreoffice.log'%gettempdir(), level=log.DEBUG)
# The hook is invoked with (type, value, traceback) once the exception is no longer
# being handled, so format_exc() would merely yield "NoneType: None" here. Passing the
# received triple as exc_info makes the logger record the actual traceback.
sys.excepthook = lambda *exc: log.critical("Uncaught exception", exc_info=exc)
# Office plugin
class PageTranslate(unohelper.Base, XJobExecutor):
    """
    Job behind the toolbar/menu buttons. Translates the current text selection
    if there is one, else the paragraphs/tables (optionally frames) of the
    document, or the TextShapes of a Draw/Impress document. The translation
    itself is delegated to the backend picked by `translationbackends`.
    """

    # defaults + config + command args
    # pylint: disable=bad-whitespace
    params = dict(
        mode = "page",            # "trigger"/"page", or "tradutor"
        lang = "en",              # target language, "flag", "paragraph", "locale", "select"
        frames = 0,               # also process TextFrames (subdocuments)
        quick = 0,                # temporary newline placeholders, or split/iterate over text sections
        slow = 0,                 # further split over paragraph segments/formatting (super slow mode)
        selectonly = 0,           # Tradutor-mode (no document translation, only ever act on selection)
        debug = 1,                # logging level
        annotate = 1,             # inject annotation/comments with original text wherever replaced
        backend = "Google",       # backend to use, (string name replaces old flags)
        api_key = "",             # API key
        email = "",               # MyMemory email
        cmd = "translate-cli -o -f auto -t {lang} {text}",  # cli tool
        flag = "locale",          # default lang for secondary 🏴 button
    )
    log = log.getLogger("PageTranslate")

    # gets instantiated as XJobExecutor by LibreOffice
    def __init__(self, ctx):
        """ Keep handles on context, desktop, current document and the settings store. """
        self.log.info("__init__()")
        self.ctx = ctx
        self.desktop = ctx.ServiceManager.createInstanceWithContext("com.sun.star.frame.Desktop", ctx)
        self.document = self.desktop.getCurrentComponent()
        self.config = settings(ctx)
        pt_dialogs.remote_ctx = ctx
        self.t = None # pylint: disable=invalid-name
        # work on a private copy; update_params() would otherwise write registry and
        # argument values into the class-level defaults shared by every instance
        self.params = dict(self.params)

    # merge defaults from registry + params from args
    def update_params(self, args):
        """
        Combine defaults, registry options and the invocation string into self.params.

        `args` is the service query string, e.g. `page&lang=en`; its first word
        becomes params["mode"]. Placeholder languages ("flag", "locale", "select")
        are resolved to concrete values afterwards.
        """
        # update from registry options
        self.params.update(self.config.read())
        self.params["office"] = self.config.get_office_version()
        # service query string `.pagetranslate?page&lang=en`
        self.params.update(
            dict(re.findall(r"(\w+)=([\w-]+)", "mode=" + str(args or "")))
        )
        # prepare some flags
        self.params["from"] = "auto"
        if self.params.get("debug") and self.log.root.handlers:
            self.log.root.handlers[0].setLevel(log.DEBUG)
        if self.params.get("lang") == "flag":
            # an unset/empty flag option behaves like the default "locale"
            flag = str(self.params.get("flag") or "locale")
            if flag.find("=") > 0:
                # compound flag option like `backend=mymemory&lang=it`
                self.params.update(
                    re.findall(r"(\w+)=([^&=]+)", flag)
                )
                # without a lang= part the "flag" placeholder would be sent to the backend
                if self.params.get("lang") == "flag":
                    self.params["lang"] = "locale"
            else:
                self.params["lang"] = flag
        if self.params.get("lang") == "locale":
            self.params["lang"] = self.get_office_locale()
        if self.params.get("lang") == "select" or self.params.get("from") == "select":
            self.params["from"], self.params["lang"] = pt_dialogs.langselect()
        self.log.info("params = %r", self.params)

    # invoked from toolbar button
    def trigger(self, args):
        """
        Entry point of the job: translate drawing pages, the text selection, or
        the whole text document, in that order of precedence. Any exception is
        logged and presented in a dialog rather than propagated.
        """
        self.log.info(".trigger(args = %r) ---- invoked ----", args)
        try:
            self.update_params(args)
            # Draw/Impress?
            self.log.debug("document = <%s>", str(self.document))
            if self.document is None:
                raise Warning("No document available to translate")
            if self.document.supportsService("com.sun.star.drawing.DrawingDocument") or self.document.supportsService("com.sun.star.presentation.PresentationDocument"):
                self.assign_t()
                self.drawtranslate(self.document.getDrawPages())
                return
            # check for text selection, and switch to TradutorLibreText method then
            selection = self.document.getCurrentController().getSelection().getByIndex(0)
            if selection.getString():
                self.rewrite_selection(selection)
                return
            if self.params["selectonly"]:
                raise Warning("Select-only mode enabled; aborting for there was no text selection")
            # else iterate over paragraphs in TextDocument
            self.assign_t()
            tree = self.document.getText().createEnumeration()
            self.traverse(tree, slow=self.params.get("slow"))
            # plus TextFrames
            if self.params.get("frames"):
                self.traverse(XNamedAsEnumeration(self.document.getTextFrames()), section="DocumentFrames")
        # show message box for errors from wherever
        except Exception as exc:
            dump = format_exc()
            self.log.error(dump)
            try:
                pt_dialogs.exception(err=str(exc), exc=dump)
            except Exception:
                # fall back to a plain message box if the custom dialog itself fails
                MessageBox(self.desktop, dump)
        finally:
            self.log.info(".trigger: ---- done ----")

    # map self.t.translate() implementation according to settings
    def assign_t(self):
        """ Instantiate the translation backend for the current params as self.t. """
        self.t = translationbackends.assign_service(self.params)
        self.log.info(".assign_t = %s %s", str(self.t), str(self.t.translate))

    #-- iterate over TextContent/TextTable nodes
    def traverse(self, tree, slow=0, section="TextDocument"):
        """
        Walk an enumeration of text nodes and replace their content with translations.

        tree:    enumeration offering hasMoreElements()/nextElement()
        slow:    if set, paragraphs are descended into and handled per text portion
        section: label used for logging only
        """
        self.log.info(".traverse: <%s>…", section)
        while tree.hasMoreElements():
            para = tree.nextElement()
            self.log.debug("para = <%s>", str(para))
            # table/cells
            if para.supportsService("com.sun.star.text.TextTable"):
                for cellname in para.getCellNames():
                    self.log.debug("table/cells:cellname = %r", cellname)
                    text = para.getCellByName(cellname).getText()
                    # not an enumeration, but simple linebreak-formatting in cells
                    orig = text.getString()
                    if self.t.skip(orig): # ignore "empty" cells
                        continue
                    text.setString(self.t.linebreakwise(orig)) # translate table cell content block-wise
                    self.add_comment(text, orig)
            # subdocuments?
            elif para.supportsService("com.sun.star.text.TextFrame"):
                self.traverse(para.getText().createEnumeration(), section="TextFrame")
            # ignore existing textfields/comments?
            elif para.supportsService("com.sun.star.text.textfield.Annotation"):
                pass
            elif para.supportsService("com.sun.star.text.XTextField"):
                pass
            # a paragraph can be further enumerated for text portions (same character/style attributes),
            # but that will obviously slow things down further / also complicate coherent translations
            elif slow and para.supportsService("com.sun.star.text.Paragraph"): # doesn't work with com.sun.star.container.XEnumerationAccess?
                self.set_para_locale(para, self.params["lang"])
                self.traverse(para.createEnumeration(), slow=0, section="TextPortion") # "slow mode" iterates TextPortions
            # normal flow text / paragraph
            elif para.supportsService("com.sun.star.text.TextContent") or para.supportsService("com.sun.star.text.TextPortion"):
                orig = para.getString()
                if self.t.skip(orig): # preempt short/empty segments (incidentally preserves Annotations and TextFields)
                    continue
                text = self.t.translate(orig) # translate whole paragraph content
                para.setString(text)
                self.add_comment(para, orig) # inject annotation with previous text
                self.set_para_locale(para, self.params["lang"]) # CharLocale.Language=target
            else:
                self.log.warning(".traverse: Unsupported document element.")

    # inject comment for translated paragraphs / textportions / also works for xtextrange!
    def add_comment(self, para, text=""):
        """ Attach an annotation holding the original `text` to `para`, if params["annotate"] is set. """
        if not self.params.get("annotate"):
            return
        dt = datetime.now()
        comment = with_properties(
            self.document.createInstance("com.sun.star.text.textfield.Annotation"),
            Content = text,
            Author = u"[PageTranslate→" + self.params["lang"] + "]",
            #Resolved = False,   # AttributeError in AOO
            Date = with_properties(Date(), Year=dt.year, Month=dt.month, Day=dt.day),
            DateTimeValue = with_properties(DateTime(), Year=dt.year, Month=dt.month, Day=dt.day, Hours=dt.hour, Minutes=dt.minute, Seconds=dt.second),
        )
        try:
            comment.attach(para.getStart())
        except Exception:
            # alternative insertion path for when attach() is refused
            para.getText().insertTextContent(para.getEnd(), comment, False)

    # set CharLocale for replaced text, if one was present (avoid overriding for TextPortions)
    def set_para_locale(self, para, lang="en"):
        """
        Mark `para` as being written in `lang`. A region-tagged code such as
        "en-US" is split, so that Language and Country end up in their own fields.
        """
        if not para.CharLocale.Language:
            return
        language, region = self._split_lang(lang)
        # kinda have to populate Country= to avert `Mixed Languages` in toolbar
        para.CharLocale = with_properties(
            Locale(),
            Language=language, Country=region or self.country(language), Variant=""
        )
        #log.debug("changing=%s", para.CharLocale)

    @staticmethod
    def _split_lang(lang):
        """ Split "zh-CN"/"pt_BR" into ("zh", "CN"); anything else yields (lang, ""). """
        lang = lang or ""
        match = re.match(r"^([A-Za-z]{2,3})[-_]([A-Za-z]{2})$", lang)
        if match:
            return match.group(1).lower(), match.group(2).upper()
        return lang, ""

    @staticmethod
    def country(lang):
        """
        Return the primary country code for a language code ("en" → "US"), or ""
        if unknown. A code already carrying a region ("en-GB") returns that region.
        """
        # Static text list to resolve prime country code from.
        # [f"{l.Language}-{l.Country}--{l.Variant}" for l in self.locale_data().getAllInstalledLocaleNames()]
        langs = """
        en-US-- en-AU-- en-BZ-- en-CA-- en-GB-- en-IE-- en-JM-- en-NZ-- en-PH-- en-TT-- en-ZA-- en-ZW-- en-NA-- en-GH--
        en-MW-- en-GM-- en-BW-- en-ZM-- en-LK-- en-NG-- en-KE-- en-DK-- en-MU-- es-ES-- es-AR-- es-BO-- es-CL-- es-CO--
        es-CR-- es-DO-- es-EC-- es-GT-- es-HN-- es-MX-- es-NI-- es-PA-- es-PE-- es-PR-- es-PY-- es-SV-- es-UY-- es-VE--
        gl-ES-- qlt-ES--oc-ES-aranes de-DE-- de-AT-- de-CH-- de-LI-- de-LU-- fr-FR-- fr-BE-- fr-CA-- fr-CH-- fr-LU--
        fr-MC-- fr-BF-- fr-CI-- fr-ML-- fr-SN-- fr-BJ-- fr-NE-- fr-TG-- it-IT-- it-CH-- sl-SI-- sv-SE-- sv-FI-- ca-ES--
        qlt-ES--ca-ES-valencia cs-CZ-- sk-SK-- da-DK-- el-GR-- fi-FI-- is-IS-- nl-BE-- nl-NL-- no-NO-- nn-NO-- nb-NO--
        nds-DE-- pl-PL-- pt-BR-- pt-PT-- ru-RU-- tr-TR-- tt-RU-- et-EE-- vro-EE-- lb-LU-- lt-LT-- lv-LV-- uk-UA--
        ro-RO-- cy-GB-- bg-BG-- qlt-ME--sr-Latn-ME qlt-RS--sr-Latn-RS qlt-CS--sr-Latn-CS sr-ME-- sr-RS-- sr-CS--
        hr-HR-- bs-BA-- eu-ES-- fo-FO-- ga-IE-- gd-GB-- ka-GE-- be-BY-- kl-GL-- mk-MK-- br-FR-- la-VA-- cv-RU-- wa-BE--
        fur-IT-- gsc-FR-- fy-NL-- qlt-FR--oc-FR-lengadoc mt-MT-- sc-IT-- ast-ES-- ltg-LV-- hsb-DE-- dsb-DE-- rue-SK--
        an-ES-- myv-RU-- lld-IT-- cu-RU-- vec-IT-- szl-PL-- lij-IT-- ja-JP-- ko-KR-- zh-CN-- zh-HK-- zh-SG-- zh-TW--
        zh-MO-- en-HK-- ar-EG-- ar-DZ-- ar-LB-- ar-SA-- ar-TN-- he-IL-- hi-IN-- kn-IN-- ta-IN-- te-IN-- gu-IN-- mr-IN--
        pa-IN-- bn-IN-- or-IN-- en-IN-- ml-IN-- bn-BD-- th-TH-- af-ZA-- hu-HU-- id-ID-- ms-MY-- en-MY-- ia---
        qlt-MN--mn-Cyrl-MN az-AZ-- sw-TZ-- km-KH-- lo-LA-- rw-RW-- eo--- dz-BT-- ne-NP-- zu-ZA-- nso-ZA-- vi-VN--
        tn-ZA-- xh-ZA-- st-ZA-- ss-ZA-- ve-ZA-- nr-ZA-- ts-ZA-- qlt-TR--kmr-Latn-TR ak-GH-- af-NA-- am-ET-- ti-ER--
        tg-TJ-- ky-KG-- kk-KZ-- fa-IR-- qlt-GH--ha-Latn-GH ee-GH-- sg-CF-- lg-UG-- uz-UZ-- ln-CD-- hy-AM-- hil-PH--
        so-SO-- gug-PY-- tk-TM-- my-MM-- shs-CA-- tpi-PG-- ar-OM-- ug-CN-- om-ET-- plt-MG-- mai-IN-- yi-US-- haw-US--
        lif-NP-- ur-PK-- ht-HT-- jbo--- kab-DZ-- pt-AO-- pjt-AU-- pap-BQ-- pap-CW-- ebo-CG-- tyx-CG-- axk-CG-- beq-CG--
        bkw-CG-- bvx-CG-- dde-CG-- iyx-CG-- kkw-CG-- kng-CG-- ldi-CG-- mdw-CG-- mkw-CG-- njx-CG-- ngz-CG-- njy-CG--
        puu-CG-- sdj-CG-- tek-CG-- tsa-CG-- vif-CG-- xku-CG-- yom-CG-- sid-ET-- bo-CN-- bo-IN-- ar-AE-- ar-KW-- bm-ML--
        pui-CO-- lgr-SB-- mos-BF-- ny-MW-- ar-BH-- ar-IQ-- ar-JO-- ar-LY-- ar-MA-- ar-QA-- ar-SY-- ar-YE-- ilo-PH--
        qlt-NG--ha-Latn-NG min-ID-- sun-ID-- en-IL-- pdc-US-- dv-MV--
        """
        language, region = PageTranslate._split_lang(lang)
        if region:
            return region
        if not language:
            return ""
        # escape the code, it may originate from user-editable options
        match = re.search(r"(?:^|\s)" + re.escape(language) + r"-(\w+)--", langs)
        if match:
            return match.group(1)
        return ""

    #-- iterate over DrawPages and TextShapes
    def drawtranslate(self, pages):
        """ Translate the text of every TextShape found directly on the given draw pages. """
        for page_index in range(pages.getCount()):
            page = pages.getByIndex(page_index)
            for shape_index in range(page.getCount()):
                shape = page.getByIndex(shape_index)
                if shape.supportsService("com.sun.star.drawing.TextShape"):
                    self.log.debug(".drawtranslate: shape = <%s>", str(shape))
                    text = shape.Text.getString()
                    if self.t.skip(text):
                        continue
                    shape.Text.setString(self.t.translate(text))

    #-- TradutorLibreText (selection rewrite)
    def rewrite_selection(self, xTextRange):
        """
        Replace the selected text range with its translation. For lang="paragraph"
        or mode="tradutor" the target language is taken from the range's own locale.
        """
        self.log.info(".rewrite_selection() ---- begin ----")
        # Get selected text and language
        string = xTextRange.getString()
        if self.params["lang"] == "paragraph":
            self.params["lang"] = xTextRange.CharLocale.Language
        elif self.params["mode"] == "tradutor":
            self.params["lang"] = self.get_para_locale(xTextRange).Language
        self.log.debug("paragraph.lang = %s", self.params["lang"])
        # instantiate the backend after we got the language now
        self.assign_t()
        # translate/replace (plain text) with linebreaks intact
        trans = self.t.linebreakwise(string)
        # turn literal backslash-n / backslash-r sequences into real newlines
        trans = trans.replace('\\n',"\n").replace('\\r',"\n")
        self.log.info(".setString from %r to (%s) = %r", string, self.params["lang"], trans)
        xTextRange.setString(trans)
        if len(string) >= 20:
            self.add_comment(xTextRange, string) # comment original if of sufficient length

    # Query system locale
    def get_office_locale(self):
        """ Return the `ooLocale` value from the /org.openoffice.Setup/L10N registry node. """
        locale_cfg = self.config.updatemgr(registry="/org.openoffice.Setup/L10N", update="")
        code = locale_cfg.getByName("ooLocale")
        self.log.info("office_locale = %r", code)
        return code

    # Langinfo=(com.sun.star.i18n.LanguageCountryInfo){ Language = (string)"de", LanguageDefaultName = (string)"German", Country = (string)"DE", CountryDefaultName = (string)"Germany", Variant = (string)"" }
    def get_para_locale(self, xTextRange):
        """ Look up the LanguageCountryInfo for the CharLocale of the given text range. """
        lang_info = self.locale_data().getLanguageCountryInfo(xTextRange.CharLocale)
        self.log.info("lang_info = <%s>", str(lang_info))
        return lang_info # just using .Language in rewrite_selection()

    def locale_data(self):
        """ Create a com.sun.star.i18n.LocaleData service instance. """
        return self.ctx.ServiceManager.createInstanceWithContext("com.sun.star.i18n.LocaleData", self.ctx)
# XActionListener for callbacks
class CallbackListener(unohelper.Base, XActionListener):
    """ Adapter that lets a plain Python callable serve as an action listener. """

    def __init__(self, callback):
        self.callback = callback

    def actionPerformed(self, *args): # pylint: disable=unused-argument
        """ Log the stored callable, invoke it without arguments, and return True. """
        handler = self.callback
        log.info(handler)
        handler()
        return True

    def disposing(self, *x):
        """ Nothing to clean up here. """
        return None
# Handler for settings-embedded DialogOptions.xdl window, and read/write access to our leaf in the office registry.
# (This is fairly generic/reusable, because it directly maps a dict to/from the dialog widgets.)
#
class settings(unohelper.Base, XContainerWindowEventHandler, XServiceInfo):
    """
    Options dialog handler plus registry accessor. Dialog controls are matched
    to registry entries by name, so a config dict can be copied in either direction.
    """

    impl_id = "vnd.include-once.OptionsPageTranslate"
    # dialog button name → shell command run on click
    btn_map = {
        "cfg_argos": "PYTHONPATH= argos-translate-gui &",
        "cfg_deps": "x-terminal-emulator -e 'pip install -U requests deep-translator argos-translate' &",
        # same location the log setup writes to, rather than a fixed /tmp
        "cfg_log": 'xdg-open "%s/pagetranslate-libreoffice.log" &' % gettempdir(),
    }

    def __init__(self, ctx, *args):
        """ Store the component context and open the extension's registry leaf. """
        self.ctx = ctx
        self.log = log.getLogger("ConfigDialog")
        self.log.info(".__init__(%r)", args)
        self.access = self.updatemgr()
        self.log.debug("access = <%s>", str(dir(self.access)))

    # get handle on OpenOffice registry (read/write)
    def updatemgr(self, registry="/vnd.include-once.pagetranslate.Options/Leaves/Flags", update="Update"):
        """
        Open a configuration access object for the given registry node.

        registry: node path to open
        update:   "Update" for a ConfigurationUpdateAccess, "" for plain ConfigurationAccess
        Returns the access object, or None if it could not be created (error is logged).
        """
        try:
            nodepath = PropertyValue(Name="nodepath", Value=registry)
            config = self.ctx.ServiceManager.createInstanceWithContext("com.sun.star.configuration.ConfigurationProvider", self.ctx)
            return config.createInstanceWithArguments("com.sun.star.configuration.Configuration"+update+"Access", (nodepath,))
        except Exception:
            self.log.error(".updatemgr(): %s", format_exc())
            return None

    # read/store config dict
    def read(self):
        """ Return all entries of the registry leaf as a dict; {} if it can't be read. """
        self.log.debug(".read()")
        try:
            return {name: self.access.getByName(name) for name in self.access.getElementNames()}
        except Exception:
            self.log.error(".read(): %s", format_exc())
            return {}

    def write(self, cfg):
        """ Store those `cfg` entries that exist in the registry leaf, then commit. """
        for name, value in cfg.items():
            # None stems from controls without a readable value; nothing to store then
            if value is not None and self.access.hasByName(name):
                self.access.setPropertyValue(name, value)
        self.access.commitChanges()

    # invoked on dialog initialization or for saving
    def callHandlerMethod(self, window=".UnoDialogControl", action="initialize|ok|back", event="external_event"):
        """
        Fill the dialog controls from the registry ("initialize") or save their
        values back ("ok"). Errors are logged; the event is always reported as handled.
        """
        self.log.debug(".callHandlerMethod(%r, %s, %s)", type(window), action, event)
        try:
            params = self.read()
            self.log.info("params = %s", str(params))
            # iterate over all dialog controls by name, and assign from/to config dict
            for name, cntrl in [(c.Model.Name, c) for c in window.getControls()]:
                #self.log.debug("widget="+name)
                if name in self.btn_map:
                    # attach once per window; re-adding on "ok"/"back" would stack
                    # listeners and run the command several times per click
                    if action == "initialize":
                        cntrl.addActionListener(CallbackListener(lambda cmd=self.btn_map[name]: os.system(cmd)))
                elif action == "initialize":
                    self.set_control_value(cntrl, params.get(name))
                elif action == "ok":
                    params[name] = self.get_control_value(cntrl)
            if action == "ok":
                self.write(params)
        except Exception:
            self.log.error(format_exc())
        return True

    # deal with CheckBox/TextEdit control differences
    @staticmethod
    def get_control_value(ctrl):
        """ Read a control's value as int (State) or str (Text / selected item); None otherwise. """
        if hasattr(ctrl, "State"):
            return int(1 if ctrl.State else 0)
        if hasattr(ctrl, "Text"):
            return str(ctrl.Text)
        if hasattr(ctrl, "getSelectedItem"):
            return str(ctrl.getSelectedItem())
        return None

    @staticmethod
    def set_control_value(ctrl, value):
        """ Assign `value` to a control via State, Text or selectItem, whichever it offers. """
        if hasattr(ctrl, "State"):
            ctrl.State = int(value if value else 0)
        elif hasattr(ctrl, "Text"):
            ctrl.Text = str(value if value else "")
        elif hasattr(ctrl, "selectItem"):
            ctrl.selectItem(str(value if value else ""), True)

    # XContainerWindowEventHandler
    @staticmethod
    def getSupportedMethodNames():
        return ("external_event",)

    # XServiceInfo
    def supportsService(self, name):
        return name == self.impl_id

    def getImplementationName(self):
        return self.impl_id

    def getSupportedServiceNames(self):
        return (self.impl_id,)

    def getServiceNames(self):
        return (self.impl_id,)

    def get_office_version(self):
        """ return Open/LibreOffice version string """
        try:
            access = self.updatemgr(registry="/org.openoffice.Setup/Product", update="")
            return access.getByName("ooName") + "/" + access.getByName("ooSetupVersion")
            # ooSetupVersionAboutBox: 7.4.2.3, ooVendor: The Document Foundation
        except Exception:
            # generic placeholder when the product node can't be read
            return "LibreOffice/7.x"
# register with LibreOffice
g_ImplementationHelper = unohelper.ImplementationHelper()
# the translation job itself
g_ImplementationHelper.addImplementation(
    PageTranslate,
    "org.openoffice.comp.pyuno.pagetranslate",
    ("com.sun.star.task.Job",),
)
# the options dialog handler, registered without service names
g_ImplementationHelper.addImplementation(
    settings,
    settings.impl_id,
    (),
)