LibreOffice plugin that pipes whole Writer documents through Google Translate and aims to keep most of the page formatting.

⌈⌋ branch:  PageTranslate


Check-in [a80c65db71]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add `selectonly` mode as per user request.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: a80c65db711e2cc51968f55895ec3eb4b2455f2a
User & Date: mario 2021-12-30 02:07:44
Context
2021-12-30
02:10
Added pt-BR (Brazilian-Portuguese) menu entry check-in: a1aea47667 user: mario tags: trunk
02:07
Add `selectonly` mode as per user request. check-in: a80c65db71 user: mario tags: trunk
2021-09-16
14:59
Hurried fixes for --merriamwebster and --synonyms.com check-in: b0666af4c8 user: mario tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to OptionsDialog.xdl.

1
2
3
4
5
6
7
8
9
10



11
12
13
14
15
16
17
18
19
20
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE dlg:window PUBLIC "-//OpenOffice.org//DTD OfficeDocument 1.0//EN" "dialog.dtd">
<dlg:window xmlns:dlg="http://openoffice.org/2000/dialog" xmlns:script="http://openoffice.org/2000/script" dlg:id="OptionsPageTranslate" dlg:left="110" dlg:top="50" dlg:width="283" dlg:height="258" dlg:help-url="HIDID" dlg:closeable="true" dlg:moveable="true" dlg:title="Title" dlg:withtitlebar="false">
 <dlg:styles>
  <dlg:style dlg:style-id="0" dlg:border="none" dlg:font-height="8"/>
 </dlg:styles>
 <dlg:bulletinboard>
  <dlg:textfield dlg:id="api_key" dlg:tab-index="4" dlg:left="169" dlg:top="20" dlg:width="75" dlg:height="10" dlg:help-text="Key required for DeepL, Microsoft Translator, or Google Translate API (not implemented here)" dlg:help-url="HIDID"/>
  <dlg:checkbox dlg:id="frames" dlg:tab-index="1" dlg:left="10" dlg:top="100" dlg:width="100" dlg:height="10" dlg:help-text="Traverse subdocuments (frames / floating frames) as well" dlg:help-url="HIDID" dlg:value="also iterate over TextFrames" dlg:checked="false"/>
  <dlg:checkbox dlg:id="quick" dlg:tab-index="0" dlg:left="10" dlg:top="87" dlg:width="111" dlg:height="10" dlg:help-text="Temporary placeholders instead of iterating over newline breaks. (Only tested with Google Translate. Might screw up others.)" dlg:help-url="HIDID" dlg:value="quick paragraph linebreak handling" dlg:checked="false"/>



  <dlg:checkbox dlg:id="debug" dlg:tab-index="3" dlg:left="10" dlg:top="126" dlg:width="100" dlg:height="10" dlg:help-text="Log file in /tmp/pagetranslate-libreoffice.txt" dlg:help-url="HIDID" dlg:value="additonal debugging" dlg:checked="true"/>
  <dlg:textfield dlg:id="email" dlg:tab-index="5" dlg:left="169" dlg:top="42" dlg:width="75" dlg:height="10" dlg:help-text="MyMemory asks for an email addres (does not require it)" dlg:help-url="HIDID"/>
  <dlg:checkbox dlg:id="slow" dlg:tab-index="2" dlg:left="10" dlg:top="113" dlg:width="109" dlg:height="10" dlg:help-text="Split sentences on formatting prior translation (= more roundtrips, less cohesive sentence structure / translation)" dlg:help-url="HIDID" dlg:value="slow mode (more inline formatting)" dlg:checked="false"/>
  <dlg:fixedline dlg:id="FixedLine1" dlg:tab-index="6" dlg:left="5" dlg:top="75" dlg:width="117" dlg:height="9" dlg:value="Options"/>
  <dlg:fixedline dlg:id="FixedLine3" dlg:tab-index="7" dlg:left="5" dlg:top="5" dlg:width="117" dlg:height="7" dlg:value="Service"/>
  <dlg:fixedline dlg:id="FixedLine2" dlg:tab-index="8" dlg:left="131" dlg:top="5" dlg:width="115" dlg:height="8" dlg:value="Parameters"/>
  <dlg:fixedline dlg:id="Label1" dlg:tab-index="9" dlg:left="137" dlg:top="20" dlg:width="23" dlg:height="8" dlg:value="API key "/>
  <dlg:fixedline dlg:id="Label2" dlg:tab-index="10" dlg:left="137" dlg:top="42" dlg:width="28" dlg:height="8" dlg:printable="false" dlg:value="Email adr "/>
  <dlg:fixedline dlg:id="Label3" dlg:tab-index="11" dlg:left="137" dlg:top="64" dlg:width="30" dlg:height="8" dlg:value="Command "/>
  <dlg:menulist dlg:id="backend" dlg:tab-index="12" dlg:left="10" dlg:top="20" dlg:width="105" dlg:height="14" dlg:help-text="Which translation service to use. (Some might require an API key, or email address.)" dlg:spin="true" dlg:linecount="20">








<

>
>
>
|

<







1
2
3
4
5
6
7
8

9
10
11
12
13
14

15
16
17
18
19
20
21
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE dlg:window PUBLIC "-//OpenOffice.org//DTD OfficeDocument 1.0//EN" "dialog.dtd">
<dlg:window xmlns:dlg="http://openoffice.org/2000/dialog" xmlns:script="http://openoffice.org/2000/script" dlg:id="OptionsPageTranslate" dlg:left="110" dlg:top="50" dlg:width="283" dlg:height="258" dlg:help-url="HIDID" dlg:closeable="true" dlg:moveable="true" dlg:title="Title" dlg:withtitlebar="false">
 <dlg:styles>
  <dlg:style dlg:style-id="0" dlg:border="none" dlg:font-height="8"/>
 </dlg:styles>
 <dlg:bulletinboard>
  <dlg:textfield dlg:id="api_key" dlg:tab-index="4" dlg:left="169" dlg:top="20" dlg:width="75" dlg:height="10" dlg:help-text="Key required for DeepL, Microsoft Translator, or Google Translate API (not implemented here)" dlg:help-url="HIDID"/>

  <dlg:checkbox dlg:id="quick" dlg:tab-index="0" dlg:left="10" dlg:top="87" dlg:width="111" dlg:height="10" dlg:help-text="Temporary placeholders instead of iterating over newline breaks. (Only tested with Google Translate. Might screw up others.)" dlg:help-url="HIDID" dlg:value="quick paragraph linebreak handling" dlg:checked="false"/>
  <dlg:checkbox dlg:id="frames" dlg:tab-index="1" dlg:left="10" dlg:top="100" dlg:width="100" dlg:height="10" dlg:help-text="Traverse subdocuments (frames / floating frames) as well" dlg:help-url="HIDID" dlg:value="also iterate over TextFrames" dlg:checked="false"/>
  <dlg:checkbox dlg:id="slow" dlg:tab-index="2" dlg:left="10" dlg:top="113" dlg:width="109" dlg:height="10" dlg:help-text="Split sentences on formatting prior translation (= more roundtrips, less cohesive sentence structure / translation)" dlg:help-url="HIDID" dlg:value="slow mode (more inline formatting)" dlg:checked="false"/>
  <dlg:checkbox dlg:id="selectonly" dlg:tab-index="3" dlg:left="10" dlg:top="126" dlg:width="100" dlg:height="10" dlg:help-text="Disable full doc translation (Tradutor mode)" dlg:help-url="HIDID" dlg:value="Select-mode only" dlg:checked="false"/>
  <dlg:checkbox dlg:id="debug" dlg:tab-index="4" dlg:left="10" dlg:top="140" dlg:width="100" dlg:height="10" dlg:help-text="Log file in /tmp/pagetranslate-libreoffice.txt" dlg:help-url="HIDID" dlg:value="additonal debugging" dlg:checked="true"/>
  <dlg:textfield dlg:id="email" dlg:tab-index="5" dlg:left="169" dlg:top="42" dlg:width="75" dlg:height="10" dlg:help-text="MyMemory asks for an email addres (does not require it)" dlg:help-url="HIDID"/>

  <dlg:fixedline dlg:id="FixedLine1" dlg:tab-index="6" dlg:left="5" dlg:top="75" dlg:width="117" dlg:height="9" dlg:value="Options"/>
  <dlg:fixedline dlg:id="FixedLine3" dlg:tab-index="7" dlg:left="5" dlg:top="5" dlg:width="117" dlg:height="7" dlg:value="Service"/>
  <dlg:fixedline dlg:id="FixedLine2" dlg:tab-index="8" dlg:left="131" dlg:top="5" dlg:width="115" dlg:height="8" dlg:value="Parameters"/>
  <dlg:fixedline dlg:id="Label1" dlg:tab-index="9" dlg:left="137" dlg:top="20" dlg:width="23" dlg:height="8" dlg:value="API key "/>
  <dlg:fixedline dlg:id="Label2" dlg:tab-index="10" dlg:left="137" dlg:top="42" dlg:width="28" dlg:height="8" dlg:printable="false" dlg:value="Email adr "/>
  <dlg:fixedline dlg:id="Label3" dlg:tab-index="11" dlg:left="137" dlg:top="64" dlg:width="30" dlg:height="8" dlg:value="Command "/>
  <dlg:menulist dlg:id="backend" dlg:tab-index="12" dlg:left="10" dlg:top="20" dlg:width="105" dlg:height="14" dlg:help-text="Which translation service to use. (Some might require an API key, or email address.)" dlg:spin="true" dlg:linecount="20">
62
63
64
65
66
67
68
69
70
71
72
73
    <dlg:menuitem dlg:value="translate-cli -o -f auto -t {lang} {text}"/>
    <dlg:menuitem dlg:value="deep_translator -trans &quot;google&quot; -src &quot;auto&quot; -tg {lang} -txt {text}"/>
    <dlg:menuitem dlg:value="argos-translate --from-lang {from} --to-lang {lang} {text}"/>
    <dlg:menuitem dlg:value="trans -sl {from} {text} {lang}"/>
    <dlg:menuitem dlg:value="dingonyms --en-fr {text}"/>
   </dlg:menupopup>
  </dlg:combobox>
  <dlg:img dlg:style-id="0" dlg:id="logo" dlg:tab-index="23" dlg:left="200" dlg:top="135" dlg:width="40" dlg:height="40" dlg:src="vnd.sun.star.extension://vnd.include-once.pagetranslate/icons/flags.png"/>
  <dlg:button dlg:id="cfg_argos" dlg:tab-index="24" dlg:left="10" dlg:top="150" dlg:width="75" dlg:height="12" dlg:value="argos config gui">
  </dlg:button>
 </dlg:bulletinboard>
</dlg:window>







|
|
<


63
64
65
66
67
68
69
70
71

72
73
    <dlg:menuitem dlg:value="translate-cli -o -f auto -t {lang} {text}"/>
    <dlg:menuitem dlg:value="deep_translator -trans &quot;google&quot; -src &quot;auto&quot; -tg {lang} -txt {text}"/>
    <dlg:menuitem dlg:value="argos-translate --from-lang {from} --to-lang {lang} {text}"/>
    <dlg:menuitem dlg:value="trans -sl {from} {text} {lang}"/>
    <dlg:menuitem dlg:value="dingonyms --en-fr {text}"/>
   </dlg:menupopup>
  </dlg:combobox>
  <dlg:img dlg:style-id="0" dlg:id="logo" dlg:tab-index="23" dlg:left="200" dlg:top="135" dlg:width="40" dlg:height="40" dlg:src="Pictures/1000020100000080000000800BB334DDEFA5A256.png"/>
  <dlg:button dlg:id="cfg_argos" dlg:tab-index="24" dlg:left="10" dlg:top="155" dlg:width="75" dlg:height="12" dlg:value="argos config gui"/>

 </dlg:bulletinboard>
</dlg:window>

Changes to OptionsSchema.xcs.

31
32
33
34
35
36
37

38
39
40
41
42
        <prop oor:name="api_key" oor:type="xs:string"><value></value></prop>
        <prop oor:name="email" oor:type="xs:string"><value></value></prop>
        <prop oor:name="cmd" oor:type="xs:string"><value>translate-cli -o -f auto -t {lang} {text}</value></prop>
        <prop oor:name="quick" oor:type="xs:short"><value>0</value></prop>
        <prop oor:name="frames" oor:type="xs:short"><value>0</value></prop>
        <prop oor:name="slow" oor:type="xs:short"><value>0</value></prop>
        <prop oor:name="debug" oor:type="xs:short"><value>1</value></prop>

        <prop oor:name="flag" oor:type="xs:string"><value>locale</value></prop>
      </group>
    </group>
  </component>
</oor:component-schema>







>





31
32
33
34
35
36
37
38
39
40
41
42
43
        <prop oor:name="api_key" oor:type="xs:string"><value></value></prop>
        <prop oor:name="email" oor:type="xs:string"><value></value></prop>
        <prop oor:name="cmd" oor:type="xs:string"><value>translate-cli -o -f auto -t {lang} {text}</value></prop>
        <prop oor:name="quick" oor:type="xs:short"><value>0</value></prop>
        <prop oor:name="frames" oor:type="xs:short"><value>0</value></prop>
        <prop oor:name="slow" oor:type="xs:short"><value>0</value></prop>
        <prop oor:name="debug" oor:type="xs:short"><value>1</value></prop>
        <prop oor:name="selectonly" oor:type="xs:short"><value>0</value></prop>
        <prop oor:name="flag" oor:type="xs:string"><value>locale</value></prop>
      </group>
    </group>
  </component>
</oor:component-schema>

Changes to description.xml.

1
2
3
4
5
6
7
8
9
10
11
<?xml version="1.0" encoding="UTF-8"?>
<description xmlns="http://openoffice.org/extensions/description/2006" xmlns:dep="http://openoffice.org/extensions/description/2006" xmlns:xlink="http://www.w3.org/1999/xlink">
  <identifier value="vnd.include-once.pagetranslate"/>
  <version value="1.9.8"/>
  <display-name>
    <name lang="en">PageTranslate</name>
  </display-name>
  <dependencies>
    <OpenOffice.org-minimal-version value="3.0" dep:name="OpenOffice.org 3.0"/>
  </dependencies>
  <registration>



|







1
2
3
4
5
6
7
8
9
10
11
<?xml version="1.0" encoding="UTF-8"?>
<description xmlns="http://openoffice.org/extensions/description/2006" xmlns:dep="http://openoffice.org/extensions/description/2006" xmlns:xlink="http://www.w3.org/1999/xlink">
  <identifier value="vnd.include-once.pagetranslate"/>
  <version value="1.9.76"/>
  <display-name>
    <name lang="en">PageTranslate</name>
  </display-name>
  <dependencies>
    <OpenOffice.org-minimal-version value="3.0" dep:name="OpenOffice.org 3.0"/>
  </dependencies>
  <registration>

Changes to dingonyms/Makefile.

1
2
3
4
5
6
7
8


#!/usr/bin/make
# Build helpers for dingonyms: wheel packaging and man page generation.
# NOTE(review): the shebang carries no `-f`, so it only has an effect if this
# file is executed directly — confirm whether that is intended.

# Build a wheel by running setup.py's bdist_wheel command.
whl:
	./setup.py bdist_wheel
# Pattern rule: render any <name>.md to a <name>.1 man page with pandoc
# ($< is the Markdown prerequisite, $@ the man page target).
%.1:	%.md
	pandoc --standalone -f markdown+pandoc_title_block+grid_tables -t man $< -o $@
# Convenience target: (re)build man/dingonyms.1 through the pattern rule above.
man:	man/dingonyms.1











>
>
1
2
3
4
5
6
7
8
9
10
#!/usr/bin/make
# Build helpers for dingonyms: wheel packaging, man page generation, upload.
# NOTE(review): the shebang carries no `-f`, so it only has an effect if this
# file is executed directly — confirm whether that is intended.

# Build a wheel by running setup.py's bdist_wheel command.
whl:
	./setup.py bdist_wheel
# Pattern rule: render any <name>.md to a <name>.1 man page with pandoc
# ($< is the Markdown prerequisite, $@ the man page target).
%.1:	%.md
	pandoc --standalone -f markdown+pandoc_title_block+grid_tables -t man $< -o $@
# Convenience target: (re)build man/dingonyms.1 through the pattern rule above.
man:	man/dingonyms.1

# Upload every wheel found in dist/ with twine.
# NOTE(review): does not depend on `whl`, so the wheel must be built beforehand.
upload:
	twine upload dist/*whl

Changes to dingonyms/dingonyms.py.

295
296
297
298
299
300
301

302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
        out.site("Synonym.com")
        rx = """
            <h4\sclass="section-list-header">([\w\s-]+)</h4> |
            <li>([\w\s'.-]+)(?:\s\((.*?)\))?</li> |
            (</html>)
        """
        ls = []

        for group, add_word, defs, html in re.findall(rx, html, re.X|re.S):
            if add_word:
                ls.append(add_word)
            else:
                if ls:
                    out.alternatives(word, ls)
                    ls = []
                if group:
                    word = word + " {" + group + "}"
                    continue
                defs = re.sub('(<[^>]+>|\s+)+', " ", defs, 0, re.S).strip()
                defs = " |   ".join(textwrap.wrap(defs, 50))
                word = group + " {" + verb + "} [" + pron + "] |  (" + defs + ")"

                
    def urban(self, word):
        """ urban | u | u\w*[brn]\w* """
        html = http_get(f"https://www.urbandictionary.com/define.php?term={word}")
        out.site("UrbanDictionary.com")
        for html in re.findall('="def-panel\s*"[^>]*>(.+?)="contributor|def-footer">', html, re.S):







>





|


|

|
|
|







295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
        out.site("Synonym.com")
        rx = """
            <h4\sclass="section-list-header">([\w\s-]+)</h4> |
            <li>([\w\s'.-]+)(?:\s\((.*?)\))?</li> |
            (</html>)
        """
        ls = []
        pfx = word + " {Synonyms}"
        for group, add_word, defs, html in re.findall(rx, html, re.X|re.S):
            if add_word:
                ls.append(add_word)
            else:
                if ls:
                    out.alternatives(pfx, ls)
                    ls = []
                if group:
                    pfx = word + " {" + group + "}"
                    continue
                #defs = re.sub('(<[^>]+>|\s+)+', " ", defs, 0, re.S).strip()
                #defs = " |   ".join(textwrap.wrap(defs, 50))
                #pfx = group + " {" + verb + "} [" + pron + "] |  (" + defs + ")"

                
    def urban(self, word):
        """ urban | u | u\w*[brn]\w* """
        html = http_get(f"https://www.urbandictionary.com/define.php?term={word}")
        out.site("UrbanDictionary.com")
        for html in re.findall('="def-panel\s*"[^>]*>(.+?)="contributor|def-footer">', html, re.S):

Changes to pagetranslate.py.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#!/usr/bin/python
# encoding: utf-8
# api: uno
# type: callback
# category: language
# title: PageTranslate
# description: Action button to get whole Writer document translated
# version: 1.9.74
# state: beta
# author: mario
# url: https://fossil.include-once.org/pagetranslate/
# depends: python:requests (>= 2.5), python:uno
# pack: *.py, pythonpath/*.py, META-INF/*, pkg-desc, *.x*, icons/*
# config:
#    { name: frames, type: bool, value: 0, description: traverse TextFrames }







|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#!/usr/bin/python
# encoding: utf-8
# api: uno
# type: callback
# category: language
# title: PageTranslate
# description: Action button to get whole Writer document translated
# version: 1.9.76
# state: beta
# author: mario
# url: https://fossil.include-once.org/pagetranslate/
# depends: python:requests (>= 2.5), python:uno
# pack: *.py, pythonpath/*.py, META-INF/*, pkg-desc, *.x*, icons/*
# config:
#    { name: frames, type: bool, value: 0, description: traverse TextFrames }
75
76
77
78
79
80
81

82
83
84
85
86
87
88
    # defaults + config + command args
    params = dict(
        mode = "page",      # "trigger"/"page", or "tradutor"
        lang = "en",        # target language, or "flag", or "paragraph", "locale", "select", "mri-debug"
        frames = 0,         # also process TextFrames (subdocuments)
        quick = 0,          # use temporary newline placeholders, or split/iterate over text sections
        slow = 0,           # further split over paragraph segments/formatting (super slow mode)

        debug = 1,          # logging level
        backend = "Google", # backend to use, (string name replaces old flags)
        api_key = "",       # API key
        email = "",         # MyMemory email
        cmd = "translate-cli -o -f auto -t {lang} {text}",  # cli tool
        flag = "locale",    # default lang for secondary 🏴 button
    )







>







75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
    # defaults + config + command args
    params = dict(
        mode = "page",      # "trigger"/"page", or "tradutor"
        lang = "en",        # target language, or "flag", or "paragraph", "locale", "select", "mri-debug"
        frames = 0,         # also process TextFrames (subdocuments)
        quick = 0,          # use temporary newline placeholders, or split/iterate over text sections
        slow = 0,           # further split over paragraph segments/formatting (super slow mode)
        selectonly = 0,     # Tradutor-mode (no document translation, only ever act on selection)
        debug = 1,          # logging level
        backend = "Google", # backend to use, (string name replaces old flags)
        api_key = "",       # API key
        email = "",         # MyMemory email
        cmd = "translate-cli -o -f auto -t {lang} {text}",  # cli tool
        flag = "locale",    # default lang for secondary 🏴 button
    )
124
125
126
127
128
129
130



131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
                return

            # check for text selection, and switch to TradutorLibreText method then
            selection = self.document.getCurrentController().getSelection().getByIndex(0)
            if len(selection.getString()):
                self.rewrite_selection(selection)
                return




            # else iterate over text snippets
            self.assign_t()
            tree = self.document.getText().createEnumeration()
            self.traverse(tree, slow=self.params.get("slow"))
            if self.params.get("frames"):
                self.traverse(XNamedAsEnumeration(self.document.getTextFrames()))
        # show message box for errors from wherever
        except Exception as exc:
            self.exc(exc)
        finally:
            log.info("----")

    # central handler for errors
    def exc(self, exc, *a, **kw):
        """
        Log the current traceback and pass it on to the exception dialog.

        exc: the exception being handled; its str() is used as the `err` text.
        *a, **kw: accepted, but not used in this body.
        """
        # format_exc() is presumably traceback.format_exc (the import is not
        # visible here), i.e. it formats the exception currently being handled
        dump = format_exc()
        log.error(dump)
        pt_dialogs.exception(err=str(exc), exc=dump)
        #except:
        #    MessageBox(self, dump)

    # map self.t.translate() implementation according to settings
    def assign_t(self):
        """
        Pick the translation backend for the current settings.

        Stores whatever translationbackends.assign_service() returns for
        self.params in self.t, then logs that object at info level.
        """
        self.t = translationbackends.assign_service(self.params)
        log.info(self.t)
    
    # break up UNO service: url query string `.pagetranslate?page&lang=en`







>
>
>


















<
<







125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152


153
154
155
156
157
158
159
                return

            # check for text selection, and switch to TradutorLibreText method then
            selection = self.document.getCurrentController().getSelection().getByIndex(0)
            if len(selection.getString()):
                self.rewrite_selection(selection)
                return
            elif self.params["selectonly"]:
                raise Warning("Select-only mode enabled; aborting for there was no text selection")
                return

            # else iterate over text snippets
            self.assign_t()
            tree = self.document.getText().createEnumeration()
            self.traverse(tree, slow=self.params.get("slow"))
            if self.params.get("frames"):
                self.traverse(XNamedAsEnumeration(self.document.getTextFrames()))
        # show message box for errors from wherever
        except Exception as exc:
            self.exc(exc)
        finally:
            log.info("----")

    # central handler for errors
    def exc(self, exc, *a, **kw):
        """
        Log the current traceback and pass it on to the exception dialog.

        exc: the exception being handled; its str() is used as the `err` text.
        *a, **kw: accepted, but not used in this body.
        """
        # format_exc() is presumably traceback.format_exc (the import is not
        # visible here), i.e. it formats the exception currently being handled
        dump = format_exc()
        log.error(dump)
        pt_dialogs.exception(err=str(exc), exc=dump)



    # map self.t.translate() implementation according to settings
    def assign_t(self):
        """
        Pick the translation backend for the current settings.

        Stores whatever translationbackends.assign_service() returns for
        self.params in self.t, then logs that object at info level.
        """
        self.t = translationbackends.assign_service(self.params)
        log.info(self.t)
    
    # break up UNO service: url query string `.pagetranslate?page&lang=en`