LibreOffice plugin that pipes whole Writer documents through Google Translate and is meant to keep most of the page formatting.

⌈⌋ ⎇ branch:  PageTranslate


Check-in [b0666af4c8]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Hurried fixes for --merriamwebster and --synonyms.com
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: b0666af4c89587dfb2e803d0a922c0f9331d7cef
User & Date: mario 2021-09-16 14:59:07
Context
2021-12-30
02:07
Add `selectonly` mode as per user request. check-in: a80c65db71 user: mario tags: trunk
2021-09-16
14:59
Hurried fixes for --merriamwebster and --synonyms.com check-in: b0666af4c8 user: mario tags: trunk
2021-06-10
14:53
Change exception names, and use `LangSelection` for dialog (makes error more understandable). check-in: c62b11cb0e user: mario tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to dingonyms/dingonyms.py.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
#!/usr/bin/env python3
# encoding: utf-8
# api: cli
# type: filter
# title: dingonyms
# description: fetch synonyms from various web services
# version: 0.5
# license: PD
# category: dictionary
# keywords: glossary, synonyms, antonyms
# classifiers: search, dict
# architecture: all
# depends: deb:ding (>= 1.8), python (>= 3.6), python:requests (>= 2.4)
# url: https://fossil.include-once.org/pagetranslate/wiki/dingonyms






|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
#!/usr/bin/env python3
# encoding: utf-8
# api: cli
# type: filter
# title: dingonyms
# description: fetch synonyms from various web services
# version: 0.6
# license: PD
# category: dictionary
# keywords: glossary, synonyms, antonyms
# classifiers: search, dict
# architecture: all
# depends: deb:ding (>= 1.8), python (>= 3.6), python:requests (>= 2.4)
# url: https://fossil.include-once.org/pagetranslate/wiki/dingonyms
259
260
261
262
263
264
265

266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292

293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
            elif grp:
                word = f"{set_word} ({grp})"


    def merriamwebster(self, word):
        """ merriam-?webster | mw | mer\w* | m\w*w\*b\w* | \w*web\w* """
        # NOTE(review): the docstring looks like a service-name pattern and is
        # presumably read elsewhere -- kept verbatim, documentation lives here.
        #
        # Downloads the thesaurus page for `word`, then walks all regex hits in
        # document order: word links are collected, and any other hit (section
        # label or closing </html) emits the collected list via out.alternatives.
        page = http_get(f"https://www.merriam-webster.com/thesaurus/{word}")
        out.site("Merriam-Webster.com")

        # Three alternatives: a thesaurus link (word + "#type" fragment), a
        # "function-label" heading naming the group and its reference term,
        # or the end-of-document marker.
        rx = ''' href="/thesaurus/([\w.-]+)\#(\w+)" | ="function-label">(?:Words\s)?(Related|Near\sAntonyms|Antonyms|Synonyms|\w+)\s\w+\s<em>([\w.-]+)</em> | (</html) '''
        pattern = re.compile(rx, re.X)

        heading = "Synonyms"
        pending = []
        for link_word, link_type, new_heading, new_word, _end in pattern.findall(page):
            if link_word:
                # only the first letter of the type fragment is shown
                pending.append("%s {%s}" % (link_word, link_type[0]))
                continue
            if pending:
                out.alternatives(word + " {%s}" % heading, pending)
                pending = []
            if new_heading or new_word:
                # subsequent links belong to this group / reference term
                heading, word = new_heading, new_word


    def synonym_com(self, word):
        """
            synonyms?(\.?com)?$ | s$ | sy$ | sy?n\w*\\b(?<!de) |
            
            Doing a fair bit of super-specific HTML transforms here, because
            there's a wealth of decoration. DOM traversal might have been simpler
            in this case.
        """
        # Fetch the page, then cut away everything before the result container
        # and everything from the right-rail container onwards.
        page = http_get(f"https://www.synonym.com/synonyms/{word}")
        page = re.sub('^.+?="result-group-container">', "", page, count=0, flags=re.S)
        page = re.sub('<div class="rightrail-container">.+$', "", page, count=0, flags=re.S)

        out.site("Synonym.com")
        # Alternatives, in order: a numbered word-title block (term, part of
        # speech, pronunciation, definition), a synonym "chip" link, an
        # "Antonyms" card title, or the closing </html> tag.
        rx = """
            <div\sclass="word-title.+?> \s*\d\.\s ([\w.\-]+) \s* 
               \s* .*?
               \s* <span\s+class="part-of-speech">\s*(\w+)[.\s]*</span>
               \s* <span\s+class="pronunciation">\((.+?)\)</span>
               \s* <span\s+class="definition"> (.+?) </div> |
            <a\sclass="chip[^">]*"\shref="/synonyms/([\w.-]+)" |
            <div\sclass="card-title[^>]+>\s*(Antonyms)\s*</div> |
            </html>
        """
        pending = []
        for title, pos, pron, definition, chip, is_antonyms in re.findall(rx, page, re.X|re.S):
            if chip:
                pending.append(chip)
                continue
            # any non-chip hit closes the list collected so far
            if pending:
                out.alternatives(word, pending)
                pending = []
            if is_antonyms:
                word = " 🞬 {Antonyms}"
                continue
            # collapse markup and whitespace runs, then wrap the definition
            definition = re.sub('(<[^>]+>|\s+)+', " ", definition, count=0, flags=re.S).strip()
            definition = " |   ".join(textwrap.wrap(definition, 50))
            word = f"{title} {{{pos}}} [{pron}] |  ({definition})"

                
    def urban(self, word):







>




|




















|

>


|
<
<
<
<
|
<
|


|






|
|







259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297




298

299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
            elif grp:
                word = f"{set_word} ({grp})"


    def merriamwebster(self, word):
        """ merriam-?webster | mw | mer\w* | m\w*w\*b\w* | \w*web\w* """
        # NOTE(review): the docstring looks like a service-name pattern and is
        # presumably read elsewhere -- kept verbatim, documentation lives here.
        #
        # Downloads the thesaurus page for `word`, then walks all regex hits in
        # document order: word links are collected, and any other hit (section
        # label or closing </html) emits the collected list via out.alternatives.
        page = http_get(f"https://www.merriam-webster.com/thesaurus/{word}")
        out.site("Merriam-Webster.com")

        # Three alternatives: a thesaurus link (target word + start of the link
        # text), a "function-label" heading naming the group and its reference
        # term, or the end-of-document marker.
        rx = ''' href="/thesaurus/([\w.-]+)">(\w+) | ="function-label">(?:Words\s)?(Related|Near\sAntonyms|Antonyms|Synonyms|\w+)\s\w+\s<em>([\w.-]+)</em> | (</html) '''
        pattern = re.compile(rx, re.X)

        heading = "Synonyms"
        pending = []
        for link_word, link_text, new_heading, new_word, _end in pattern.findall(page):
            if link_word:
                # only the first character of the captured link text is shown
                pending.append("%s {%s}" % (link_word, link_text[0]))
                continue
            if pending:
                out.alternatives(word + " {%s}" % heading, pending)
                pending = []
            if new_heading or new_word:
                # subsequent links belong to this group / reference term
                heading, word = new_heading, new_word


    def synonym_com(self, word):
        """
            synonyms?(\.?com)?$ | s$ | sy$ | sy?n\w*\\b(?<!de) |
            
            Doing a fair bit of super-specific HTML transforms here, because
            there's a wealth of decoration. DOM traversal might have been simpler
            in this case.
        """
        # NOTE(review): the docstring starts with what looks like a service-name
        # pattern, presumably read elsewhere -- kept verbatim.
        #
        # Fetches the synonym.com page for `word`, trims the markup before the
        # tabbed header / content container and after the right rail, then
        # emits one out.alternatives() list per <h4> section.
        page = http_get(f"https://www.synonym.com/synonyms/{word}")
        page = re.sub('^.+?="(tabbed-header|content-container)">', "", page, count=0, flags=re.S)
        page = re.sub('<div class="rightrail-container">.+$', "", page, count=0, flags=re.S)

        out.site("Synonym.com")
        # Alternatives: a section header, a list entry (optionally followed by
        # a parenthesised note, which is captured but not shown), or </html>.
        # Raw string: same pattern text, without invalid-escape warnings.
        rx = r"""
            <h4\sclass="section-list-header">([\w\s-]+)</h4> |
            <li>([\w\s'.-]+)(?:\s\((.*?)\))?</li> |
            (</html>)
        """
        label = word   # heading for the list being collected
        ls = []
        for group, add_word, _note, _end in re.findall(rx, page, re.X | re.S):
            if add_word:
                ls.append(add_word)
                continue
            # a section header or the end marker closes the current list
            if ls:
                out.alternatives(label, ls)
                ls = []
            if group:
                # build from the search term each time, so labels of earlier
                # sections do not pile up ("x {Synonyms} {Antonyms}")
                label = word + " {" + group + "}"
        # the right-rail trimming above may have removed </html>, so the last
        # section has to be flushed explicitly
        if ls:
            out.alternatives(label, ls)

                
    def urban(self, word):