Check-in [4449172e82]
Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.
Overview
Comment: | Apply googletranslate patch to standalone content.xml script |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: | 4449172e827ac1ac42203be6cab0f70a |
User & Date: | mario 2021-02-02 12:32:59 |
Context
2021-02-03
| ||
20:25 | Add more languages ('le continental submenus) check-in: bc8c7befa0 user: mario tags: trunk | |
2021-02-02
| ||
12:32 | Apply googletranslate patch to standalone content.xml script check-in: 4449172e82 user: mario tags: trunk | |
2021-01-31
| ||
13:34 | Note on extraction failures in google result page / logs. check-in: 1495c467bf user: mario tags: trunk | |
Changes
Changes to off/contentxmltrans.py.
︙ | ︙ | |||
38 39 40 41 42 43 44 | } # log file import logging logging.basicConfig(filename='/tmp/pagetranslate-libreoffice.log', level=logging.DEBUG) # regex import re rx_xmltrans = re.compile('(<text:span[^>]+>)([\w\s,.]+)(?=</text)', re.S|re.UNICODE) | | | 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 | } # log file import logging logging.basicConfig(filename='/tmp/pagetranslate-libreoffice.log', level=logging.DEBUG) # regex import re rx_xmltrans = re.compile('(<text:span[^>]+>)([\w\s,.]+)(?=</text)', re.S|re.UNICODE) rx_gtrans = re.compile('class="(?:t0|result-container)">(.+?)</div>', re.S) rx_splitpara = re.compile("(.{1,1895\.}|.{1,1900}\s|.*$)", re.S) rx_empty = re.compile("^[\s\d,.:;§():-]+$") rx_letters = re.compile("\w\w+", re.UNICODE) |
︙ | ︙ | |||
69 70 71 72 73 74 75 | parts[1] = self.translate(parts[1]) print(repr(parts)) return "".join(parts) # request text translation from google def askgoogle(self, text, dst_lang="en", src_lang='auto'): # fetch translation page | | | 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 | parts[1] = self.translate(parts[1]) print(repr(parts)) return "".join(parts) # request text translation from google def askgoogle(self, text, dst_lang="en", src_lang='auto'): # fetch translation page url = "https://translate.google.com/m?tl=%s&sl=%s&q=%s" % ( dst_lang, src_lang, quote_plus(text.encode("utf-8")) ) html = urlopen( Request(url, headers=http_headers), **ssl_args ).read().decode('utf-8') # extract content from text <div> m = rx_gtrans.search(html) |
︙ | ︙ |