Internet radio browser GUI for music/video streams from various directory services.

⌈⌋ ⎇ branch:  streamtuner2


Check-in [86ef3aaa35]

Overview
Comment:Fix google_homepage regex, filter out tunein+streema urls.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 86ef3aaa35b49c827fa8ec836647baac8991132b
User & Date: mario on 2016-04-17 23:54:11
Other Links: manifest | tags
Context
2016-04-18
23:56
Disable logging for google_homepage check-in: 4f6de091f1 user: mario tags: trunk
2016-04-17
23:54
Fix google_homepage regex, filter out tunein+streema urls. check-in: 86ef3aaa35 user: mario tags: trunk
2016-04-11
12:53
List Symbola/Unifont as soft dependency check-in: ae5130d76a user: mario tags: trunk
Changes

Modified channels/favicon.py from [0fa0d36699] to [de8c98b1c8].

232
233
234
235
236
237
238
239






240
241
242
243
244
245
246

247
248
249
250

251
252
253
254
255
256
257
def google_find_homepage(row):
    """
    Search Google for a station's missing homepage URL.

    Records row["url"] in the module-level `tried_urls` list (once), then
    queries Google with the literal station title. The first `/url?q=`
    redirect target found in the result page is normalized through
    ahttp.fix_url() and stored in row["homepage"].

    Returns True if a homepage was found and assigned. Otherwise the
    function returns None.
    """
    # Remember this stream URL as attempted
    stream_url = row.get("url")
    if stream_url not in tried_urls:
        tried_urls.append(stream_url)

    # Nothing to search for without a station title
    if not row.get("title"):
        return None

    # Matches Google's redirect links; captures scheme + host only,
    # since the character class stops at the first `/`, `&` or `"`.
    rx_u = re.compile(r'/url\?q=(https?://[^"&/]+)')

    # Use literal station title now
    title = row["title"]

    # Run the Google search
    html = ahttp.get("http://www.google.com/search", params=dict(hl="en", q=title, client="streamtuner2"), ajax=1, timeout=3.5)

    # Find first URL hit
    url = rx_u.findall(html)
    if url:
        row["homepage"] = ahttp.fix_url(url[0])
        return True
    return None
#-----------------










|
>
>
>
>
>
>







>




>







232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
def google_find_homepage(row):
    """
    Search Google for a station's missing homepage URL.

    Records row["url"] in the module-level `tried_urls` list (once), then
    queries Google with the literal station title. The first acceptable
    result link is normalized through ahttp.fix_url() and stored in
    row["homepage"].

    Returns True if a homepage was found and assigned. Otherwise the
    function returns None.
    """
    # Remember this stream URL as attempted
    stream_url = row.get("url")
    if stream_url not in tried_urls:
        tried_urls.append(stream_url)

    # Nothing to search for without a station title
    if not row.get("title"):
        return None

    # Result links appear either as direct <h3 class="r"> anchors or as
    # /url?q= redirects. The lookahead rejects hosts that start with
    # Google's own names or with the tunein/streema directory sites.
    rx_u = re.compile(r'''
        (?:  <h3\s+class="r"><a\s+href="  |  /url\?q=  )
        (https?://
        (?!www\.google|webcache|google|tunein|streema)
        [^"&]+)''',
        re.X
    )

    # Use literal station title now
    title = row["title"]

    # Run the Google search
    html = ahttp.get("http://www.google.com/search", params=dict(hl="en", q=title, client="streamtuner2"), ajax=1, timeout=3.5)
    # Log the page with (up to 100) script/style sections stripped.
    # count/flags are passed by keyword: positional use is easy to misread
    # and is deprecated in newer Python versions.
    log.DATA(re.sub("<(script|style)[^>]*>.*?</(script|style)>", "", html, count=100, flags=re.S))

    # Find first URL hit
    url = rx_u.findall(html)
    if url:
        log.DATA(url)
        row["homepage"] = ahttp.fix_url(url[0])
        return True
    return None
#-----------------