Check-in [9688154862]
Overview
Comment: | Switched radionet plugin to resolve_urn() and speedier -grep loop. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: | 9688154862a61e33ba96cb4eefd87da3 |
User & Date: | mario on 2019-03-24 11:50:24 |
Other Links: | manifest | tags |
Context
2019-06-14
| ||
18:25 | streema: fix url regex (now alphanumeric titles) and urn_resolve patterns for new player layout check-in: fac6bc374a user: mario tags: trunk | |
2019-03-24
| ||
11:50 | Switched radionet plugin to resolve_urn() and speedier -grep loop. check-in: 9688154862 user: mario tags: trunk | |
10:25 | Crude fix for new station lookup. Regex still has horrible backtracking. (Should use resolve_urn rather than rnjs playlist workaround.) check-in: 6bfe67e367 user: mario tags: trunk | |
Changes
Modified contrib/radionet.py from [632ecf0169] to [edd29444df].
1 2 3 4 5 | # encoding: UTF-8 # api: streamtuner2 # title: radio.net # description: Europe's biggest radio platform # url: http://radio.net/ | | | | | < < | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 | # encoding: UTF-8 # api: streamtuner2 # title: radio.net # description: Europe's biggest radio platform # url: http://radio.net/ # version: 0.8 # type: channel # category: radio # png: # iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAt0lEQVR42mNgYGD4r+Ar/F/BDwkD+SBxojBMs1mLPBArgGlFqEEENYMNQNLsukIDYkirAvGu # ABsA1OC6XOP/5f8nwIaYAg0k2gBFsAsgTgcZkvnfDugFEeK9AFKsCPMG0CU6eZJgQ4R1eP8H7LLEivWyFJANQcQCLPBAmkGG4MJohmA6C6QA5gI5OxEUDNII # MwSvASBFIA3ociCxkWQAKMDICkSQIpgh2LDnSmP80YhsCFEJiRIMADpmeUOpqgjRAAAAAElFTkSuQmCC # priority: optional # extraction-method: regex # # Radio.net lists around 20.000 worldwide radio stations. # A maximum of three pages from each genre are fetched here, # some of the empty categories already omitted. # # The website heavily depends on JavaScript, a Flash player, # some social tracking cookies. Now requires additional page # request to get real streaming url, but at least no more # expiring access key. import time import json import re from config import * from channels import * |
︙ | ︙ | |||
60 61 62 63 64 65 66 | # → "streamUrl": and "description": are scanned for. # class radionet (ChannelPlugin): # control flags has_search = False audioformat = "audio/mpeg" | | | 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 | # → "streamUrl": and "description": are scanned for. # class radionet (ChannelPlugin): # control flags has_search = False audioformat = "audio/mpeg" listformat = "href" titles = dict(listeners=False, playing="Description") # sources apiPrefix = "https://api.radio.net/info/v2" genre_url = "http://www.radio.net/genre/{}" apiKey = None |
︙ | ︙ | |||
86 87 88 89 90 91 92 | # category page, get key html = ahttp.get(self.genre_url.format(cat)) for p in range(2, 4): if html.find('"?p={}">'.format(p)) >= 0: html += ahttp.get(self.genre_url.format(cat) + "?p={}".format(p)) self.set_key(html) | < | > | > | | | < | > | | > | < | > | | 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 | # category page, get key html = ahttp.get(self.genre_url.format(cat)) for p in range(2, 4): if html.find('"?p={}">'.format(p)) >= 0: html += ahttp.get(self.genre_url.format(cat) + "?p={}".format(p)) self.set_key(html) r = [] # split station blocks for row in re.findall("""<div class="stationinfo(.+?)</div>""", html, re.S): log.FOR_rOW(1) # extract text fields d = re.findall(""" <a\s+href="(?:https?:)?(//(?:[\w-]+)\.radio\.net/s/([^"]+)/?)" .*? <img[^>]+ src="([^<">]+)" .*? <strong[^>]*>(.*?)</strong> .*? <small[^>]*>\s*(.*?)\s*</small> """, row, re.X|re.S) # log.DATA_ROW(d) # refurbish extracted strings if d and len(d) and len(d[0]) == 5: href, name, img, title, desc = d[0] r.append(dict( name = name, genre = cat, title = unhtml(title), playing = unhtml(desc), url = "urn:radionet:"+name, #self._url(name), homepage = "http:{}".format(href), img = img, )); return r # api search is gone, now requires to fetch streamUrl from per-radio homepage def resolve_urn(self, row): if row.get("url", "-").find("urn:radionet:") != 0: return html = ahttp.get(row["homepage"]) stream = re.findall('"streamUrl":\s*"([^"]+)"', html, re.S) if stream: row["url"] = stream[0] return row # extract JavaScript key from any HTML blob (needed for station query) def set_key(self, html): ls = re.findall("""apiKey: '(\w+)'""", html) if ls: self.apiKey = ls[0] |