Index: contrib/radionet.py ================================================================== --- contrib/radionet.py +++ contrib/radionet.py @@ -1,11 +1,11 @@ # encoding: UTF-8 # api: streamtuner2 # title: radio.net # description: Europe's biggest radio platform # url: http://radio.net/ -# version: 0.9 +# version: 1.0 # type: channel # category: radio # png: # iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAt0lEQVR42mNgYGD4r+Ar/F/BDwkD+SBxojBMs1mLPBArgGlFqEEENYMNQNLsukIDYkirAvGu # ABsA1OC6XOP/5f8nwIaYAg0k2gBFsAsgTgcZkvnfDugFEeK9AFKsCPMG0CU6eZJgQ4R1eP8H7LLEivWyFJANQcQCLPBAmkGG4MJohmA6C6QA5gI5OxEUDNII @@ -30,11 +30,11 @@ from channels import * import ahttp import action -# hook special JSON format in to avoid grepping images by generic handler +# obsolete: hook special JSON format in to avoid grepping images by generic handler action.playlist_fmt_prio.insert(5, "rnjs") action.playlist_content_map.insert(7, ("rnjs", r'"logo175x175rounded"')) action.extract_playlist.extr_urls["rnjs"] = dict( url = r" (?x) \"streamUrl\" \s*:\s* \"(\w+:\\?/\\?/[^\"]+)\" ", title = r" (?x) \"(?:description|seoTitle)\" \s*:\s* \"([^\"]+)\" ", @@ -43,81 +43,105 @@ # Radio.net # # · Uses HTML block-wise regex extraction. -# → +# →
+# → basically just title/url, images in a separate json blob +# +# · Currently using an urn: to resolve stream urls at play time. # +# previously: # · There's an API key in each page listing, contained in a script block # as `apiKey: '…'?` -# # · Which is needed for generating the station info JSON urls: # → https://api.radio.net/info/v2/search/station?apikey=…&pageindex=1&station=STNAME -# # · To extract these JSON info targets, a custom extraction recipie is injected # into the action module. # → "streamUrl": and "description": are scanned for. # +# todo: +# · https://prod.radio-api.net/stations/local?count=10 +# class radionet (ChannelPlugin): # control flags has_search = False audioformat = "audio/mpeg" listformat = "href" titles = dict(listeners=False, playing="Description") + img_resize = 33 # sources apiPrefix = "https://api.radio.net/info/v2" genre_url = "http://www.radio.net/genre/{}" apiKey = None # Retrieve cat list and map def update_categories(self): - html = ahttp.get("http://www.radio.net/") + html = ahttp.get("http://www.radio.net/genre") self.set_key(html) - ls = re.findall("""
  • ([\w\s']+)""", html) - self.categories = [i for i in ls][0:-18] + ls = re.findall("""([^<]+)""", html) + self.categories = ["Top 40 and Charts"] + [i[1] for i in ls] # Fetch entries def update_streams(self, cat, search=None): # category page, get key - html = ahttp.get(self.genre_url.format(cat)) + urlcat = cat.replace(" ", "-").lower() + html = ahttp.get(self.genre_url.format(urlcat)) for p in range(2, 4): - if html.find('"?p={}">'.format(p)) >= 0: - html += ahttp.get(self.genre_url.format(cat) + "?p={}".format(p)) + if html.find('?p={}"'.format(p)) >= 0: + html += ahttp.get(self.genre_url.format(urlcat) + "?p={}".format(p)) self.set_key(html) r = [] - - # split station blocks - for row in re.findall("""
    ]+ src="([^<">]+)" .*? - ]*>(.*?) .*? - ]*>\s*(.*?)\s* - """, row, re.X|re.S) -# log.DATA_ROW(d) + # prefetch images from embedded json (genres and location would also be sourceable from "playables":[…]) + imgs = dict(re.findall('\],"id":"(\w+)","logo100x100":"(htt[^"]+)",', html)) + #log.DATA(imgs) + + # top 100 of the most horrible html serializations + """ +
  • +
    +
    +
    +
    +
    + +
    +
    +
    +
    KISS FM UK
    +
    London, United Kingdom / Hits, Pop, R'n'B
    +
    +
    + {"city":"Hanover","country":"Germany","genres":["Pop","80s","Top 40 & Charts"],"id":"ndr2","logo100x100":"https://d3kle7qwymxpcy.cloudfront.net/images/broadcasts/02/33/2262/1/c100.png","logo300x300":"https://d3kle7qwymxpcy.cloudfront.net/images/broadcasts/02/33/2262/1/c300.png","logo630x630":"","name":"NDR 2","type":"STATION"}') + """ + rx = re.compile(""" + .*? + ]+> (\w[^<]+)
    \s* + ]+> (\w[^/]+) \s+ / \s+ (\w.+?)
    + """, re.X|re.S + ) + # extract text fields + for d in re.findall(rx, html): + #log.DATA_ROW(d) + href, title, location, desc = d # refurbish extracted strings - if d and len(d) and len(d[0]) == 5: - href, name, img, title, desc = d[0] - r.append(dict( - name = name, - genre = cat, - title = unhtml(title), - playing = unhtml(desc), - url = "urn:radionet:"+name, #self._url(name), - homepage = "http:{}".format(href), - img = img, - )); + r.append(dict( + name = href, + genre = unhtml(desc), + title = unhtml(title), + playing = unhtml(location), + url = "urn:radionet:"+href, + homepage = "http://www.radio.net/s/{}".format(href), + img = imgs.get(href, "https://www.radio.net/favicon.ico"), + )); return r # api search is gone, now requires to fetch streamUrl from per-radio homepage def resolve_urn(self, row): if row.get("url", "-").find("urn:radionet:") != 0: @@ -125,15 +149,15 @@ html = ahttp.get(row["homepage"]) stream = re.findall('"stream[s:[{"\s]+url"[\s:]+"([^"]+)"', html, re.S|re.I) if stream: row["url"] = stream[0] return row - + # extract JavaScript key from any HTML blob (needed for station query) def set_key(self, html): ls = re.findall("""apiKey: '(\w+)'""", html) if ls: self.apiKey = ls[0]