Check-in [697df17183]
Overview
Comment: | Update for recent change, but also implement JSON extraction while at it. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: | 697df17183784cfcd54607635d36fd1e |
User & Date: | mario on 2020-12-28 19:49:36 |
Other Links: | manifest | tags |
Context
2021-01-30
| ||
10:31 | Implement jamendo APIs new `download_allowed` flags (as simple state icons here - purely visual, not actually affecting download button / not even sure if streamripper works on all entry types). check-in: 3e718937a3 user: mario tags: trunk | |
2020-12-28
| ||
19:49 | Update for recent change, but also implement JSON extraction while at it. check-in: 697df17183 user: mario tags: trunk | |
2020-12-18
| ||
10:49 | Limit spaces `# {0,3}` in plugin comment blocks before field: name check-in: da617b8b4f user: mario tags: trunk | |
Changes
Modified contrib/radionet.py from [4a9edf7fc3] to [2899884b53].
1 2 3 4 5 | # encoding: UTF-8 # api: streamtuner2 # title: radio.net # description: Europe's biggest radio platform # url: http://radio.net/ | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 | # encoding: UTF-8 # api: streamtuner2 # title: radio.net # description: Europe's biggest radio platform # url: http://radio.net/ # version: 1.1 # type: channel # category: radio # png: # iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAt0lEQVR42mNgYGD4r+Ar/F/BDwkD+SBxojBMs1mLPBArgGlFqEEENYMNQNLsukIDYkirAvGu # ABsA1OC6XOP/5f8nwIaYAg0k2gBFsAsgTgcZkvnfDugFEeK9AFKsCPMG0CU6eZJgQ4R1eP8H7LLEivWyFJANQcQCLPBAmkGG4MJohmA6C6QA5gI5OxEUDNII # MwSvASBFIA3ociCxkWQAKMDICkSQIpgh2LDnSmP80YhsCFEJiRIMADpmeUOpqgjRAAAAAElFTkSuQmCC # priority: optional |
︙ | ︙ | |||
22 23 24 25 26 27 28 29 30 31 32 33 34 35 | # request to get real streaming url, but at least no more # expiring access key. import time import json import re from config import * from channels import * import ahttp import action # obsolete: hook special JSON format in to avoid grepping images by generic handler | > | 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 | # request to get real streaming url, but at least no more # expiring access key. import time import json import re import traceback from config import * from channels import * import ahttp import action # obsolete: hook special JSON format in to avoid grepping images by generic handler |
︙ | ︙ | |||
94 95 96 97 98 99 100 | html = ahttp.get(self.genre_url.format(urlcat)) for p in range(2, 4): self.status(p / 5.5) if html.find('?p={}"'.format(p)) >= 0: html += ahttp.get(self.genre_url.format(urlcat) + "?p={}".format(p)) self.set_key(html) r = [] | | > > > > > > > > > > > > > > > > | | | > | | | | < < | | > | | < < | > > > > > > > > > > > > > > > > > > > > | 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 | html = ahttp.get(self.genre_url.format(urlcat)) for p in range(2, 4): self.status(p / 5.5) if html.find('?p={}"'.format(p)) >= 0: html += ahttp.get(self.genre_url.format(urlcat) + "?p={}".format(p)) self.set_key(html) r = [] # fetch JSON ls_json = re.findall("__CONTEXT_PROVIDER__\s*=\s*(\{.+\});", html) if ls_json: try: return self.from_json(ls_json) except: log.error("JSON extraction failed", traceback.format_exc()) # prefetch images from embedded json (genres and location would also be sourceable from "playables":[…]) imgs = dict(re.findall('\],"id":"(\w+)","logo100x100":"(htt[^"]+)",', html)) #log.DATA(imgs) # top 100 of the most horrible html serializations """ <a data-testid="list-item" href="/s/rds"><div class="sc-1crnfmg-8 dUCUtS"><div class="sc-1crnfmg-0 hwucGp"><div class="lazyload-placeholder"></div><div class="sc-1crnfmg-1 eJysYN"><svg class="sc-1crnfmg-9 bWqKYT" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 32 32"><path d="M4 32l25.26-16L4 0z"></path></svg></div></div><div class="sc-1crnfmg-2 evYzNY"><div class="sc-1crnfmg-3 bbMMP">RDS - Radio Dimensione Suono</div><div class="sc-1crnfmg-5 cMsIwq">Rome, <!-- -->Italy<!-- --> / Hits, Pop, Top 40 & Charts</div><div class="sc-1crnfmg-6 hSmqVb"></div></div></div></a></div><div 
class="sc-1crnfmg-11 sc-1crnfmg-12 kbEwWf"> <a data-testid="list-item" href="/s/kiis1027"><div class="sc-1crnfmg-8 dUCUtS"><div class="sc-1crnfmg-0 hwucGp"><div class="lazyload-placeholder"></div><div class="sc-1crnfmg-1 eJysYN"><svg class="sc-1crnfmg-9 bWqKYT" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 32 32"><path d="M4 32l25.26-16L4 0z"></path></svg></div></div><div class="sc-1crnfmg-2 evYzNY"><div class="sc-1crnfmg-3 bbMMP">102.7 KIIS FM</div><div class="sc-1crnfmg-5 cMsIwq">Los Angeles, <!-- -->USA<!-- --> / Top 40 & Charts, Hits</div><div class="sc-1crnfmg-6 hSmqVb"></div></div></div></a></div><div class="sc-1crnfmg-11 sc-1crnfmg-12 kbEwWf"> """ rx = re.compile(""" <a\s+[^>]*\\bhref="(?:https?:)?(?://(?:[\w-]+)\.radio\.net)?/s/([^"]+)/?"> .*? <div[^>]+> (\w[^<]+) </div> \s* <div[^>]+> (\w[^/]+) \s+ / \s+ (\w.+?)</div> """, re.X|re.S ) # extract text fields for d in re.findall(rx, html): #log.DATA_ROW(d) href, title, location, desc = d # refurbish extracted strings r.append(dict( name = href, genre = unhtml(desc), title = unhtml(title), playing = unhtml(location), url = "urn:radionet:"+href, homepage = "http://www.radio.net/s/{}".format(href), img = imgs.get(href, "https://www.radio.net/favicon.ico"), )); return r # process json def from_json(self, ls_json): ls = [] for js in ls_json: ls += json.loads(js)["data"]["stations"]["playables"] r = [] for row in ls: href = row["id"] r.append(dict( name = href, title = row["name"], genre = ",".join(row["genres"]), url = "urn:radionet:"+href, playing = row.get("city", row.get("country", "-")), homepage = "http://www.radio.net/s/{}".format(href), img = row["logo100x100"], )) print(row) return r # api search is gone, now requires to fetch streamUrl from per-radio homepage def resolve_urn(self, row): if row.get("url", "-").find("urn:radionet:") != 0: return html = ahttp.get(row["homepage"]) stream = re.findall('"stream[s:[{"\s]+url"[\s:]+"([^"]+)"', html, re.S|re.I) |
︙ | ︙ |