Check-in [b8a37b9b5b]
Overview
Comment: | Extract JSON blob from __NEXT_DATA__ script section |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: | b8a37b9b5b2f97d7d8adcafab03c9a8d |
User & Date: | mario on 2022-02-15 22:05:01 |
Other Links: | manifest | tags |
Context
2022-02-16
08:16 | update regex extraction fallback check-in: 2f83c61edc user: mario tags: trunk |
2022-02-15
22:05 | Extract JSON blob from __NEXT_DATA__ script section check-in: b8a37b9b5b user: mario tags: trunk |
21:53 | temporary workaround: topRadioStations, still need to discover original list check-in: 651abb068d user: mario tags: trunk |
Changes
Modified contrib/radionet.py from [47338112d3] to [d0ea1042b5].
︙ | ︙ | |||
97 98 99 100 101 102 103 | self.status(p / 5.5) if html.find('?p={}"'.format(p)) >= 0: html += ahttp.get(self.genre_url.format(urlcat) + "?p={}".format(p)) self.set_key(html) r = [] # fetch JSON | | | 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 | self.status(p / 5.5) if html.find('?p={}"'.format(p)) >= 0: html += ahttp.get(self.genre_url.format(urlcat) + "?p={}".format(p)) self.set_key(html) r = [] # fetch JSON ls_json = re.findall("<script\sid=\"__NEXT_DATA__\"[^>]*>(\{.+?\})[;<]", html) if ls_json: try: return self.from_json(ls_json) except: log.error("JSON extraction failed", traceback.format_exc()) # prefetch images from embedded json (genres and location would also be sourceable from "playables":[…]) |
︙ | ︙ | |||
159 160 161 162 163 164 165 | )); return r # process json def from_json(self, ls_json): ls = [] for js in ls_json: | | | > | | 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 | )); return r # process json def from_json(self, ls_json): ls = [] for js in ls_json: js = json.loads(js) #print(json.dumps(js, indent=4)) ls += js["props"]["pageProps"]["data"]["stations"]["playables"] #ls += js[data]["topTenStations"]#["playables"] r = [] for row in ls: href = row["id"] r.append(dict( name = href, title = row["name"], genre = ",".join(row.get("genres", [])), |
︙ | ︙ |