Check-in [b8a37b9b5b]
Overview
| Comment: | Extract JSON blob from __NEXT_DATA__ script section |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | trunk |
| Files: | files | file ages | folders |
| SHA1: | b8a37b9b5b2f97d7d8adcafab03c9a8d |
| User & Date: | mario on 2022-02-15 22:05:01 |
| Other Links: | manifest | tags |
Context
|
2022-02-16
| ||
| 08:16 | update regex extraction fallback check-in: 2f83c61edc user: mario tags: trunk | |
|
2022-02-15
| ||
| 22:05 | Extract JSON blob from __NEXT_DATA__ script section check-in: b8a37b9b5b user: mario tags: trunk | |
| 21:53 | temporary workaround: topRadioStations, still need to discover original list check-in: 651abb068d user: mario tags: trunk | |
Changes
Modified contrib/radionet.py from [47338112d3] to [d0ea1042b5].
| ︙ | ︙ | |||
97 98 99 100 101 102 103 |
self.status(p / 5.5)
if html.find('?p={}"'.format(p)) >= 0:
html += ahttp.get(self.genre_url.format(urlcat) + "?p={}".format(p))
self.set_key(html)
r = []
# fetch JSON
| | | 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
self.status(p / 5.5)
if html.find('?p={}"'.format(p)) >= 0:
html += ahttp.get(self.genre_url.format(urlcat) + "?p={}".format(p))
self.set_key(html)
r = []
# fetch JSON
ls_json = re.findall("<script\sid=\"__NEXT_DATA__\"[^>]*>(\{.+?\})[;<]", html)
if ls_json:
try:
return self.from_json(ls_json)
except:
log.error("JSON extraction failed", traceback.format_exc())
# prefetch images from embedded json (genres and location would also be sourceable from "playables":[…])
|
| ︙ | ︙ | |||
159 160 161 162 163 164 165 |
));
return r
# process json
def from_json(self, ls_json):
ls = []
for js in ls_json:
| | | > | | 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 |
));
return r
# process json
def from_json(self, ls_json):
ls = []
for js in ls_json:
js = json.loads(js)
#print(json.dumps(js, indent=4))
ls += js["props"]["pageProps"]["data"]["stations"]["playables"]
#ls += js[data]["topTenStations"]#["playables"]
r = []
for row in ls:
href = row["id"]
r.append(dict(
name = href,
title = row["name"],
genre = ",".join(row.get("genres", [])),
|
| ︙ | ︙ |