97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
|
self.status(p / 5.5)
if html.find('?p={}"'.format(p)) >= 0:
html += ahttp.get(self.genre_url.format(urlcat) + "?p={}".format(p))
self.set_key(html)
r = []
# fetch JSON
ls_json = re.findall("__CONTEXT_PROVIDER__\s*=\s*(\{.+\});", html)
if ls_json:
try:
return self.from_json(ls_json)
except:
log.error("JSON extraction failed", traceback.format_exc())
# prefetch images from embedded json (genres and location would also be sourceable from "playables":[…])
|
|
|
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
|
self.status(p / 5.5)
if html.find('?p={}"'.format(p)) >= 0:
html += ahttp.get(self.genre_url.format(urlcat) + "?p={}".format(p))
self.set_key(html)
r = []
# fetch JSON
ls_json = re.findall("<script\sid=\"__NEXT_DATA__\"[^>]*>(\{.+?\})[;<]", html)
if ls_json:
try:
return self.from_json(ls_json)
except:
log.error("JSON extraction failed", traceback.format_exc())
# prefetch images from embedded json (genres and location would also be sourceable from "playables":[…])
|
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
|
));
return r
# process json
def from_json(self, ls_json):
ls = []
for js in ls_json:
js = json.loads(js)["data"]
print(json.dumps(js, indent=4))
ls += js["topTenStations"]#["playables"]
r = []
for row in ls:
href = row["id"]
r.append(dict(
name = href,
title = row["name"],
genre = ",".join(row.get("genres", [])),
|
|
|
>
|
|
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
|
));
return r
# process json
def from_json(self, ls_json):
ls = []
for js in ls_json:
js = json.loads(js)
#print(json.dumps(js, indent=4))
ls += js["props"]["pageProps"]["data"]["stations"]["playables"]
#ls += js[data]["topTenStations"]#["playables"]
r = []
for row in ls:
href = row["id"]
r.append(dict(
name = href,
title = row["name"],
genre = ",".join(row.get("genres", [])),
|