1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
|
-
+
-
-
-
+
+
+
-
-
|
# encoding: UTF-8
# api: streamtuner2
# title: radio.net
# description: Europe's biggest radio platform
# url: http://radio.net/
# version: 0.7
# version: 0.8
# type: channel
# category: radio
# png:
# iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAt0lEQVR42mNgYGD4r+Ar/F/BDwkD+SBxojBMs1mLPBArgGlFqEEENYMNQNLsukIDYkirAvGu
# ABsA1OC6XOP/5f8nwIaYAg0k2gBFsAsgTgcZkvnfDugFEeK9AFKsCPMG0CU6eZJgQ4R1eP8H7LLEivWyFJANQcQCLPBAmkGG4MJohmA6C6QA5gI5OxEUDNII
# MwSvASBFIA3ociCxkWQAKMDICkSQIpgh2LDnSmP80YhsCFEJiRIMADpmeUOpqgjRAAAAAElFTkSuQmCC
# priority: optional
# extraction-method: regex
#
# Radio.net lists around 20.000 worldwide radio stations.
# A maximum of three pages from each genre are fetched here,
# some of the empty categories already omitted.
#
# The website heavily depends on JavaScript, a Flash player, and
# some social tracking cookies, but it is still feasible to access
# with a custom JSON extractor.
#
# It now requires an additional page request to get the real
# streaming URL, but at least there is no more expiring access key.
# Where JSON station info URLs are still used, the station lists may
# need refreshing once in a while, because there's an API key in
# each JSON station info URL.
import time
import json
import re
from config import *
from channels import *
|
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
|
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
|
-
+
|
# → "streamUrl": and "description": are scanned for.
#
class radionet (ChannelPlugin):
# control flags
has_search = False
audioformat = "audio/mpeg"
# NOTE(review): listformat is assigned twice in a row; the later value
# ("href") is the one that remains in effect after both lines have run.
listformat = "rnjs"
listformat = "href"
# presumably column title overrides picked up by ChannelPlugin — TODO confirm
titles = dict(listeners=False, playing="Description")
# sources
# base URL that _url() prepends to the station search query
apiPrefix = "https://api.radio.net/info/v2"
# "{}" is filled in with the category name via str.format()
genre_url = "http://www.radio.net/genre/{}"
# stays None until set_key() finds an apiKey in fetched HTML
apiKey = None
|
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
|
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
|
-
-
+
+
-
+
+
-
+
-
-
+
+
-
-
-
-
-
+
+
+
+
+
+
-
-
-
+
+
+
|
# category page, get key
html = ahttp.get(self.genre_url.format(cat))
for p in range(2, 4):
if html.find('"?p={}">'.format(p)) >= 0:
html += ahttp.get(self.genre_url.format(cat) + "?p={}".format(p))
self.set_key(html)
log.HTML(html)
r = []
# split station blocks
for row in re.split("""<div class="stationinfo""", html)[1:]:
for row in re.findall("""<div class="stationinfo(.+?)</div>""", html, re.S):
log.FOR_rOW(1)
# extract text fields
d = re.findall("""
<a\s+href="(?:https?:)?(//(?:[\w-]+)\.radio\.net/s/([^"]+)/?)" .*?
<img[^>]+ src="([^<">]+)" .*?
<strong[^>]*>(.*?)</strong> .*?
<small[^>]*>\s*(.*?)\s*</small> .*?
<small[^>]*>\s*(.*?)\s*</small>
""", row, re.X|re.S)
# log.DATA_ROW(d)
# refurbish extracted strings
if d and len(d) and len(d[0]) == 5:
href, name, img, title, desc = d[0]
r.append(dict(
name = name,
genre = cat,
title = unhtml(title),
playing = unhtml(desc),
url = "http:{}".format(href), #self._url(name),
url = "urn:radionet:"+name, #self._url(name),
homepage = "http:{}".format(href),
img = img,
));
return r
# api search is gone, now requires to fetch streamUrl from per-radio homepage
def resolve_urn(self, row):
    """Replace a placeholder "urn:radionet:" URL with the real stream URL.

    Rows whose "url" does not start with "urn:radionet:" are not handled
    here and yield None. Otherwise the station homepage (row["homepage"])
    is fetched and scanned for the first "streamUrl" JSON value.

    Returns the updated row when a stream URL was found, else None.
    """
    if not row.get("url", "-").startswith("urn:radionet:"):
        return None
    html = ahttp.get(row["homepage"])
    # raw string so that "\s" reaches the regex engine unmodified
    stream = re.findall(r'"streamUrl":\s*"([^"]+)"', html, re.S)
    if stream:
        row["url"] = stream[0]
        # NOTE(review): the nesting of this return under `if stream:` was
        # reconstructed from a listing that had lost its indentation — confirm.
        return row
    return None


# Patch together JSON station info URL
def _url(self, name):
    """Return the JSON station info search URL for the station `name`.

    Built from self.apiPrefix and self.apiKey.
    """
    # '?_={time}&' is omitted here, only relevant to jQuery/AJAX,
    # and just made bookmarks.is_in() fail due to randomized URLs
    return "{}/search/station?apikey={}&pageindex=1&station={}".format(
        self.apiPrefix, self.apiKey, name
    )
# extract JavaScript key from any HTML blob (needed for station query)
def set_key(self, html):
    """Store the API key found in `html` on self.apiKey.

    Looks for the first occurrence of  apiKey: '<word chars>'  in the
    given text. When nothing matches, self.apiKey is left untouched.
    """
    # raw string: "\w" inside a plain literal is an invalid escape sequence
    found = re.search(r"apiKey: '(\w+)'", html)
    if found:
        self.apiKey = found.group(1)
|