1
2
3
4
5
6
7
8
9
10
11
12
13 | # encoding: UTF-8
# api: streamtuner2
# title: LiveRadio
# description: Irish/worldwide radio station directory
# url: http://liveradio.ie/
# version: 0.4
# type: channel
# category: radio
# config: -
# { name: liveradio_tld, value: ie, type: select, select: ie=LiveRadio.ie|uk=LiveRadio.uk, description: Website to fetch from. }
# png:
# iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAABB0lEQVR4nLWTQUpDMRCGv0lregDBI3gAfW/hRrp8ZOMh5PUMXkFcu7EbTxHd
# CC4EhfQkQg/QR5txYQqvMdVHwdnMZJj555uQwH+YurpaNZUOqTWl5i5qGIusDxIAZgBGuBhCsiOgrq7WUa+tkReAjepHystQgmn8zt0As40y |
|
| 1
2
3
4
5
6
7
8
9
10
11
12
13 | # encoding: UTF-8
# api: streamtuner2
# title: LiveRadio
# description: Irish/worldwide radio station directory
# url: http://liveradio.ie/
# version: 0.5
# type: channel
# category: radio
# config: -
# { name: liveradio_tld, value: ie, type: select, select: ie=LiveRadio.ie|uk=LiveRadio.uk, description: Website to fetch from. }
# png:
# iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAABB0lEQVR4nLWTQUpDMRCGv0lregDBI3gAfW/hRrp8ZOMh5PUMXkFcu7EbTxHd
# CC4EhfQkQg/QR5txYQqvMdVHwdnMZJj555uQwH+YurpaNZUOqTWl5i5qGIusDxIAZgBGuBhCsiOgrq7WUa+tkReAjepHystQgmn8zt0As40y |
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118 | add = ahttp.get(self.base + "stations" + page_sfx, { "text": search, "country_id": "", "genre_id": ""})
html += add
if re.search('/\d+">Next</a>', add):
page += 1
else:
break
html = re.sub("</body>[\s\S]+<body[^>]*>", "", html)
# dom or regex
if conf.pyquery:
try:
return self.pq_extract(html)
except Exception as e:
log.ERR(e)
return self.rx_extract(html)
# Extract all the things
#
# · entries utilize HTML5 microdata classification
# · title and genre available right away
# · img url is embedded
# · keep station ID as `urn:liveradio:12345`
#
def rx_extract(self, html):
r = []
ls = re.findall("""
itemtype="http://schema.org/RadioStation"> .*?
href="(?:https?://www.liveradio.\w+)?/stations/([\w-]+) .*?
<img\s+src="/(files/images/[^"]+)" .*?
="country">([^<]+)< .*?
itemprop="name"><a[^>]+>([^<]+)</a> .*?
class="genre">([^<]+)<
""", html, re.X|re.S)
for row in ls: |
>
|
| 85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119 | add = ahttp.get(self.base + "stations" + page_sfx, { "text": search, "country_id": "", "genre_id": ""})
html += add
if re.search('/\d+">Next</a>', add):
page += 1
else:
break
html = re.sub("</body>[\s\S]+<body[^>]*>", "", html)
log.DATA(html)
# dom or regex
if conf.pyquery:
try:
return self.pq_extract(html)
except Exception as e:
log.ERR(e)
return self.rx_extract(html)
# Extract all the things
#
# · entries utilize HTML5 microdata classification
# · title and genre available right away
# · img url is embedded
# · keep station ID as `urn:liveradio:12345`
#
def rx_extract(self, html):
r = []
ls = re.findall("""
itemtype="https?://schema.org/RadioStation"> .*?
href="(?:https?://www.liveradio.\w+)?/stations/([\w-]+) .*?
<img\s+src="/(files/images/[^"]+)" .*?
="country">([^<]+)< .*?
itemprop="name"><a[^>]+>([^<]+)</a> .*?
class="genre">([^<]+)<
""", html, re.X|re.S)
for row in ls: |
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153 | </a>
<div class="name" itemprop="name"><a href="http://www.liveradio.ie/stations/soulconnexion-radio">Soulconnexion Radio</a></div>
<div class="genre">Funk, Soul</div>
"""
def pq_extract(self, html):
r = []
html = pq(html).make_links_absolute(self.base)
for radio in html.find("*[itemscope][itemtype='http://schema.org/RadioStation']"):
#log.DATA(radio)
radio = pq(radio)
href = radio.find("*[itemprop='name'] a").attr("href")
id = re.search("/([\w-]+)$", href).group(1)
r.append(dict(
homepage = self.base + "stations/" + id,
url = "urn:liveradio:" + id,
playing = radio.find("*.country").text(), |
|
|
| 139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154 | </a>
<div class="name" itemprop="name"><a href="http://www.liveradio.ie/stations/soulconnexion-radio">Soulconnexion Radio</a></div>
<div class="genre">Funk, Soul</div>
"""
def pq_extract(self, html):
r = []
html = pq(html).make_links_absolute(self.base)
for radio in html.find("*[itemscope][itemtype='http://schema.org/RadioStation'], *[itemscope][itemtype='https://schema.org/RadioStation']"):
log.DATA(radio)
radio = pq(radio)
href = radio.find("*[itemprop='name'] a").attr("href")
id = re.search("/([\w-]+)$", href).group(1)
r.append(dict(
homepage = self.base + "stations/" + id,
url = "urn:liveradio:" + id,
playing = radio.find("*.country").text(), |