1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111 | # encoding: UTF-8
# api: streamtuner2
# title: radiolist.net
# description: Station list by continent+country
# url: http://radiolist.net/
# version: 0.4
# type: channel
# category: radio
# priority: extra
# png:
# iVBORw0KGgoAAAANSUhEUgAAABgAAAAYBAMAAAASWSDLAAAAFVBMVEVKb61qibyDnMegs9S6yeDV4O37/vyx66abAAAAAWJLR0QAiAUdSAAAAAlwSFlzAAALEwAACxMB
# AJqcGAAAAAd0SU1FB+ECDBAgLJqgZW4AAADoSURBVBjTNdBNj4MgEAbgqdLeZdo9C5NwFmo5Y7Wedauc1y/+/09YdLskkDwJmZl3IOxnON4A8frQhdc/7mG2cv3gx29X
# rdUfZuVHQ3JHEzZ7GSuNXxFV/FYYwryO6MOiZqEdnQPUC/fsXZaMuxa6MFfOVYN7kIWpHZClyJGLFjbbC617KaRUEJ4r4fU7IqNYrW5f2kgU5gZInG6MZ086eejcyIvO
# 1KwoLayoJjqnuWO5giW8msxVmBQXD5PttSlRm8TG2fDNZS3rRO/opeSCMnPa82xSmNgkfRxJ5yZxlPrPDmLu+7GqX4lERq4G0UEyAAAAAElFTkSuQmCC
# extraction-method: regex
#
# Radio station list grouped by continents and countries.
# Some categories return no results, because web players are
# filtered out.
import re
import action
import ahttp
from config import *
from channels import *
# radiolist.net
#
# · Groups stations by continents and countries. Europe seems to be the
# main category (empty "" path), the U.S. is filed under "/world", and Canada,
# Asia etc. sit in subpaths such as "/world/canada". The .catmap{} assigns
# paths to titles.
#
# · Playlist formats vary wildly. Therefore this module comes with a guessing
# method (super crude) of its own.
#
# · The audio-format-from-URL guessing should be generalized out of here perhaps.
#
# · Each station is in a <tr>…</tr> block. Individual regexps are used for field
# extraction afterwards (instead of a block match).
#
# · Entries may contain more than one streaming url. Each accompanied by a
# bitrate. → Therefore the .best_url() sorting method.
#
# · Later versions might of course use multi-urls again…
#
class radiolist (ChannelPlugin, action.heuristic_funcs):
    """
    Channel plugin for radiolist.net.

    Categories are continents with their countries as subcategories.
    Station rows are scraped with regular expressions; when a station
    offers several stream URLs, the one with the highest bitrate is kept.
    """

    # module attributes
    listformat = "pls"
    has_search = False
    categories = ["Europe", "America", "Canada", "Oceania", "Asia"]
    # category title -> URL path below the site root
    catmap = {"Europe":"", "America":"world", "Canada":"world/canada", "Oceania":"world/oceania", "Asia":"world/asia"}
    titles = dict( genre="Genre", title="Station", playing="Location", bitrate="Bitrate", listeners=False )


    def update_categories(self):
        """
        Rebuild .categories and .catmap from the continent overview pages.

        Starts from the static continent list, fetches each continent
        page, and registers every country link found there as a
        subcategory. Result format: [title, [sorted subtitles], title, …].
        """
        # reset to the known continent paths before adding country links
        self.catmap = {"Europe":"", "America":"world", "Canada":"world/canada", "Oceania":"world/oceania", "Asia":"world/asia"}
        # link cell: optional absolute host prefix, then path and caption
        rx_links = re.compile(r"""
            <td(?:\sstyle="height:\s30px;")?><a\s+href="(?:http://www\.radiolist\.net)?/([\w/.-]+)">([\w\s-]+)</a>[^<]*</td>
        """, re.X)
        # navigation links that are not country categories
        skip = ("Terms", "About Us", "Donation", "United States")
        c = []
        for title in ("Europe", "America", "Canada", "Oceania", "Asia"):
            c.append(title)
            html = ahttp.get("http://www.radiolist.net/" + self.catmap[title])
            sub = []
            for p, t in rx_links.findall(html):
                if t in skip:
                    continue
                sub.append(t)
                self.catmap[t] = p
            c.append(sorted(sub))
        self.categories = c


    def update_streams(self, cat):
        """
        Fetch the page for category `cat` and return its station entries.

        Each <tr>…</tr> block is probed with three separate regexps
        (homepage+title, stream urls+bitrates, location+genre). Rows
        lacking any of the three are skipped.

        Returns a list of dicts with the keys homepage, title, url,
        bitrate, format, listformat, playing and genre.
        """
        # raw strings, so \s, \d and \w reach the regex engine unchanged
        rx_title = re.compile(r'<a\s+href="([^">]+)"[^>]+target="_blank"[^>]*>(.+?)</a>', re.I)
        rx_urls = re.compile(r'<a href="([^">]+)">(\d+)(?: Kbps)*</a>', re.I)
        rx_genre = re.compile(r'<td[^>]+>(\w*[^<>]*)</td>\s*<td[^>]+>(\w+[^<>]+)</td>\s*$', re.I)

        entries = []
        html = ahttp.get("http://radiolist.net/" + self.catmap[cat])
        for block in re.findall("<tr>(.+?)</tr>", html, re.S):
            ut = rx_title.findall(block)  # homepage+title
            uu = rx_urls.findall(block)   # urls+bitrates
            lg = rx_genre.findall(block)  # location+genre
            if not (ut and uu and lg):
                continue
            url, br = self.best_url(uu)
            entries.append(dict(
                homepage = ut[0][0],
                title = unhtml(ut[0][1]),
                url = url,
                bitrate = br,
                # mime_guess/list_guess are not defined in this class;
                # presumably inherited from action.heuristic_funcs
                format = self.mime_guess(url, "audio/mpeg"),
                listformat = self.list_guess(url),
                playing = lg[0][0],
                genre = lg[0][1]
            ))

        # done
        for e in entries:
            log.DATA(e)
        return entries


    def best_url(self, urls):
        """
        Pick the highest rated URL from [(url, bitrate), …] tuples.

        Returns a (url, bitrate) tuple with the bitrate converted by
        to_int(). `urls` must not be empty; the caller checks this.
        """
        r = {u: to_int(b) for u, b in urls}  # {url: bitrate, …}
        best = sorted(r, key=r.get, reverse=True)
        return best[0], r[best[0]]
|
|
|
<
>
|
>
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
|
|
|
<
>
|
>
|
>
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
<
<
<
|
|
>
>
|
<
|
|
|
>
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
< | 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
| # encoding: UTF-8
# api: streamtuner2
# title: radiolist.net
# description: Station list by continent+country
# url: http://radiolist.net/
# version: 0.5
# type: channel
# category: radio
# priority: obsolete
# png:
# iVBORw0KGgoAAAANSUhEUgAAABgAAAAYBAMAAAASWSDLAAAAFVBMVEVKb61qibyDnMegs9S6yeDV4O37/vyx66abAAAAAWJLR0QAiAUdSAAAAAlwSFlzAAALEwAACxMB
# AJqcGAAAAAd0SU1FB+ECDBAgLJqgZW4AAADoSURBVBjTNdBNj4MgEAbgqdLeZdo9C5NwFmo5Y7Wedauc1y/+/09YdLskkDwJmZl3IOxnON4A8frQhdc/7mG2cv3gx29X
# rdUfZuVHQ3JHEzZ7GSuNXxFV/FYYwryO6MOiZqEdnQPUC/fsXZaMuxa6MFfOVYN7kIWpHZClyJGLFjbbC617KaRUEJ4r4fU7IqNYrW5f2kgU5gZInG6MZ086eejcyIvO
# 1KwoLayoJjqnuWO5giW8msxVmBQXD5PttSlRm8TG2fDNZS3rRO/opeSCMnPa82xSmNgkfRxJ5yZxlPrPDmLu+7GqX4lERq4G0UEyAAAAAElFTkSuQmCC
# extraction-method: regex
#
# Radio station list grouped by continents and countries.
#
# NO LONGER LISTS STREAMING URLS (~ 2019), thus isn't interesting
# enough anymore for extended support.
import re
import action
import ahttp
from config import *
from channels import *
# radiolist.net
#
# · Groups stations by continents and countries.
#
# · Only fetches titles/homepages henceforth.
#
class radiolist (ChannelPlugin):
    """
    Channel plugin for radiolist.net.

    Groups stations by continents and countries. Only titles and
    homepage links are fetched; entries are marked as "text/html".
    """

    # module attributes
    listformat = "href"
    has_search = False
    categories = ["Europe", "America", "Canada", "Australia"]
    # category title -> URL path below the site root
    # NOTE(review): "Canada" is "can" here but "ca" in update_categories() — confirm which path is current
    catmap = {"Albania": "albania", "America": "us", "Andorra": "andorra", "Australia": "au", "Austria": "austria", "Belarus": "belarus", "Belgium": "belgium", "Bulgaria": "bulgaria", "Canada": "can", "Croatia": "croatia", "Denmark": "denmark", "Estonia": "estonia", "Europe": "", "Finland": "finland", "France": "france", "Germany": "germany", "Greece": "greece", "Hungary": "hungary", "Iceland": "iceland", "Ireland": "ireland", "Italy": "italy", "Latvia": "latvia", "Liechtenstein": "liechtenstein", "Lithuania": "lithuania", "Luxembourg": "luxembourg", "Macedonia": "macedonia", "Malta": "malta", "Moldova": "moldova", "Monaco": "monaco", "Montenegro": "montenegro", "Netherlands": "netherlands", "New Zealand": "nz", "Norway": "norway", "Poland": "poland", "Portugal": "portugal", "Romania": "romania", "Russia": "russia", "Serbia": "serbia", "Slovakia": "slovakia", "Slovenia": "slovenia", "South America": "sa", "Spain": "spain", "Sweden": "sweden", "Switzerland": "switzerland", "Ukraine": "ukraine"}
    titles = dict( genre="Genre", title="Station", playing="Location", bitrate="Bitrate", listeners=False )


    def update_categories(self):
        """
        Rebuild .categories and .catmap from the region overview pages.

        Starts from a static list of top-level regions, fetches each
        region page, and registers every linked entry found there as a
        subcategory. Result format: [title, [sorted subtitles], title, …].
        """
        # reset to the top-level regions before adding discovered links
        self.catmap = {"Europe":"", "America":"us", "Canada":"ca", "Australia":"au", "New Zealand":"nz", "South America":"sa"}
        # list item with a link (optional host and 2-3 letter prefix) and an <h3> caption
        rx_links = re.compile(r"""
            <li \s+ id="item[\d-]+"> \s+ <!--[^>]+-->
            \s+ <a\s+href="(?:https?://radiolist\.net)?/((?:\w{2,3}/)?\w+)"
            .+? <h3[^>]*>\s*([\w\s-]+?)\s*<
        """, re.X|re.S)
        # navigation links that are not categories
        skip = ("Terms", "About Us", "Donation", "United States")
        c = []
        # Iterate over a snapshot: the loop body inserts new keys into
        # self.catmap, which would raise RuntimeError on a live
        # Python 3 key view.
        for title in list(self.catmap):
            c.append(title)
            html = ahttp.get("http://www.radiolist.net/" + self.catmap[title])
            sub = []
            for p, t in rx_links.findall(html):
                log.I(p,t)
                if t in skip:
                    continue
                sub.append(t)
                self.catmap[t] = p
            c.append(sorted(sub))
        self.categories = c


    # extraction rules: "block" isolates one station, "fields" maps
    # entry keys to a regex whose first group holds the value
    recipe = {
        "block": r"""<li\s+id="item-\d+-\d+">(.+?)</li>""",
        "split": None,
        "fields": {
            "title": r'data-item-title="(.+?)"',
            "url": r'data-item-link="(http.+?)"',
            "homepage": r'data-item-link="(.+?)"',
            "favicon": r'<img[^>]+src="(.+?)"',
            "description": r'<p\sclass="ca-sub">(.+?)</p>'
        }
    }


    def update_streams(self, cat):
        """
        Fetch the page for category `cat` and return its station entries.

        Every block matched by recipe["block"] is probed with each
        recipe["fields"] regex; the first match per field is stored
        after unhtml(). Entries without both "url" and "title" are
        dropped.

        Returns a list of dicts, each preset with genre "-", playing
        set to `cat` and format "text/html".
        """
        entries = []
        html = ahttp.get("http://radiolist.net/" + self.catmap[cat])
        for block in re.findall(self.recipe["block"], html, re.S):
            log.HTML(block)
            e = {"genre":"-", "playing":cat, "format":"text/html"}
            # .items() exists on Python 2 and 3 alike (iteritems() is Python 2 only)
            for field, rx in self.recipe["fields"].items():
                uu = re.findall(rx, block)
                log.RX(field, rx, uu)
                if uu:
                    e[field] = unhtml(uu[0])
            if "url" in e and "title" in e:
                entries.append(e)

        # done
        for e in entries:
            log.DATA(e)
        return entries
|