1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
|
# encoding: UTF-8
# api: streamtuner2
# title: streamlicensing
# description: Smaller streaming service provider
# url: http://www.streamlicensing.com/directory/
# version: 0.1
# type: channel
# category: radio
# priority: extra
# png:
# iVBORw0KGgoAAAANSUhEUgAAABAAAAAQBAMAAADt3eJSAAAAGFBMVEVhcAAODy8rLVpHS4RPU22DismUmLTm6va3Zc/ZAAAAAXRSTlMAQObYZgAA
# AAFiS0dEAIgFHUgAAAAJcEhZcwAACxMAAAsTAQCanBgAAAAHdElNRQfhAgwUMiN14qDwAAAAW0lEQVQI12NggAADCMVW7gym3crBDJb08rJgZbB4
# qqEQmBGsKMQEZggqMLCA1LAJM8AZaWUQhjpEO5uwamiQMJjBrChkADSlXBhsfHh5qgCYoWysqACxWQlCAwArBw5QNfhFygAAAABJRU5ErkJggg==
# extraction-method: regex
#
# Streaming service provider, which ensures station legality or fees
# according to US copyright/streaming laws.
#
# Has only major categories. Does not provide channel homepages, and
# is a bit slow due to huge page sizes. No search function implemented
# here.
#
import re
import ahttp
from config import *
from channels import *
# streamlicensing.com
class streamlicensing (ChannelPlugin):
    # module attributes
    listformat = "pls"                # station URLs resolve to .pls playlists
    has_search = False                # no server-side search implemented here
    categories = []                   # category titles, filled by update_categories()
    catmap = {}                       # category title -> numeric genre id (?g= URL parameter)
    titles = dict( listeners=False )  # listeners column disabled (presumably hides it in the station view — confirm)
    # config (not as plugin options here)
    conf_family_unsafe = 0            # sent as the &e= URL parameter; 0 presumably means family-safe listings only — confirm
    conf_maxpages = 3                 # hard limit on listing pages fetched per category (see update_streams loop)
    # magic values
    base_url = "http://www.streamlicensing.com/directory/"
    # playlist generator URL template, filled with (base_url, sid, stream_id)
    pls_sffx = "%sindex.cgi/playlist.pls?action=playlist&type=pls&sid=%s&stream_id=%s"
    # This is well hidden, but it comes with a playlist generator, so doesn't require double lookups.
    # http://www.streamlicensing.com/directory/index.cgi?action=webpro_links&sid=4785&start=1&g=14&e=1&s=
    # .../directory/index.cgi/playlist.pls?action=playlist&type=pls&sid=4785&stream_id=1234
# scrape the category names and their numeric genre ids from the start page
def update_categories(self):
    page = ahttp.get(self.base_url)
    # each genre link embeds its numeric id in the ?g= query parameter,
    # with the human-readable title inside the adjacent <span>
    found = re.findall('"\?start=&g=(\d+)&e=&s="><.+?>([\w\s-]+)</span>', page)
    # alphabetic title list for the category pane
    names = [title for gid, title in found]
    names.sort()
    self.categories = names
    # reverse lookup: title -> genre id (needed to build listing URLs later)
    lookup = {}
    for gid, title in found:
        lookup[title] = gid
    self.catmap = lookup
# extract stream urls
def update_streams(self, cat):
    # prep block regex — one match per station <tr> row:
    #   group 1: row/station id (presumably the sid used in playlist URLs — confirm)
    #   group 2: station name
    #   group 3: currently/last played song, from the inline JS "var lastsong_NN = '...';"
    #   group 4: stream_id for the playlist generator
    #   group 5: bitrate figure (the "NNNk" link text)
    rx_genre = re.compile(r"""
        <tr\sid='(\d+)' .*?
        Station\sName:<.*?>([^<]+)</(?:span|font|td|a)> .*?
        ^var\slastsong_\d+\s*=\s*'([^\n]+)'; .*?
        <a[^>]+onClick=[^>]+&stream_id=(\d+)'[^>]+>(\d+)k<
    """, re.I|re.S|re.X|re.M)
    # collect pages into single string; listing pages are offset in steps of 10
    html = ""
    for page in range(0, self.conf_maxpages):
        html += ahttp.get("%s?start=%s&g=%s&e=%s&s=" % (self.base_url, page * 10, self.catmap[cat], self.conf_family_unsafe))
        # stop early when the page carries no "Next" link for the following offset
        if not re.search("\?start=%s.*>Next" % ((page + 1) * 10), html):
            break
    # strip the "Featured Stations" block so only genuine genre results get scanned
    html = re.sub(">Featured Stations.+?>Previous Page", "", html, 100, re.S)
    print html
    # extract and convert to station rows
    entries = []
    for uu in re.findall(rx_genre, html):
        print uu
        entries.append(dict(
            genre = cat,
|
|
|
|
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
|
|
<
<
<
>
>
>
<
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
|
# encoding: UTF-8
# api: streamtuner2
# title: streamlicensing
# description: Smaller streaming service provider
# url: http://www.streamlicensing.com/directory/
# version: 0.2
# type: channel
# category: radio
# priority: extra
# png:
# iVBORw0KGgoAAAANSUhEUgAAABAAAAAQBAMAAADt3eJSAAAAGFBMVEVhcAAODy8rLVpHS4RPU22DismUmLTm6va3Zc/ZAAAAAXRSTlMAQObYZgAA
# AAFiS0dEAIgFHUgAAAAJcEhZcwAACxMAAAsTAQCanBgAAAAHdElNRQfhAgwUMiN14qDwAAAAW0lEQVQI12NggAADCMVW7gym3crBDJb08rJgZbB4
# qqEQmBGsKMQEZggqMLCA1LAJM8AZaWUQhjpEO5uwamiQMJjBrChkADSlXBhsfHh5qgCYoWysqACxWQlCAwArBw5QNfhFygAAAABJRU5ErkJggg==
# extraction-method: regex
#
# Streaming service provider, which ensures station legality and fees
# in accordance with US copyright conversion corporations.
#
# Has only major categories. Does not provide channel homepages, and
# is a bit slow due to huge page sizes. No search function implemented
# here.
#
import re
import ahttp
from config import *
from channels import *
# streamlicensing.com
#
# · Provides around 20 categories, associated to numeric ids (?g= parameter)
#
# · Station lists are one <tr> block each, with a JS inline script associating
# some web player parameters.
#
# · Each station has a station id=, but also a stream_id= for the playlist
# generator.
#
# · Normally just provides a web player, but used to show .pls links for various
# players. Meanwhile this is hidden, but the playlist generator is still
# available - so doesn't require double lookups.
# → http://www.streamlicensing.com/directory/index.cgi?action=webpro_links&sid=4785&start=1&g=14&e=1&s=
# → .../directory/index.cgi/playlist.pls?action=playlist&type=pls&sid=4785&stream_id=1234
#
# · family_safe and maxpages are hardcoded config options for now.
#
class streamlicensing (ChannelPlugin):
    # module attributes
    listformat = "pls"                # station URLs resolve to .pls playlists
    has_search = False                # no server-side search implemented here
    categories = []                   # category titles, filled by update_categories()
    catmap = {}                       # category title -> numeric genre id (?g= URL parameter)
    titles = dict( listeners=False )  # listeners column disabled (presumably hides it in the station view — confirm)
    # config (not as plugin options here)
    conf_family_unsafe = 0            # sent as the &e= URL parameter; 0 presumably means family-safe listings only — confirm
    # page-fetch limit derived from the global max_streams option
    # (assumes roughly 100 station rows per fetched page — confirm); at least 1
    conf_maxpages = max(int(int(conf.max_streams) / 100), 1)
    # magic values
    base_url = "http://www.streamlicensing.com/directory/"
    # playlist generator URL template, filled with (base_url, sid, stream_id)
    pls_sffx = "%sindex.cgi/playlist.pls?action=playlist&type=pls&sid=%s&stream_id=%s"
# fetch category titles and the title -> genre-id map
def update_categories(self):
    # Scrape the directory start page; each genre link embeds its numeric
    # id in the ?g= query parameter, with the title in the adjacent <span>.
    html = ahttp.get(self.base_url)
    # raw string so the regex escapes (\? \d \w \s) are unambiguous
    cats = re.findall(r'"\?start=&g=(\d+)&e=&s="><.+?>([\w\s-]+)</span>', html)
    # alphabetically sorted titles for the category pane
    self.categories = sorted(title for gid, title in cats)
    # title -> numeric genre id; update_streams() needs the id to build URLs
    self.catmap = dict((title, gid) for gid, title in cats)
# extract stream urls
def update_streams(self, cat):
    # prep block regex — one match per station <tr> row:
    #   group 1: row/station id (presumably the sid used in playlist URLs — confirm)
    #   group 2: station name
    #   group 3: currently/last played song, from the inline JS "var lastsong_NN = '...';"
    #   group 4: stream_id for the playlist generator
    #   group 5: bitrate figure (the "NNNk" link text)
    rx_genre = re.compile(r"""
        <tr\sid='(\d+)' .*?
        Station\sName:<.*?>([^<]+)</(?:span|font|td|a)> .*?
        ^var\slastsong_\d+\s*=\s*'([^\n]+)'; .*?
        <a[^>]+onClick=[^>]+&stream_id=(\d+)'[^>]+>(\d+)k<
    """, re.I|re.S|re.X|re.M)
    # collect pages into single string; listing pages are offset in steps of 10
    html = ""
    for page in range(0, self.conf_maxpages):
        # update the progress bar for each fetched page
        self.progress(self.conf_maxpages, page)
        html += ahttp.get("%s?start=%s&g=%s&e=%s&s=" % (self.base_url, page * 10, self.catmap[cat], self.conf_family_unsafe))
        # stop early when the page carries no "Next" link for the following offset
        if not re.search("\?start=%s.*>Next" % ((page + 1) * 10), html):
            break
    # strip the "Featured Stations" block so only genuine genre results get scanned
    html = re.sub(">Featured Stations.+?>Previous Page", "", html, 100, re.S)
    # extract and convert to station rows
    entries = []
    for uu in re.findall(rx_genre, html):
        print uu
        entries.append(dict(
            genre = cat,
|