14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 | # there's not a lot of information left to fetch. And this plugin is now back
# to defaulting to regex extraction instead of HTML parsing & DOM extraction.
#
#
#
import http
import urllib
import re
from config import conf, __print__, dbg
from pq import pq
#from channels import * # works everywhere but in this plugin(???!)
import channels
# SHOUTcast data module ----------------------------------------
class shoutcast(channels.ChannelPlugin):
# desc |
|
<
>
| 14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 | # there's not a lot of information left to fetch. And this plugin is now back
# to defaulting to regex extraction instead of HTML parsing & DOM extraction.
#
#
#
import ahttp as http
import re
from config import conf, __print__, dbg
from pq import pq
#from channels import * # works everywhere but in this plugin(???!)
import channels
from compat2and3 import urllib
# SHOUTcast data module ----------------------------------------
class shoutcast(channels.ChannelPlugin):
# desc |
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80 | # sub-categories are queried via 'AJAX'
def update_categories(self):
html = http.get(self.base_url)
self.categories = []
__print__( dbg.DATA, html )
# <h2>Radio Genres</h2>
rx = re.compile(r'<li((?:\s+id="\d+"\s+class="files")?)><a href="\?action=sub&cat=([\w\s]+)#(\d+)">[\w\s]+</a>', re.S)
sub = []
for uu in rx.findall(html):
__print__( dbg.DATA, uu )
(main,name,id) = uu
name = urllib.unquote(name)
# main category
if main:
if sub:
self.categories.append(sub)
sub = [] |
|
|
| 62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80 | # sub-categories are queried via 'AJAX'
def update_categories(self):
html = http.get(self.base_url)
self.categories = []
__print__( dbg.DATA, html )
# <h2>Radio Genres</h2>
rx = re.compile(r'<li((?:\s+id="\d+"\s+class="files")?)><a href="\?action=sub&cat=([\w\s]+)#(\d+)">[\w\s]+</a>', re.S)
sub = []
for uu in rx.findall(html):
__print__( dbg.DATA, uu )
(main,name,id) = uu
name = urllib.unquote(name)
# main category
if main:
if sub:
self.categories.append(sub)
sub = [] |
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132 |
#/radiolist.cfm?action=sub&string=&cat=Oldies&_cf_containerId=radiolist&_cf_nodebug=true&_cf_nocache=true&_cf_rc=0
#/radiolist.cfm?start=19&action=sub&string=&cat=Oldies&amount=18&order=listeners
# page
url = "http://www.shoutcast.com/radiolist.cfm?action=sub&string=&cat="+ucat+"&order=listeners&amount="+str(count)
__print__(dbg.HTTP, url)
referer = "http://www.shoutcast.com/?action=sub&cat="+ucat
params = {} # "strIndex":"0", "count":str(count), "ajax":"true", "mode":"listeners", "order":"desc" }
html = http.ajax(url, params, referer) #,feedback=self.parent.status)
__print__(dbg.DATA, html)
#__print__(re.compile("id=(\d+)").findall(html));
# With the new shallow <td> lists it doesn't make much sense to use
# the pyquery DOM traversal. There aren't any sensible selectors to
# extract values; it's just counting the tags.
|
|
|
|
| 115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132 |
#/radiolist.cfm?action=sub&string=&cat=Oldies&_cf_containerId=radiolist&_cf_nodebug=true&_cf_nocache=true&_cf_rc=0
#/radiolist.cfm?start=19&action=sub&string=&cat=Oldies&amount=18&order=listeners
# page
url = "http://www.shoutcast.com/radiolist.cfm?action=sub&string=&cat="+ucat+"&order=listeners&amount="+str(count)
__print__(dbg.HTTP, url)
referer = "http://www.shoutcast.com/?action=sub&cat="+ucat
params = {}
html = http.get(url, params=params, referer=referer, ajax=1)
#__print__(dbg.DATA, html)
#__print__(re.compile("id=(\d+)").findall(html));
# With the new shallow <td> lists it doesn't make much sense to use
# the pyquery DOM traversal. There aren't any sensible selectors to
# extract values; it's just counting the tags.
|