Index: channels/shoutcast.py
==================================================================
--- channels/shoutcast.py
+++ channels/shoutcast.py
@@ -74,29 +74,32 @@
# extracts the category list from shoutcast.com,
# sub-categories are queried per 'AJAX'
def update_categories(self):
html = http.get(self.base_url)
- self.categories = ["default"]
+ self.categories = []
__print__( html )
#
Radio Genres
- rx_main = re.compile(r'[\w\s]+', re.S)
- rx_sub = re.compile(r'[\w\s\d]+')
- for uu in rx_main.findall(html):
+ rx = re.compile(r'[\w\s]+', re.S)
+ sub = []
+ for uu in rx.findall(html):
__print__(uu)
- (id,name) = uu
+ (main,name,id) = uu
name = urllib.unquote(name)
# main category
- self.categories.append(name)
-
- # sub entries
- html = http.ajax("http://shoutcast.com/genre.jsp", {"genre":name, "id":id})
- __print__(html)
- sub = rx_sub.findall(html)
- self.categories.append(sub)
+ if main:
+ if sub:
+ self.categories.append(sub)
+ sub = []
+ self.categories.append(name)
+ else:
+ sub.append(name)
+ # after the loop: the last main category's sub-entries are still pending
+ if sub:
+ self.categories.append(sub)
# it's done
__print__(self.categories)
conf.save("cache/categories_shoutcast", self.categories)
pass
@@ -120,94 +120,105 @@
entries = []
next = 0
max = int(conf.max_streams)
count = max
rx_stream = None
- rx_next = re.compile("""onclick="showMoreGenre""")
try:
- while (next < max):
+ if (next < max):
+
+ #/radiolist.cfm?action=sub&string=&cat=Oldies&_cf_containerId=radiolist&_cf_nodebug=true&_cf_nocache=true&_cf_rc=0
+ #/radiolist.cfm?start=19&action=sub&string=&cat=Oldies&amount=18&order=listeners
# page
- url = "http://www.shoutcast.com/genre-ajax/" + ucat
- referer = url.replace("/genre-ajax", "/radio")
- params = { "strIndex":"0", "count":str(count), "ajax":"true", "mode":"listeners", "order":"desc" }
+ url = "http://www.shoutcast.com/radiolist.cfm?action=sub&string=&cat="+ucat+"&order=listeners&amount="+str(count)
+ __print__(url)
+ referer = "http://www.shoutcast.com/?action=sub&cat="+ucat
+ params = {} # "strIndex":"0", "count":str(count), "ajax":"true", "mode":"listeners", "order":"desc" }
html = http.ajax(url, params, referer) #,feedback=self.parent.status)
__print__(html)
+ __print__(re.compile("id=(\d+)").findall(html));
# regular expressions
- if not conf.get("pyquery") or not pq:
+ if 1: #not conf.get("pyquery") or not pq:
+
+ # new html
+ """
+
+  |
+ Schlagerhoelle - das Paradies fr Schlager und Discofox |
+ Oldies |
+ 955 |
+ 128 |
+ MP3 |
+
+ """
# new extraction regex
if not rx_stream:
rx_stream = re.compile(
"""
- ]+id="(\d+)".+?
- ]+href="(http://[^">]+)"[^>]*>([^<>]+).+?
- (?:Recently\s*played|Coming\s*soon|Now\s*playing):\s*([^<]*).+?
- ners">(\d*)<.+?
- bitrate">(\d*)<.+?
- type">([MP3AAC]*)
+ ]+ href="http://yp.shoutcast.com/sbin/tunein-station.pls\?
+ id=(\d+)"> ([^<>]+)
+ \s+ ]+ >([^<>]+) |
+ \s+ ]+ >(\d+) |
+ \s+ ]+ >(\d+) |
+ \s+ ]+ >(\w+) |
""",
re.S|re.I|re.X
)
+ __print__( rx_stream)
# extract entries
self.parent.status("parsing document...")
__print__("loop-rx")
for m in rx_stream.findall(html):
- (id, homepage, title, playing, ls, bit, fmt) = m
- __print__(uu)
+ __print__(m)
+ (id, title, genre, listeners, bitrate, fmt) = m
entries += [{
- "title": self.entity_decode(title),
+ "id": id,
"url": "http://yp.shoutcast.com/sbin/tunein-station.pls?id=" + id,
- "homepage": http.fix_url(homepage),
- "playing": self.entity_decode(playing),
- "genre": cat, #self.strip_tags(uu[4]),
- "listeners": int(ls),
- "max": 0, #int(uu[6]),
- "bitrate": int(bit),
+ "title": self.entity_decode(title),
+ #"homepage": http.fix_url(homepage),
+ #"playing": self.entity_decode(playing),
+ "genre": genre,
+ "listeners": int(listeners),
+ #"max": 0, #int(uu[6]),
+ "bitrate": int(bitrate),
"format": self.mime_fmt(fmt),
}]
# PyQuery parsing
else:
# iterate over DOM
- for div in (pq(e) for e in pq(html).find("div.dirlist")):
+ for div in (pq(e) for e in pq(html).find("tr")):
entries.append({
- "title": div.find("a.playbutton,a.playbutton1").attr("title"),
- "url": div.find("a.playbutton,a.playbutton1").attr("href"),
- "homepage": http.fix_url(div.find("a.div_website").attr("href")),
- "playing": div.find("div.playingtext").attr("title"),
- # "title": div.find("a.clickabletitleGenre, div.stationcol a").attr("title"),
- # "url": div.find("a.playbutton, a.playbutton1, a.playimage").attr("href"),
- # "homepage": http.fix_url(div.find("a.playbutton.clickabletitle, a[target=_blank], a.clickabletitleGenre, a.clickabletitle, div.stationcol a, a").attr("href")),
- # "playing": div.find("div.playingtextGenre, div.playingtext").attr("title"),
- "listeners": int(div.find("div.dirlistners").text()),
- "bitrate": int(div.find("div.dirbitrate").text()),
- "format": self.mime_fmt(div.find("div.dirtype").text()),
+ "title": div.find("a.transition").text(),
+ "url": div.find("a.transition").attr("href"),
+ "homepage": "",
+ "playing": div.find("td:eq(2)").text(),
+ "listeners": int(div.find("td:eq(4)").text()),
+ "bitrate": int(div.find("td:eq(5)").text()),
+ "format": self.mime_fmt(div.find("td:eq(6)").text()),
"max": 0,
"genre": cat,
- # "title2": e.find("a.playbutton").attr("name"),
})
# display partial results (not strictly needed anymore, because we fetch just one page)
self.parent.status()
self.update_streams_partially_done(entries)
# more pages to load?
- if (re.search(rx_next, html)):
- next += count
- else:
- next = 99999
+ next = 99999
- except:
+ except Exception as e:
+ __print__(e)
return entries
#fin
__print__(entries)
return entries