Index: channels/shoutcast.py ================================================================== --- channels/shoutcast.py +++ channels/shoutcast.py @@ -74,29 +74,29 @@ # extracts the category list from shoutcast.com, # sub-categories are queried per 'AJAX' def update_categories(self): html = http.get(self.base_url) - self.categories = ["default"] + self.categories = [] __print__( html ) #

Radio Genres

- rx_main = re.compile(r'
  • [\w\s]+
  • ', re.S) - rx_sub = re.compile(r'[\w\s\d]+') - for uu in rx_main.findall(html): + rx = re.compile(r'[\w\s]+', re.S) + sub = [] + for uu in rx.findall(html): __print__(uu) - (id,name) = uu + (main,name,id) = uu name = urllib.unquote(name) # main category - self.categories.append(name) - - # sub entries - html = http.ajax("http://shoutcast.com/genre.jsp", {"genre":name, "id":id}) - __print__(html) - sub = rx_sub.findall(html) - self.categories.append(sub) + if main: + if sub: + self.categories.append(sub) + sub = [] + self.categories.append(name) + else: + sub.append(name) # it's done __print__(self.categories) conf.save("cache/categories_shoutcast", self.categories) pass @@ -120,94 +120,105 @@ entries = [] next = 0 max = int(conf.max_streams) count = max rx_stream = None - rx_next = re.compile("""onclick="showMoreGenre""") try: - while (next < max): + if (next < max): + + #/radiolist.cfm?action=sub&string=&cat=Oldies&_cf_containerId=radiolist&_cf_nodebug=true&_cf_nocache=true&_cf_rc=0 + #/radiolist.cfm?start=19&action=sub&string=&cat=Oldies&amount=18&order=listeners # page - url = "http://www.shoutcast.com/genre-ajax/" + ucat - referer = url.replace("/genre-ajax", "/radio") - params = { "strIndex":"0", "count":str(count), "ajax":"true", "mode":"listeners", "order":"desc" } + url = "http://www.shoutcast.com/radiolist.cfm?action=sub&string=&cat="+ucat+"&order=listeners&amount="+str(count) + __print__(url) + referer = "http://www.shoutcast.com/?action=sub&cat="+ucat + params = {} # "strIndex":"0", "count":str(count), "ajax":"true", "mode":"listeners", "order":"desc" } html = http.ajax(url, params, referer) #,feedback=self.parent.status) __print__(html) + __print__(re.compile("id=(\d+)").findall(html)); # regular expressions - if not conf.get("pyquery") or not pq: + if 1: #not conf.get("pyquery") or not pq: + + # new html + """ + + Play + Schlagerhoelle - das Paradies fr Schlager und Discofox + Oldies + 955 + 128 + MP3 + + """ # new extraction regex if not rx_stream: rx_stream = re.compile( """ - ]+id="(\d+)".+? - ]+href="(http://[^">]+)"[^>]*>([^<>]+).+? - (?:Recently\s*played|Coming\s*soon|Now\s*playing):\s*([^<]*).+? - ners">(\d*)<.+? - bitrate">(\d*)<.+? - type">([MP3AAC]*) + ]+ href="http://yp.shoutcast.com/sbin/tunein-station.pls\? + id=(\d+)"> ([^<>]+) + \s+ ]+ >([^<>]+) + \s+ ]+ >(\d+) + \s+ ]+ >(\d+) + \s+ ]+ >(\w+) """, re.S|re.I|re.X ) + __print__( rx_stream) # extract entries self.parent.status("parsing document...") __print__("loop-rx") for m in rx_stream.findall(html): - (id, homepage, title, playing, ls, bit, fmt) = m - __print__(uu) + __print__(m) + (id, title, genre, listeners, bitrate, fmt) = m entries += [{ - "title": self.entity_decode(title), + "id": id, "url": "http://yp.shoutcast.com/sbin/tunein-station.pls?id=" + id, - "homepage": http.fix_url(homepage), - "playing": self.entity_decode(playing), - "genre": cat, #self.strip_tags(uu[4]), - "listeners": int(ls), - "max": 0, #int(uu[6]), - "bitrate": int(bit), + "title": self.entity_decode(title), + #"homepage": http.fix_url(homepage), + #"playing": self.entity_decode(playing), + "genre": genre, + "listeners": int(listeners), + #"max": 0, #int(uu[6]), + "bitrate": int(bitrate), "format": self.mime_fmt(fmt), }] # PyQuery parsing else: # iterate over DOM - for div in (pq(e) for e in pq(html).find("div.dirlist")): + for div in (pq(e) for e in pq(html).find("tr")): entries.append({ - "title": div.find("a.playbutton,a.playbutton1").attr("title"), - "url": div.find("a.playbutton,a.playbutton1").attr("href"), - "homepage": http.fix_url(div.find("a.div_website").attr("href")), - "playing": div.find("div.playingtext").attr("title"), - # "title": div.find("a.clickabletitleGenre, div.stationcol a").attr("title"), - # "url": div.find("a.playbutton, a.playbutton1, a.playimage").attr("href"), - # "homepage": http.fix_url(div.find("a.playbutton.clickabletitle, a[target=_blank], a.clickabletitleGenre, a.clickabletitle, div.stationcol a, a").attr("href")), - # "playing": div.find("div.playingtextGenre, div.playingtext").attr("title"), - "listeners": int(div.find("div.dirlistners").text()), - "bitrate": int(div.find("div.dirbitrate").text()), - "format": self.mime_fmt(div.find("div.dirtype").text()), + "title": div.find("a.transition").text(), + "url": div.find("a.transition").attr("href"), + "homepage": "", + "playing": div.find("td:eq(2)").text(), + "listeners": int(div.find("td:eq(4)").text()), + "bitrate": int(div.find("td:eq(5)").text()), + "format": self.mime_fmt(div.find("td:eq(6)").text()), "max": 0, "genre": cat, - # "title2": e.find("a.playbutton").attr("name"), }) # display partial results (not strictly needed anymore, because we fetch just one page) self.parent.status() self.update_streams_partially_done(entries) # more pages to load? - if (re.search(rx_next, html)): - next += count - else: - next = 99999 + next = 99999 - except: + except Exception as e: + __print__(e) return entries #fin __print__(entries) return entries