Index: contrib/liveradio.py ================================================================== --- contrib/liveradio.py +++ contrib/liveradio.py @@ -11,29 +11,33 @@ # iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAABB0lEQVR4nLWTQUpDMRCGv0lregDBI3gAfW/hRrp8ZOMh5PUMXkFcu7EbTxHd # CC4EhfQkQg/QR5txYQqvMdVHwdnMZJj555uQwH+YurpaNZUOqTWl5i5qGIusDxIAZgBGuBhCsiOgrq7WUa+tkReAjepHystQgmn8zt0As40y # skYa4HwfSS5w2otd8svtWurqHyvnCZcXAHRRW7v8nANnq6bSPk0ucFQS+M3G2fkduMqLrJF5d3zSTnyYATsXmhO89WLfix8A1NWjvwhek5+m # praLGibPC8knFwnEh4U1ct9FvUvoLk0uPbjiCgCPyd+KD0/WyKX4EPcJFLG2/8EaMeLDoE91sH0B3ERWq2CKMoYAAAAASUVORK5CYII= # priority: extra +# x-elevate: priority:default # extraction-method: regex, action-handler # # LiveRadio.ie, based in Ireland, is a radio station directory. It provides -# genre or country browsing (not in this plugin). It accepts user submissions. +# genre or country browsing (not in this plugin). Already lists over 5550 +# stations (more unique selections). Also accepts user submissions. # -# This channel loads their station logos as favicons, provides a live search. +# This channel loads their station logos as favicons. Even allows to utilize +# the live search function. # # However, station URLs have to be fetched in a second page request. Such # the listings are unsuitable for exporting right away. OTOH the website is # pretty fast; so no delay there or in fetching complete categories. +# import re from config import * from channels import * import ahttp import action -# Just a blog, needs per-page lookup +# Categorized directory, secondary URL lookup class liveradio (ChannelPlugin): # control flags has_search = True listformat = "srv" @@ -46,11 +50,11 @@ categories = ["Top 20"] catmap = {"Top 20":"top-20"} base = "http://www.liveradio.ie/" - # static + # Extract genre links and URL aliases (e.g. 
"Top 20" maps to "/top-20") def update_categories(self): html = ahttp.get("http://www.liveradio.ie/genres") self.categories = ["Top 20"] for row in re.findall(r"""([^<]+)""", html): self.categories.append(unhtml(row[1])) @@ -58,11 +62,11 @@ # Fetch entries def update_streams(self, cat, search=None): - # fetch + # Assemble HTML (collect 1..9 into single blob prior to extraction) html = "" page = 1 while page < 9: page_sfx = "/%s"% page if page > 1 else "" if cat: @@ -73,11 +77,17 @@ if re.search('/\d+">Next', add): page += 1 else: break - # extract + # Extract all the things + # + # · entries utilize HTML5 microdata classification + # · title and genre available right away + # · img url is embedded + # · keep station ID as `urn:liveradion:12345` + # r = [] ls = re.findall(""" itemtype="http://schema.org/RadioStation"> .*? href="/stations/([\w-]+) .*?