Index: contrib/liveradio.py
==================================================================
--- contrib/liveradio.py
+++ contrib/liveradio.py
@@ -11,29 +11,33 @@
# iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAABB0lEQVR4nLWTQUpDMRCGv0lregDBI3gAfW/hRrp8ZOMh5PUMXkFcu7EbTxHd
# CC4EhfQkQg/QR5txYQqvMdVHwdnMZJj555uQwH+YurpaNZUOqTWl5i5qGIusDxIAZgBGuBhCsiOgrq7WUa+tkReAjepHystQgmn8zt0As40y
# skYa4HwfSS5w2otd8svtWurqHyvnCZcXAHRRW7v8nANnq6bSPk0ucFQS+M3G2fkduMqLrJF5d3zSTnyYATsXmhO89WLfix8A1NWjvwhek5+m
# praLGibPC8knFwnEh4U1ct9FvUvoLk0uPbjiCgCPyd+KD0/WyKX4EPcJFLG2/8EaMeLDoE91sH0B3ERWq2CKMoYAAAAASUVORK5CYII=
# priority: extra
+# x-elevate: priority:default
# extraction-method: regex, action-handler
#
# LiveRadio.ie, based in Ireland, is a radio station directory. It provides
-# genre or country browsing (not in this plugin). It accepts user submissions.
+# genre or country browsing (not in this plugin). It already lists over 5550
+# stations (more unique selections) and also accepts user submissions.
#
-# This channel loads their station logos as favicons, provides a live search.
+# This channel loads their station logos as favicons. It also supports
+# the live search function.
#
# However, station URLs have to be fetched in a second page request. Such
# the listings are unsuitable for exporting right away. OTOH the website is
# pretty fast; so no delay there or in fetching complete categories.
+#
import re
from config import *
from channels import *
import ahttp
import action
-# Just a blog, needs per-page lookup
+# Categorized directory, secondary URL lookup
class liveradio (ChannelPlugin):
# control flags
has_search = True
listformat = "srv"
@@ -46,11 +50,11 @@
categories = ["Top 20"]
catmap = {"Top 20":"top-20"}
base = "http://www.liveradio.ie/"
- # static
+ # Extract genre links and URL aliases (e.g. "Top 20" maps to "/top-20")
def update_categories(self):
html = ahttp.get("http://www.liveradio.ie/genres")
self.categories = ["Top 20"]
for row in re.findall(r"""([^<]+)""", html):
self.categories.append(unhtml(row[1]))
@@ -58,11 +62,11 @@
# Fetch entries
def update_streams(self, cat, search=None):
- # fetch
+ # Assemble HTML (collect pages 1..8 into a single blob prior to extraction)
html = ""
page = 1
while page < 9:
page_sfx = "/%s"% page if page > 1 else ""
if cat:
@@ -73,11 +77,17 @@
if re.search('/\d+">Next', add):
page += 1
else:
break
- # extract
+ # Extract all the things
+ #
+ # · entries utilize HTML5 microdata classification
+ # · title and genre available right away
+ # · img url is embedded
+ # · keep station ID as `urn:liveradio:12345`
+ #
r = []
ls = re.findall("""
itemtype="http://schema.org/RadioStation"> .*?
href="/stations/([\w-]+) .*?