Index: channels/liveradio.py ================================================================== --- channels/liveradio.py +++ channels/liveradio.py @@ -1,11 +1,11 @@ # encoding: UTF-8 # api: streamtuner2 # title: LiveRadio # description: Irish/worldwide radio station directory # url: http://liveradio.ie/ -# version: 0.4 +# version: 0.5 # type: channel # category: radio # config: - # { name: liveradio_tld, value: ie, type: select, select: ie=LiveRadio.ie|uk=LiveRadio.uk, description: Website to fetch from. } # png: @@ -87,10 +87,11 @@ if re.search('/\d+">Next', add): page += 1 else: break html = re.sub("[\s\S]+]*>", "", html) + log.DATA(html) # dom or regex if conf.pyquery: try: return self.pq_extract(html) @@ -106,11 +107,11 @@ # · keep station ID as `urn:liveradion:12345` # def rx_extract(self, html): r = [] ls = re.findall(""" - itemtype="http://schema.org/RadioStation"> .*? + itemtype="https?://schema.org/RadioStation"> .*? href="(?:https?://www.liveradio.\w+)?/stations/([\w-]+) .*? ([^<]+)< .*? itemprop="name">]+>([^<]+) .*? class="genre">([^<]+)< @@ -140,12 +141,12 @@
Funk, Soul
""" def pq_extract(self, html): r = [] html = pq(html).make_links_absolute(self.base) - for radio in html.find("*[itemscope][itemtype='http://schema.org/RadioStation']"): - #log.DATA(radio) + for radio in html.find("*[itemscope][itemtype='http://schema.org/RadioStation'], *[itemscope][itemtype='https://schema.org/RadioStation']"): + log.DATA(radio) radio = pq(radio) href = radio.find("*[itemprop='name'] a").attr("href") id = re.search("/([\w-]+)$", href).group(1) r.append(dict( homepage = self.base + "stations/" + id,