Check-in [0cfc0c113b]
Overview
Comment: | Update for changed itemtype= url (https) |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
0cfc0c113be82fbdc44edda22481f1c4 |
User & Date: | mario on 2021-02-06 15:03:34 |
Other Links: | manifest | tags |
Context
2021-02-06
| ||
15:06 | Simplify config description to avoid `,` commas paired with the `|` pipe separator (the pluginconf regex is more relaxed, whereas ST2 would strictly allow only one or the other) check-in: 34bb0c97d8 user: mario tags: trunk | |
15:03 | Update for changed itemtype= url (https) check-in: 0cfc0c113b user: mario tags: trunk | |
2021-01-30
| ||
10:31 | Implement the Jamendo API's new `download_allowed` flags (as simple state icons here - purely visual, not actually affecting the download button / not even sure if streamripper works on all entry types). check-in: 3e718937a3 user: mario tags: trunk | |
Changes
Modified channels/liveradio.py from [68701f5e2f] to [5c053cf7e4].
1 2 3 4 5 | # encoding: UTF-8 # api: streamtuner2 # title: LiveRadio # description: Irish/worldwide radio station directory # url: http://liveradio.ie/ | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 | # encoding: UTF-8 # api: streamtuner2 # title: LiveRadio # description: Irish/worldwide radio station directory # url: http://liveradio.ie/ # version: 0.5 # type: channel # category: radio # config: - # { name: liveradio_tld, value: ie, type: select, select: ie=LiveRadio.ie|uk=LiveRadio.uk, description: Website to fetch from. } # png: # iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAABB0lEQVR4nLWTQUpDMRCGv0lregDBI3gAfW/hRrp8ZOMh5PUMXkFcu7EbTxHd # CC4EhfQkQg/QR5txYQqvMdVHwdnMZJj555uQwH+YurpaNZUOqTWl5i5qGIusDxIAZgBGuBhCsiOgrq7WUa+tkReAjepHystQgmn8zt0As40y |
︙ | ︙ | |||
85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 | add = ahttp.get(self.base + "stations" + page_sfx, { "text": search, "country_id": "", "genre_id": ""}) html += add if re.search('/\d+">Next</a>', add): page += 1 else: break html = re.sub("</body>[\s\S]+<body[^>]*>", "", html) # dom or regex if conf.pyquery: try: return self.pq_extract(html) except Exception as e: log.ERR(e) return self.rx_extract(html) # Extract all the things # # · entries utilize HTML5 microdata classification # · title and genre available right away # · img url is embedded # · keep station ID as `urn:liveradion:12345` # def rx_extract(self, html): r = [] ls = re.findall(""" | > | | 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 | add = ahttp.get(self.base + "stations" + page_sfx, { "text": search, "country_id": "", "genre_id": ""}) html += add if re.search('/\d+">Next</a>', add): page += 1 else: break html = re.sub("</body>[\s\S]+<body[^>]*>", "", html) log.DATA(html) # dom or regex if conf.pyquery: try: return self.pq_extract(html) except Exception as e: log.ERR(e) return self.rx_extract(html) # Extract all the things # # · entries utilize HTML5 microdata classification # · title and genre available right away # · img url is embedded # · keep station ID as `urn:liveradion:12345` # def rx_extract(self, html): r = [] ls = re.findall(""" itemtype="https?://schema.org/RadioStation"> .*? href="(?:https?://www.liveradio.\w+)?/stations/([\w-]+) .*? <img\s+src="/(files/images/[^"]+)" .*? ="country">([^<]+)< .*? itemprop="name"><a[^>]+>([^<]+)</a> .*? class="genre">([^<]+)< """, html, re.X|re.S) for row in ls: |
︙ | ︙ | |||
138 139 140 141 142 143 144 | </a> <div class="name" itemprop="name"><a href="http://www.liveradio.ie/stations/soulconnexion-radio">Soulconnexion Radio</a></div> <div class="genre">Funk, Soul</div> """ def pq_extract(self, html): r = [] html = pq(html).make_links_absolute(self.base) | | | | 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 | </a> <div class="name" itemprop="name"><a href="http://www.liveradio.ie/stations/soulconnexion-radio">Soulconnexion Radio</a></div> <div class="genre">Funk, Soul</div> """ def pq_extract(self, html): r = [] html = pq(html).make_links_absolute(self.base) for radio in html.find("*[itemscope][itemtype='http://schema.org/RadioStation'], *[itemscope][itemtype='https://schema.org/RadioStation']"): log.DATA(radio) radio = pq(radio) href = radio.find("*[itemprop='name'] a").attr("href") id = re.search("/([\w-]+)$", href).group(1) r.append(dict( homepage = self.base + "stations/" + id, url = "urn:liveradio:" + id, playing = radio.find("*.country").text(), |
︙ | ︙ |