Check-in [857f6e2745]
Overview
| Comment: | Utilize PyQuery .make_links_absolute(), to shorten img src extraction. |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | trunk |
| Files: | files | file ages | folders |
| SHA1: |
857f6e2745c161bd9e9aa9776a58bb94 |
| User & Date: | mario on 2020-05-13 17:46:38 |
| Other Links: | manifest | tags |
Context
|
2020-05-13
| ||
| 18:59 | Add .click() counting callback. check-in: 68bbdda1e4 user: mario tags: trunk | |
| 17:46 | Utilize PyQuery .make_links_absolute(), to shorten img src extraction. check-in: 857f6e2745 user: mario tags: trunk | |
| 17:46 | Remove dirble plugin. check-in: 5abe4d3e7d user: mario tags: trunk | |
Changes
Modified channels/liveradio.py from [479933ebd6] to [662509f581].
| ︙ | ︙ | |||
102 103 104 105 106 107 108 |
href="(?:https?://www.liveradio.ie)?/stations/([\w-]+) .*?
<img\s+src="/(files/images/[^"]+)" .*?
="country">([^<]+)< .*?
itemprop="name"><a[^>]+>([^<]+)</a> .*?
class="genre">([^<]+)<
""", html, re.X|re.S)
for row in ls:
| | | 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
href="(?:https?://www.liveradio.ie)?/stations/([\w-]+) .*?
<img\s+src="/(files/images/[^"]+)" .*?
="country">([^<]+)< .*?
itemprop="name"><a[^>]+>([^<]+)</a> .*?
class="genre">([^<]+)<
""", html, re.X|re.S)
for row in ls:
#log.DATA(row)
id, img, country, title, genre = row
r.append(dict(
homepage = self.base + "stations/" + id,
url = "urn:liveradio:" + id,
playing = unhtml(country),
title = unhtml(title),
genre = unhtml(genre),
|
| ︙ | ︙ | |||
127 128 129 130 131 132 133 |
<span class="country">United Kingdom</span>
</a>
<div class="name" itemprop="name"><a href="http://www.liveradio.ie/stations/soulconnexion-radio">Soulconnexion Radio</a></div>
<div class="genre">Funk, Soul</div>
"""
def pq_extract(self, html):
r = []
| > | | | | 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 |
<span class="country">United Kingdom</span>
</a>
<div class="name" itemprop="name"><a href="http://www.liveradio.ie/stations/soulconnexion-radio">Soulconnexion Radio</a></div>
<div class="genre">Funk, Soul</div>
"""
def pq_extract(self, html):
    """
    Extract station entries from a listing page via PyQuery DOM traversal.

    Links in the document are made absolute against `self.base` first, so
    the image `src` can be used as-is. Every element marked up as a
    schema.org RadioStation yields one dict with the keys `homepage`,
    `url` (an "urn:liveradio:<id>" placeholder), `playing` (the country
    text), `title`, `genre` and `img`.

    Entries whose name link is missing, or whose href does not end in a
    station id, are skipped rather than aborting the whole listing.

    Returns a list of station dicts (possibly empty).
    """
    r = []
    doc = pq(html).make_links_absolute(self.base)
    for radio in doc.find("*[itemscope][itemtype='http://schema.org/RadioStation']"):
        #log.DATA(radio)
        radio = pq(radio)
        href = radio.find("*[itemprop='name'] a").attr("href")
        # .attr() yields nothing when the link is absent; search an empty
        # string then, so the no-match branch below handles both cases
        match = re.search(r"/([\w-]+)$", href or "")
        if not match:
            continue
        station_id = match.group(1)
        r.append(dict(
            homepage = self.base + "stations/" + station_id,
            url = "urn:liveradio:" + station_id,
            playing = radio.find("*.country").text(),
            title = radio.find("*[itemprop='name']").text(),
            genre = radio.find("*.genre").text(),
            img = radio.find("img[itemprop='image']").attr("src")
        ))
    return r
# Update `url` on station data access (incurs a delay for playing or recording)
#
# · utilizes action.handler["urn:liveradio"] → urn_resolve hook
|
| ︙ | ︙ |