Check-in [857f6e2745]
Overview
Comment: | Utilize PyQuery .make_links_absolute(), to shorten img src extraction. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
857f6e2745c161bd9e9aa9776a58bb94 |
User & Date: | mario on 2020-05-13 17:46:38 |
Other Links: | manifest | tags |
Context
2020-05-13
| ||
18:59 | Add .click() counting callback. check-in: 68bbdda1e4 user: mario tags: trunk | |
17:46 | Utilize PyQuery .make_links_absolute(), to shorten img src extraction. check-in: 857f6e2745 user: mario tags: trunk | |
17:46 | Remove dirble plugin. check-in: 5abe4d3e7d user: mario tags: trunk | |
Changes
Modified channels/liveradio.py from [479933ebd6] to [662509f581].
︙ | ︙ | |||
102 103 104 105 106 107 108 | href="(?:https?://www.liveradio.ie)?/stations/([\w-]+) .*? <img\s+src="/(files/images/[^"]+)" .*? ="country">([^<]+)< .*? itemprop="name"><a[^>]+>([^<]+)</a> .*? class="genre">([^<]+)< """, html, re.X|re.S) for row in ls: | | | 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 | href="(?:https?://www.liveradio.ie)?/stations/([\w-]+) .*? <img\s+src="/(files/images/[^"]+)" .*? ="country">([^<]+)< .*? itemprop="name"><a[^>]+>([^<]+)</a> .*? class="genre">([^<]+)< """, html, re.X|re.S) for row in ls: #log.DATA(row) id, img, country, title, genre = row r.append(dict( homepage = self.base + "stations/" + id, url = "urn:liveradio:" + id, playing = unhtml(country), title = unhtml(title), genre = unhtml(genre), |
︙ | ︙ | |||
127 128 129 130 131 132 133 | <span class="country">United Kingdom</span> </a> <div class="name" itemprop="name"><a href="http://www.liveradio.ie/stations/soulconnexion-radio">Soulconnexion Radio</a></div> <div class="genre">Funk, Soul</div> """ def pq_extract(self, html): r = [] | > | | | | 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 | <span class="country">United Kingdom</span> </a> <div class="name" itemprop="name"><a href="http://www.liveradio.ie/stations/soulconnexion-radio">Soulconnexion Radio</a></div> <div class="genre">Funk, Soul</div> """ def pq_extract(self, html): r = [] html = pq(html).make_links_absolute(self.base) for radio in html.find("*[itemscope][itemtype='http://schema.org/RadioStation']"): #log.DATA(radio) radio = pq(radio) href = radio.find("*[itemprop='name'] a").attr("href") id = re.search("/([\w-]+)$", href).group(1) r.append(dict( homepage = self.base + "stations/" + id, url = "urn:liveradio:" + id, playing = radio.find("*.country").text(), title = radio.find("*[itemprop='name']").text(), genre = radio.find("*.genre").text(), img = radio.find("img[itemprop='image']").attr("src") )) return r # Update `url` on station data access (incurs a delay for playing or recording) # # · utilizes action.handler["urn:liveradio"] → urn_resolve hook |
︙ | ︙ |