Check-in [fd03da93ec]
Overview
Comment: | Simpler rx/dom alternation code. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
fd03da93ecc26d15f221e73687a76a7d |
User & Date: | mario on 2014-05-28 01:33:43 |
Other Links: | manifest | tags |
Context
2014-05-28
| ||
13:01 | New logo, updated documentation. check-in: 433bf11914 user: mario tags: trunk | |
01:33 | Simpler rx/dom alternation code. check-in: fd03da93ec user: mario tags: trunk | |
01:33 | Prevent ComboBoxText in settings dialog from taking focus when scrolling over it with the mousewheel. check-in: 0d8e75189e user: mario tags: trunk | |
Changes
Modified channels/shoutcast.py from [a7dc35c676] to [1f0bcc3bc0].
︙ | ︙ | |||
104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 | "string": "", "cat": cat, "order": "listeners", "amount": conf.max_streams, } referer = "http://www.shoutcast.com/?action=sub&cat="+cat html = http.get(url, params=params, referer=referer, ajax=1) #__print__(dbg.DATA, html) #__print__(re.compile("id=(\d+)").findall(html)); # new html """ <tr> <td width="6%"><a href="#" onClick="window.open('player/?radname=Schlagerhoelle%20%2D%20das%20Paradies%20fr%20Schlager%20%20und%20Discofox&stationid=14687&coding=MP3','radplayer','height=232,width=776')"><img class="icon transition" src="/img/icon-play.png" alt="Play"></a></td> <td width="30%"><a class="transition" href="http://yp.shoutcast.com/sbin/tunein-station.pls?id=14687">Schlagerhoelle - das Paradies fr Schlager und Discofox</a></td> <td width="12%" style="text-align:left;" width="10%">Oldies</td> <td width="12%" style="text-align:left;" width="10%">955</td> <td width="12%" style="text-align:left;" width="10%">128</td> <td width="12%" style="text-align:left;" width="10%">MP3</td> </tr> """ # With the new shallow <td> lists it doesn't make much sense to use # the pyquery DOM traversal. There aren't any sensible selectors to # extract values; it's just counting the tags. # And there's a bug in PyQuery 1.2.4 and CssSelector. So make two # attempts, alternate between regex and DOM; user preference first. 
| > > < > | < < < | < | < | > | | < > | 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 | "string": "", "cat": cat, "order": "listeners", "amount": conf.max_streams, } referer = "http://www.shoutcast.com/?action=sub&cat="+cat html = http.get(url, params=params, referer=referer, ajax=1) self.parent.status(0.75) #__print__(dbg.DATA, html) #__print__(re.compile("id=(\d+)").findall(html)); # new html """ <tr> <td width="6%"><a href="#" onClick="window.open('player/?radname=Schlagerhoelle%20%2D%20das%20Paradies%20fr%20Schlager%20%20und%20Discofox&stationid=14687&coding=MP3','radplayer','height=232,width=776')"><img class="icon transition" src="/img/icon-play.png" alt="Play"></a></td> <td width="30%"><a class="transition" href="http://yp.shoutcast.com/sbin/tunein-station.pls?id=14687">Schlagerhoelle - das Paradies fr Schlager und Discofox</a></td> <td width="12%" style="text-align:left;" width="10%">Oldies</td> <td width="12%" style="text-align:left;" width="10%">955</td> <td width="12%" style="text-align:left;" width="10%">128</td> <td width="12%" style="text-align:left;" width="10%">MP3</td> </tr> """ # With the new shallow <td> lists it doesn't make much sense to use # the pyquery DOM traversal. There aren't any sensible selectors to # extract values; it's just counting the tags. # # And there's a bug in PyQuery 1.2.4 and CssSelector. So make two # attempts, alternate between regex and DOM; user preference first. # for use_rx in [not conf.pyquery or not pq, conf.pyquery]: try: entries = (self.with_regex(html) if use_rx else self.with_dom(html)) if len(entries): break except Exception as e: __print__(dbg.ERR, e) continue return entries # Extract using regex def with_regex(self, html): __print__(dbg.PROC, "channels.shoutcast.update_streams: regex scraping mode") rx_stream = re.compile( |
︙ | ︙ | |||
176 177 178 179 180 181 182 | "format": self.mime_fmt(fmt), }] return entries # Iterate over DOM instead def with_dom(self, html): | | | 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 | "format": self.mime_fmt(fmt), }] return entries # Iterate over DOM instead def with_dom(self, html): __print__(dbg.PROC, "channels.shoutcast.update_streams: attempt PyQuery/DOM traversal") entries = [] for div in (pq(e) for e in pq(html).find("tr")): entries.append({ "title": div.find("a.transition").text(), "url": div.find("a.transition").attr("href"), "homepage": "", "listeners": int(div.find("td:eq(3)").text()), |
︙ | ︙ |