Check-in [4eccc8b70a]
Overview
Comment: | Implement search function for I-R, fix DOM extraction method. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
4eccc8b70a9e6903f803f05cd19aeec2 |
User & Date: | mario on 2016-12-26 21:54:10 |
Other Links: | manifest | tags |
Context
2016-12-27
| ||
21:20 | Remove stale comment, fix whitespace issue check-in: b5753830fa user: mario tags: trunk | |
2016-12-26
| ||
21:54 | Implement search function for I-R, fix DOM extraction method. check-in: 4eccc8b70a user: mario tags: trunk | |
20:43 | Use file:/// urls again. Escape backslashes for Windows now. check-in: 9bef9860b6 user: Oliver tags: trunk | |
Changes
Modified channels/internet_radio.py from [e2da369cc7] to [78008b144e].
1 2 3 4 5 | # api: streamtuner2 # title: Internet-Radio # description: Broad list of webradios from all genres. # type: channel # category: radio | | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 | # api: streamtuner2 # title: Internet-Radio # description: Broad list of webradios from all genres. # type: channel # category: radio # version: 1.5 # url: http://www.internet-radio.com/ # config: # { name: internetradio_max_pages, type: int, value: 5, category: limit, description: How many pages to fetch and read. } # priority: standard # png: # iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABHNCSVQICAgIfAhkiAAAAaZJREFUOI2N0j1PlEEUBeBnlsVoRJcCPwINxIJGAoWVFbVK4g8w # oUUTFRNbG3+FhVErK36BhcYCrTR8FS4mRGUXFEKCUizRwLXgnfV1Awk3mUzmnHPvPXNnUkSE40RKFYxhHKMYxFb1GIlnMLVN/etBUjuGWDm6wEHHyQbLW/Qd # JTu8QEq9mJlnogz3sHOJV3iHz2iKzuDiH+bm+J3XD74EU5Gc7pSn/4aYUi14s8BIhkZ5puKhvdgpNFVccaA5oaP7TO66SCuYKnG9weMmjaz5yadqqfvkPE/z |
︙ | ︙ | |||
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 | # streams and gui class internet_radio (ChannelPlugin): # control data listformat = "pls" categories = [] base_url = "https://www.internet-radio.com/" # load genres def update_categories(self): html = ahttp.get(self.base_url) rx = re.compile("""="/stations/[-+&.\w\s%]+/">([^<]+)<""") cats = rx.findall(html) cats = list(set(cats)) cats = [s.capitalize() for s in cats] self.categories = sorted(list(set(cats))) # fetch station lists | > | | > | | < < < > > > | | 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 | # streams and gui class internet_radio (ChannelPlugin): # control data listformat = "pls" categories = [] base_url = "https://www.internet-radio.com/" has_search = True # load genres def update_categories(self): html = ahttp.get(self.base_url) rx = re.compile("""="/stations/[-+&.\w\s%]+/">([^<]+)<""") cats = rx.findall(html) cats = list(set(cats)) cats = [s.capitalize() for s in cats] self.categories = sorted(list(set(cats))) # fetch station lists def update_streams(self, cat, search=None): entries = [] if not search and cat not in self.categories: return [] rx_pages = re.compile('href="/stations/[-+\w%\d\s]+/page(\d+)">\d+</a>') # Fetch multiple pages at once html = [] max_pages = max(int(conf.internetradio_max_pages), 1) for page in range(1, max_pages): # Append HTML source if search: html.append( ahttp.get("%ssearch/?radio=%s%s" % (self.base_url, search, "&page=%s" % page if page>1 else "")) ) else: html.append( ahttp.get("%sstations/%s/%s" % (self.base_url, cat.lower().replace(" ", "%20"), "page%s" % page if page>1 else "")) ) # Is there a next page? if str(page+1) not in rx_pages.findall(html[-1]): break self.parent.status(float(page)/float(max_pages+1), timeout=1) # Alternatively try regex or pyquery parsing |
︙ | ︙ | |||
148 149 150 151 152 153 154 155 156 | log.PROC("internet-radio, dom") rx_numbers = re.compile("(\d+)") r = [] for html in html_list: # the streams are arranged in table rows doc = pq(html) for dir in (pq(e) for e in doc("tr")): # bitrate/listeners | > | > | > > | | | | < > > > | 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 | log.PROC("internet-radio, dom") rx_numbers = re.compile("(\d+)") r = [] for html in html_list: # the streams are arranged in table rows doc = pq(html) for dir in (pq(e) for e in doc("tr")): #log.HTML(dir) # bitrate/listeners bl = dir.find("p") if bl: bl = rx_numbers.findall(str(bl.text()) + " 0 0") else: bl = [0, 0] # stream url url = dir.find("i").eq(0).parent().attr("onclick") if url: url = re.search("(http://[^\'\"\>]+)", url) if url: url = url.group(0) else: url = "" else: url = "" row = { "title": dir.find("h4").text(), "homepage": ahttp.fix_url(dir.find("a.small").attr("href") or ""), "url": url, "genre": dir.find("a[href^='/stations/']").text() or "", "listeners": int(bl[0]), "bitrate": int(bl[1]), "format": "audio/mpeg", "playing": dir.find("b").text(), } #log.DATA(row) r.append(row) return r |