Check-in [4eccc8b70a]
Overview
| Comment: | Implement search function for I-R, fix DOM extraction method. |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | trunk |
| Files: | files | file ages | folders |
| SHA1: |
4eccc8b70a9e6903f803f05cd19aeec2 |
| User & Date: | mario on 2016-12-26 21:54:10 |
| Other Links: | manifest | tags |
Context
|
2016-12-27
| ||
| 21:20 | Remove stale comment, fix whitespace issue check-in: b5753830fa user: mario tags: trunk | |
|
2016-12-26
| ||
| 21:54 | Implement search function for I-R, fix DOM extraction method. check-in: 4eccc8b70a user: mario tags: trunk | |
| 20:43 | Use file:/// urls again. Escape backslashes for Windows now. check-in: 9bef9860b6 user: Oliver tags: trunk | |
Changes
Modified channels/internet_radio.py from [e2da369cc7] to [78008b144e].
1 2 3 4 5 | # api: streamtuner2 # title: Internet-Radio # description: Broad list of webradios from all genres. # type: channel # category: radio | | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 |
# api: streamtuner2
# title: Internet-Radio
# description: Broad list of webradios from all genres.
# type: channel
# category: radio
# version: 1.5
# url: http://www.internet-radio.com/
# config:
# { name: internetradio_max_pages, type: int, value: 5, category: limit, description: How many pages to fetch and read. }
# priority: standard
# png:
# iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABHNCSVQICAgIfAhkiAAAAaZJREFUOI2N0j1PlEEUBeBnlsVoRJcCPwINxIJGAoWVFbVK4g8w
# oUUTFRNbG3+FhVErK36BhcYCrTR8FS4mRGUXFEKCUizRwLXgnfV1Awk3mUzmnHPvPXNnUkSE40RKFYxhHKMYxFb1GIlnMLVN/etBUjuGWDm6wEHHyQbLW/Qd
# JTu8QEq9mJlnogz3sHOJV3iHz2iKzuDiH+bm+J3XD74EU5Gc7pSn/4aYUi14s8BIhkZ5puKhvdgpNFVccaA5oaP7TO66SCuYKnG9weMmjaz5yadqqfvkPE/z
|
| ︙ | ︙ | |||
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
# streams and gui
class internet_radio (ChannelPlugin):
# control data
listformat = "pls"
categories = []
base_url = "https://www.internet-radio.com/"
# load genres
def update_categories(self):
    """Reload the genre list from the site's front page.

    Fetches ``self.base_url`` via ``ahttp.get``, collects the link text of
    every ``/stations/<name>/`` anchor found in the returned HTML,
    capitalizes each name, and stores the de-duplicated, sorted result in
    ``self.categories``. Returns nothing.
    """
    html = ahttp.get(self.base_url)
    # Raw string: \w and \s are regex character classes, not string escapes;
    # a plain literal triggers invalid-escape warnings on newer Pythons.
    rx = re.compile(r"""="/stations/[-+&.\w\s%]+/">([^<]+)<""")
    # De-duplicate after capitalizing, because names that differ only in
    # letter case collapse to the same label.
    self.categories = sorted(set(s.capitalize() for s in rx.findall(html)))
# fetch station lists
| > | | > | | < < < > > > | | 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
# streams and gui
class internet_radio (ChannelPlugin):
# control data
listformat = "pls"
categories = []
base_url = "https://www.internet-radio.com/"
has_search = True
# load genres
def update_categories(self):
    """Reload the genre list from the site's front page.

    Fetches ``self.base_url`` via ``ahttp.get``, collects the link text of
    every ``/stations/<name>/`` anchor found in the returned HTML,
    capitalizes each name, and stores the de-duplicated, sorted result in
    ``self.categories``. Returns nothing.
    """
    html = ahttp.get(self.base_url)
    # Raw string: \w and \s are regex character classes, not string escapes;
    # a plain literal triggers invalid-escape warnings on newer Pythons.
    rx = re.compile(r"""="/stations/[-+&.\w\s%]+/">([^<]+)<""")
    # De-duplicate after capitalizing, because names that differ only in
    # letter case collapse to the same label.
    self.categories = sorted(set(s.capitalize() for s in rx.findall(html)))
# fetch station lists
def update_streams(self, cat, search=None):
entries = []
if not search and cat not in self.categories:
return []
rx_pages = re.compile('href="/stations/[-+\w%\d\s]+/page(\d+)">\d+</a>')
# Fetch multiple pages at once
html = []
max_pages = max(int(conf.internetradio_max_pages), 1)
for page in range(1, max_pages):
# Append HTML source
if search:
html.append(
ahttp.get("%ssearch/?radio=%s%s" % (self.base_url, search, "&page=%s" % page if page>1 else ""))
)
else:
html.append(
ahttp.get("%sstations/%s/%s" % (self.base_url, cat.lower().replace(" ", "%20"), "page%s" % page if page>1 else ""))
)
# Is there a next page?
if str(page+1) not in rx_pages.findall(html[-1]):
break
self.parent.status(float(page)/float(max_pages+1), timeout=1)
# Alternatively try regex or pyquery parsing
|
| ︙ | ︙ | |||
148 149 150 151 152 153 154 155 156 |
log.PROC("internet-radio, dom")
rx_numbers = re.compile("(\d+)")
r = []
for html in html_list:
# the streams are arranged in table rows
doc = pq(html)
for dir in (pq(e) for e in doc("tr")):
# bitrate/listeners
| > | > | > > | | | | < > > > | 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 |
log.PROC("internet-radio, dom")
rx_numbers = re.compile("(\d+)")
r = []
for html in html_list:
# the streams are arranged in table rows
doc = pq(html)
for dir in (pq(e) for e in doc("tr")):
#log.HTML(dir)
# bitrate/listeners
bl = dir.find("p")
if bl:
bl = rx_numbers.findall(str(bl.text()) + " 0 0")
else:
bl = [0, 0]
# stream url
url = dir.find("i").eq(0).parent().attr("onclick")
if url:
url = re.search("(http://[^\'\"\>]+)", url)
if url:
url = url.group(0)
else:
url = ""
else:
url = ""
row = {
"title": dir.find("h4").text(),
"homepage": ahttp.fix_url(dir.find("a.small").attr("href") or ""),
"url": url,
"genre": dir.find("a[href^='/stations/']").text() or "",
"listeners": int(bl[0]),
"bitrate": int(bl[1]),
"format": "audio/mpeg",
"playing": dir.find("b").text(),
}
#log.DATA(row)
r.append(row)
return r
|