Check-in [c05cee4440]
Overview
Comment: | Simplify best_url() dict generation, typecasting, add more comments on channel webpage. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
c05cee4440969082d26c08c178fb3221 |
User & Date: | mario on 2017-02-13 16:48:52 |
Other Links: | manifest | tags |
Context
2017-02-13
| ||
16:50 | Document channel website behaviour / internal playlist generation urls. Reuse global conf.max_streams (as orientation for no of pages to fetch). check-in: 72806f95a5 user: mario tags: trunk | |
16:48 | Simplify best_url() dict generation, typecasting, add more comments on channel webpage. check-in: c05cee4440 user: mario tags: trunk | |
16:47 | Add version and -V flags. check-in: 39b9182c6e user: mario tags: trunk | |
Changes
Modified contrib/radiolist.py from [472e976cd7] to [4692e7b068].
1 2 3 4 5 | # encoding: UTF-8 # api: streamtuner2 # title: radiolist.net # description: Station list by continent+country # url: http://radiolist.net/ | | > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 | # encoding: UTF-8 # api: streamtuner2 # title: radiolist.net # description: Station list by continent+country # url: http://radiolist.net/ # version: 0.2 # type: channel # category: radio # priority: extra # png: # iVBORw0KGgoAAAANSUhEUgAAABgAAAAYBAMAAAASWSDLAAAAFVBMVEVKb61qibyDnMegs9S6yeDV4O37/vyx66abAAAAAWJLR0QAiAUdSAAAAAlwSFlzAAALEwAACxMB # AJqcGAAAAAd0SU1FB+ECDBAgLJqgZW4AAADoSURBVBjTNdBNj4MgEAbgqdLeZdo9C5NwFmo5Y7Wedauc1y/+/09YdLskkDwJmZl3IOxnON4A8frQhdc/7mG2cv3gx29X # rdUfZuVHQ3JHEzZ7GSuNXxFV/FYYwryO6MOiZqEdnQPUC/fsXZaMuxa6MFfOVYN7kIWpHZClyJGLFjbbC617KaRUEJ4r4fU7IqNYrW5f2kgU5gZInG6MZ086eejcyIvO # 1KwoLayoJjqnuWO5giW8msxVmBQXD5PttSlRm8TG2fDNZS3rRO/opeSCMnPa82xSmNgkfRxJ5yZxlPrPDmLu+7GqX4lERq4G0UEyAAAAAElFTkSuQmCC # extraction-method: regex # # Radio station list grouped by continents and countries. # Some categories return no results, because web players are # filtered out. import re import action import ahttp from config import * from channels import * # radiolist.net # # · Groups stations by continents and countries. Where Europe seems to be the # main category (empty "" path), while U.S. is labeled "/world", and Canada # and Asia etc. again a subpath "/world/canada" even. The .catmap{} assigns # paths to titles. # # · Playlist formats vary wildly. Therefore this module comes with a guessing # method (super crude) of its own. # # · The audio-format-from-URL guessing should be generalized out of here perhaps. # # · Each station is in a <tr>…</tr> block. Individual regexps are used for field # extraction afterwards (instead of a block match). # # · Entries may contain more than one streaming url. 
Each accompanied by a # bitrate. → Therefore the .best_url() sorting method. # # · Later versions might of course use multi-urls again… # class radiolist (ChannelPlugin): # module attributes listformat = "pls" has_search = False categories = ["Europe", "America", "Canada", "Oceania", "Asia"] catmap = {"Europe":"", "America":"world", "Canada":"world/canada", "Oceania":"world/oceania", "Asia":"world/asia"} |
︙ | ︙ | |||
# NOTE: the four definitions below are methods of `class radiolist (ChannelPlugin)`
# (new-version lines 78-126 of contrib/radiolist.py). The check-in page flattened
# their indentation; re-indent by one level when placing them in the class body.


def update_streams(self, cat):
    """Fetch the station table for category `cat` and return its entries.

    Downloads "http://www.radiolist.net/" + self.catmap[cat], splits the
    page into <tr>...</tr> blocks and pulls the fields out of each block
    with individual regexps (rather than one big block match).

    Returns a list of dicts with the keys homepage, title, url, bitrate,
    format, listformat, playing and genre. Rows lacking a title link, a
    stream link or any "cell" column are skipped.
    """
    # Raw strings: "\d" in a plain literal is an invalid escape sequence.
    rx_title = re.compile(r'<a href="([^">]+)" target="_blank">(.+?)</a>', re.I)
    rx_urls = re.compile(r'<a href="([^">]+)">(\d+)(?: Kbps)?</a>', re.I)
    rx_genre = re.compile(r'<td class="cell">([^<]+)</td>', re.I)

    entries = []
    html = ahttp.get("http://www.radiolist.net/" + self.catmap[cat])
    for block in re.findall("<tr>(.+?)</tr>", html, re.S):
        ut = rx_title.findall(block)  # homepage+title
        uu = rx_urls.findall(block)   # urls+bitrates
        lg = rx_genre.findall(block)  # location+genre
        if not (ut and uu and lg):
            continue
        url, br = self.best_url(uu)
        entries.append(dict(
            homepage=ut[0][0],
            title=unhtml(ut[0][1]),
            url=url,
            bitrate=br,
            format=self.guess_fmt(url),
            listformat=self.guess_pls(url),
            playing=lg[0],
            # A row with a single "cell" column used to raise IndexError
            # here and abort the whole category; fall back to no genre.
            genre=lg[1] if len(lg) > 1 else "",
        ))

    # done
    for entry in entries:
        log.DATA(entry)
    return entries


def best_url(self, urls):
    """Pick the highest rated URL from [(url, bitrate), ...] tuples.

    Returns a (url, bitrate) tuple where bitrate went through to_int().
    If the same url occurs more than once, its last bitrate wins. The
    list must not be empty (update_streams only calls this with matches).
    """
    rates = {u: to_int(b) for u, b in urls}  # {url: bitrate, ...}
    best = sorted(rates, key=rates.get, reverse=True)
    return best[0], rates[best[0]]


def guess_fmt(self, url):
    """Guess the audio MIME type from a format keyword found in `url`.

    The first of mp3/ogg/wma/aac/mp4 appearing anywhere in the URL is
    handed to mime_fmt(); without a match "audio/mpeg" is returned.
    """
    ext = re.findall("mp3|ogg|wma|aac|mp4", url)
    if ext:
        return mime_fmt(ext[0])
    return "audio/mpeg"


def guess_pls(self, url):
    """Guess the playlist format (PLS/M3U/...) from `url`.

    Returns the first name from action.playlist_fmt_prio that appears in
    the URL, or "srv" when none does.
    """
    ext = re.findall("|".join(action.playlist_fmt_prio), url)
    if ext:
        return ext[0]
    return "srv"