Index: contrib/streamlicensing.py ================================================================== --- contrib/streamlicensing.py +++ contrib/streamlicensing.py @@ -1,22 +1,22 @@ # encoding: UTF-8 # api: streamtuner2 # title: streamlicensing # description: Smaller streaming service provider # url: http://www.streamlicensing.com/directory/ -# version: 0.1 +# version: 0.2 # type: channel # category: radio # priority: extra # png: # iVBORw0KGgoAAAANSUhEUgAAABAAAAAQBAMAAADt3eJSAAAAGFBMVEVhcAAODy8rLVpHS4RPU22DismUmLTm6va3Zc/ZAAAAAXRSTlMAQObYZgAA # AAFiS0dEAIgFHUgAAAAJcEhZcwAACxMAAAsTAQCanBgAAAAHdElNRQfhAgwUMiN14qDwAAAAW0lEQVQI12NggAADCMVW7gym3crBDJb08rJgZbB4 # qqEQmBGsKMQEZggqMLCA1LAJM8AZaWUQhjpEO5uwamiQMJjBrChkADSlXBhsfHh5qgCYoWysqACxWQlCAwArBw5QNfhFygAAAABJRU5ErkJggg== # extraction-method: regex # -# Streaming service provider, which ensures station legality or fees -# according to US copyright/streaming laws. +# Streaming service provider, which ensures station legality and fees +# in accordance with US copyright conversion corporations. # # Has only major categories. Does not provide channel homepages, and # is a bit slow due to huge page sizes. No search function implemented # here. # @@ -26,11 +26,28 @@ import ahttp from config import * from channels import * -# radiolist.net +# streamlicensing.com +# +# · Provides around 20 categories, associated to numeric ids (?g= parameter) +# +# · Station lists are one block each, with a JS inline script associating +# some web player parameters. +# +# · Each station has a station id=, but also a stream_id= for the playlist +# generator. +# +# · Normally just provides a web player, but used to show .pls links for various +# players. Meanwhile this is hidden, but the playlist generator is still +# available - so doesn't require double lookups. +# → http://www.streamlicensing.com/directory/index.cgi?action=webpro_links&sid=4785&start=1&g=14&e=1&s= +# → .../directory/index.cgi/playlist.pls?action=playlist&type=pls&sid=4785&stream_id=1234 +# +# · family_safe and maxpages are hardcoded config options for now. +# class streamlicensing (ChannelPlugin): # module attributes listformat = "pls" has_search = False @@ -38,25 +55,24 @@ catmap = {} titles = dict( listeners=False ) # config (not as plugin options here) conf_family_unsafe = 0 - conf_maxpages = 3 + conf_maxpages = max(int(int(conf.max_streams) / 100), 1) # magic values base_url = "http://www.streamlicensing.com/directory/" pls_sffx = "%sindex.cgi/playlist.pls?action=playlist&type=pls&sid=%s&stream_id=%s" - # This is well hidden, but it comes with a playlist generator, so doesn't require double lookups. - # http://www.streamlicensing.com/directory/index.cgi?action=webpro_links&sid=4785&start=1&g=14&e=1&s= - # .../directory/index.cgi/playlist.pls?action=playlist&type=pls&sid=4785&stream_id=1234 + - # just a static list for now + # fetch category titles and catmap def update_categories(self): html = ahttp.get(self.base_url) cats = re.findall('"\?start=&g=(\d+)&e=&s="><.+?>([\w\s-]+)', html) self.categories = sorted([c[1] for c in cats]) self.catmap = dict([(t,i) for i,t in cats]) + # extract stream urls def update_streams(self, cat): # prep block regex @@ -68,15 +84,15 @@ """, re.I|re.S|re.X|re.M) # collect pages into single string html = "" for page in range(0, self.conf_maxpages): + self.progress(self.conf_maxpages, page) html += ahttp.get("%s?start=%s&g=%s&e=%s&s=" % (self.base_url, page * 10, self.catmap[cat], self.conf_family_unsafe)) if not re.search("\?start=%s.*>Next" % ((page + 1) * 10), html): break html = re.sub(">Featured Stations.+?>Previous Page", "", html, 100, re.S) - print html # extract and convert to station rows entries = [] for uu in re.findall(rx_genre, html): print uu