Index: channels/shoutcast.py
==================================================================
--- channels/shoutcast.py
+++ channels/shoutcast.py
@@ -9,21 +9,32 @@
 # depends: pq, re, http
 # author: Mario
 # original: Jean-Yves Lefort
 #
 # Shoutcast is a server software for audio streaming. It automatically spools
-# station information on shoutcast.com, which this plugin can read out.
+# station information on shoutcast.com.
+# It was acquired by Radionomy in 2014; since then significant changes
+# took place. The former YP got deprecated, now seemingly undeprecated.
+#
+#   http://wiki.winamp.com/wiki/SHOUTcast_Radio_Directory_API 
+#
+# But neither their Wiki nor Bulletin Board provide concrete information on
+# the eligibility of open source desktop apps for an authhash.
+#
+# Therefore we'll be retrieving stuff from the homepage still. The new
+# interface conveniently uses JSON already, so let's use that:
+#
+#   POST http://www.shoutcast.com/Home/BrowseByGenre {genreid: 9}
 #
-# After its recent aquisition the layout got slimmed down considerably. So
-# there's not a lot of information to fetch left. And this plugin is now back
-# to defaulting to regex extraction instead of HTML parsing & DOM extraction.
+# We do need a catmap now too, but that's easy to acquire and will be kept
+# within the cache dirs.
 #
 #
 #
 
-
 import ahttp as http
+from json import loads as json_decode
 import re
 from config import conf, __print__, dbg
 from pq import pq
 #from channels import *    # works everywhere but in this plugin(???!)
 import channels
@@ -32,165 +43,102 @@
 
 
 # SHOUTcast data module                                          ----------------------------------------
 class shoutcast(channels.ChannelPlugin):
 
-        # desc
-        api = "streamtuner2"
-        module = "shoutcast"
-        title = "SHOUTcast"
-        homepage = "http://www.shoutcast.com/" 
-        base_url = "http://shoutcast.com/"
-        listformat = "audio/x-scpls"
-
-        # settings
-        config = [
-        ]
-        
-        # categories
-        categories = ['Alternative', ['Adult Alternative', 'Britpop', 'Classic Alternative', 'College', 'Dancepunk', 'Dream Pop', 'Emo', 'Goth', 'Grunge', 'Hardcore', 'Indie Pop', 'Indie Rock', 'Industrial', 'Modern Rock', 'New Wave', 'Noise Pop', 'Power Pop', 'Punk', 'Ska', 'Xtreme'], 'Blues', ['Acoustic Blues', 'Chicago Blues', 'Contemporary Blues', 'Country Blues', 'Delta Blues', 'Electric Blues'], 'Classical', ['Baroque', 'Chamber', 'Choral', 'Classical Period', 'Early Classical', 'Impressionist', 'Modern', 'Opera', 'Piano', 'Romantic', 'Symphony'], 'Country', ['Americana', 'Bluegrass', 'Classic Country', 'Contemporary Bluegrass', 'Contemporary Country', 'Honky Tonk', 'Hot Country Hits', 'Western'], 'Decades', ['30s', '40s', '50s', '60s', '70s', '80s', '90s'], 'Easy Listening', ['Exotica', 'Light Rock', 'Lounge', 'Orchestral Pop', 'Polka', 'Space Age Pop'], 'Electronic', ['Acid House', 'Ambient', 'Big Beat', 'Breakbeat', 'Dance', 'Demo', 'Disco', 'Downtempo', 'Drum and Bass', 'Electro', 'Garage', 'Hard House', 'House', 'IDM', 'Jungle', 'Progressive', 'Techno', 'Trance', 'Tribal', 'Trip Hop'], 'Folk', ['Alternative Folk', 'Contemporary Folk', 'Folk Rock', 'New Acoustic', 'Traditional Folk', 'World Folk'], 'Inspirational', ['Christian', 'Christian Metal', 'Christian Rap', 'Christian Rock', 'Classic Christian', 'Contemporary Gospel', 'Gospel', 'Southern Gospel', 'Traditional Gospel'], 'International', ['African', 'Arabic', 'Asian', 'Bollywood', 'Brazilian', 'Caribbean', 'Celtic', 'Chinese', 'European', 'Filipino', 'French', 'Greek', 'Hindi', 'Indian', 'Japanese', 'Jewish', 'Klezmer', 'Korean', 'Mediterranean', 'Middle Eastern', 'North American', 'Russian', 'Soca', 'South American', 'Tamil', 'Worldbeat', 'Zouk'], 'Jazz', ['Acid Jazz', 'Avant Garde', 'Big Band', 'Bop', 'Classic Jazz', 'Cool Jazz', 'Fusion', 'Hard Bop', 'Latin Jazz', 'Smooth Jazz', 'Swing', 'Vocal Jazz', 'World Fusion'], 'Latin', ['Bachata', 'Banda', 'Bossa Nova', 'Cumbia', 'Latin Dance', 'Latin 
Pop', 'Latin Rock', 'Mariachi', 'Merengue', 'Ranchera', 'Reggaeton', 'Regional Mexican', 'Salsa', 'Tango', 'Tejano', 'Tropicalia'], 'Metal', ['Black Metal', 'Classic Metal', 'Extreme Metal', 'Grindcore', 'Hair Metal', 'Heavy Metal', 'Metalcore', 'Power Metal', 'Progressive Metal', 'Rap Metal'], 'Misc', [], 'New Age', ['Environmental', 'Ethnic Fusion', 'Healing', 'Meditation', 'Spiritual'], 'Pop', ['Adult Contemporary', 'Barbershop', 'Bubblegum Pop', 'Dance Pop', 'Idols', 'JPOP', 'Oldies', 'Soft Rock', 'Teen Pop', 'Top 40', 'World Pop'], 'Public Radio', ['College', 'News', 'Sports', 'Talk'], 'Rap', ['Alternative Rap', 'Dirty South', 'East Coast Rap', 'Freestyle', 'Gangsta Rap', 'Hip Hop', 'Mixtapes', 'Old School', 'Turntablism', 'West Coast Rap'], 'Reggae', ['Contemporary Reggae', 'Dancehall', 'Dub', 'Ragga', 'Reggae Roots', 'Rock Steady'], 'Rock', ['Adult Album Alternative', 'British Invasion', 'Classic Rock', 'Garage Rock', 'Glam', 'Hard Rock', 'Jam Bands', 'Piano Rock', 'Prog Rock', 'Psychedelic', 'Rockabilly', 'Surf'], 'Soundtracks', ['Anime', 'Kids', 'Original Score', 'Showtunes', 'Video Game Music'], 'Talk', ['BlogTalk', 'Comedy', 'Community', 'Educational', 'Government', 'News', 'Old Time Radio', 'Other Talk', 'Political', 'Scanner', 'Spoken Word', 'Sports', 'Technology'], 'Themes', ['Adult', 'Best Of', 'Chill', 'Eclectic', 'Experimental', 'Female', 'Heartache', 'Instrumental', 'LGBT', 'Party Mix', 'Patriotic', 'Rainy Day Mix', 'Reality', 'Sexy', 'Shuffle', 'Travel Mix', 'Tribute', 'Trippy', 'Work Mix']]
-        #["default", [], 'TopTen', [], 'Alternative', ['College', 'Emo', 'Hardcore', 'Industrial', 'Punk', 'Ska'], 'Americana', ['Bluegrass', 'Blues', 'Cajun', 'Folk'], 'Classical', ['Contemporary', 'Opera', 'Symphonic'], 'Country', ['Bluegrass', 'New Country', 'Western Swing'], 'Electronic', ['Acid Jazz', 'Ambient', 'Breakbeat', 'Downtempo', 'Drum and Bass', 'House', 'Trance', 'Techno'], 'Hip Hop', ['Alternative', 'Hardcore', 'New School', 'Old School', 'Turntablism'], 'Jazz', ['Acid Jazz', 'Big Band', 'Classic', 'Latin', 'Smooth', 'Swing'], 'Pop/Rock', ['70s', '80s', 'Classic', 'Metal', 'Oldies', 'Pop', 'Rock', 'Top 40'], 'R&B/Soul', ['Classic', 'Contemporary', 'Funk', 'Smooth', 'Urban'], 'Spiritual', ['Alternative', 'Country', 'Gospel', 'Pop', 'Rock'], 'Spoken', ['Comedy', 'Spoken Word', 'Talk'], 'World', ['African', 'Asian', 'European', 'Latin', 'Middle Eastern', 'Reggae'], 'Other/Mixed', ['Eclectic', 'Film', 'Instrumental']]
-        current = ""
-        default = "Alternative"
-        empty = ""
-
-        
-        # redefine
-        streams = {}
-        
-            
-        # extracts the category list from shoutcast.com,
-        # sub-categories are queried per 'AJAX'
-        def update_categories(self):
-            html = http.get(self.base_url)
-            self.categories = []
-            #__print__( dbg.DATA, html )
-
-            # <h2>Radio Genres</h2>
-            rx = re.compile(r'<li((?:\s+id="\d+"\s+class="files")?)><a href="\?action=sub&cat=([\w\s]+)#(\d+)">[\w\s]+</a>', re.S)
-            sub = []
-            for uu in rx.findall(html):
-                #__print__( dbg.DATA, uu )
-                (main,name,id) = uu
-                name = urllib.unquote(name)
-
-                # main category
-                if main:
-                    if sub:
-                        self.categories.append(sub)
-                        sub = []
-                    self.categories.append(name)
-                else:
-                    sub.append(name)
-
-            # it's done
-            #__print__( dbg.PROC, self.categories )
-            conf.save("cache/categories_shoutcast", self.categories)
-            pass
-
-
-        # downloads stream list from shoutcast for given category
-        def update_streams(self, cat):
-
-            if (not cat or cat == self.empty):
-                __print__( dbg.ERR, "nocat" )
-                return []
-
-            #/radiolist.cfm?action=sub&string=&cat=Oldies&_cf_containerId=radiolist&_cf_nodebug=true&_cf_nocache=true&_cf_rc=0
-            #/radiolist.cfm?start=19&action=sub&string=&cat=Oldies&amount=18&order=listeners
-            # page
-            url = "http://www.shoutcast.com/radiolist.cfm"
-            params = {
-                "action": "sub",
-                "string": "",
-                "cat": cat,
-                "order": "listeners",
-                "amount": conf.max_streams,
-            }
-            referer = "http://www.shoutcast.com/?action=sub&cat="+cat
-            html = http.get(url, params=params, referer=referer, ajax=1)
-            self.parent.status(0.75)
-
-            #__print__(dbg.DATA, html)
-            #__print__(re.compile("id=(\d+)").findall(html));
-            # new html
-            """ 
-            <tr>
-               <td width="6%"><a href="#" onClick="window.open('player/?radname=Schlagerhoelle%20%2D%20das%20Paradies%20fr%20Schlager%20%20und%20Discofox&stationid=14687&coding=MP3','radplayer','height=232,width=776')"><img class="icon transition" src="/img/icon-play.png" alt="Play"></a></td>
-               <td width="30%"><a class="transition" href="http://yp.shoutcast.com/sbin/tunein-station.pls?id=14687">Schlagerhoelle - das Paradies fr Schlager  und Discofox</a></td>
-               <td width="12%" style="text-align:left;" width="10%">Oldies</td>
-               <td width="12%" style="text-align:left;" width="10%">955</td>
-               <td width="12%" style="text-align:left;" width="10%">128</td>
-               <td width="12%" style="text-align:left;" width="10%">MP3</td>
-            </tr>
-            """
-
-            # With the new shallow <td> lists it doesn't make much sense to use
-            # the pyquery DOM traversal. There aren't any sensible selectors to
-            # extract values; it's just counting the tags.
-            #
-            # And there's a bug in PyQuery 1.2.4 and CssSelector. So make two
-            # attempts, alternate between regex and DOM; user preference first.
-            #
-            for use_rx in [not conf.pyquery or not pq, conf.pyquery]:
-                try:
-                    entries = (self.with_regex(html) if use_rx else self.with_dom(html))
-                    if len(entries):
-                        break
-                except Exception as e:
-                    __print__(dbg.ERR, e)
-                    continue
-            return entries
-
-
-        # Extract using regex
-        def with_regex(self, html):
-            __print__(dbg.PROC, "channels.shoutcast.update_streams: regex scraping mode")
-            rx_stream = re.compile(
-                """
-                 <a [^>]+  href="http://yp.shoutcast.com/sbin/tunein-station.pls\?
-                           id=(\d+)">   ([^<>]+)   </a>  </td>
-                 \s+  <td [^>]+  >([^<>]+)</td>
-                 \s+  <td [^>]+  >(\d+)</td>
-                 \s+  <td [^>]+  >(\d+)</td>
-                 \s+  <td [^>]+  >(\w+)</td>
-                """,
-                re.S|re.I|re.X
-            )
-            # extract entries
-            entries = []
-            for m in rx_stream.findall(html):
-                #__print__(m)
-                (id, title, genre, listeners, bitrate, fmt) = m
-                entries += [{
-                    "id": id,
-                    "url": "http://yp.shoutcast.com/sbin/tunein-station.pls?id=" + id,
-                    "title": self.entity_decode(title),
-                    #"homepage": http.fix_url(homepage),
-                    #"playing": self.entity_decode(playing),
-                    "genre": genre,
-                    "listeners": int(listeners),
-                    "max": 0, #int(uu[6]),
-                    "bitrate": int(bitrate),
-                    "format": self.mime_fmt(fmt),
-                }]
-            return entries
-
-
-        # Iterate over DOM instead
-        def with_dom(self, html):
-            __print__(dbg.PROC, "channels.shoutcast.update_streams: attempt PyQuery/DOM traversal")
-            entries = []
-            for div in (pq(e) for e in pq(html).find("tr")):
-                entries.append({
-                     "title": div.find("a.transition").text(),
-                     "url": div.find("a.transition").attr("href"),
-                     "homepage": "",
-                     "listeners": int(div.find("td:eq(3)").text()),
-                     "bitrate": int(div.find("td:eq(4)").text()),
-                     "format": self.mime_fmt(div.find("td:eq(5)").text()),
-                     "max": 0,
-                     "genre": cat,
-                })
-            return entries
-
+    # plugin description: api/module identifiers, display title, site URLs and list format
+    api = "streamtuner2"
+    module = "shoutcast"
+    title = "SHOUTcast"
+    homepage = "http://www.shoutcast.com/" 
+    base_url = "http://shoutcast.com/"
+    listformat = "audio/x-scpls"
+
+    # settings (no plugin-specific options are defined)
+    config = [
+    ]
+    
+    # categories (rebuilt by update_categories) and catmap: genre title => numeric genre id sent as `genreid`
+    categories = []
+    catmap = {"Choral": 35, "Winter": 275, "JROCK": 306, "Motown": 237, "Political": 290, "Tango": 192, "Ska": 22, "Comedy": 283, "Decades": 212, "European": 143, "Reggaeton": 189, "Islamic": 307, "Freestyle": 114, "French": 145, "Western": 53, "Dancepunk": 6, "News": 287, "Xtreme": 23, "Bollywood": 138, "Celtic": 141, "Kids": 278, "Filipino": 144, "Hanukkah": 270, "Greek": 146, "Punk": 21, "Spiritual": 211, "Industrial": 14, "Baroque": 33, "Talk": 282, "JPOP": 227, "Scanner": 291, "Mediterranean": 154, "Swing": 174, "Themes": 89, "IDM": 75, "40s": 214, "Funk": 236, "Rap": 110, "House": 74, "Educational": 285, "Caribbean": 140, "Misc": 295, "30s": 213, "Anniversary": 266, "Sports": 293, "International": 134, "Tribute": 107, "Piano": 41, "Romantic": 42, "90s": 219, "Latin": 177, "Grunge": 10, "Dubstep": 312, "Government": 286, "Country": 44, "Salsa": 191, "Hardcore": 11, "Afrikaans": 309, "Downtempo": 69, "Merengue": 187, "Psychedelic": 260, "Female": 95, "Bop": 167, "Tribal": 80, "Metal": 195, "70s": 217, "Tejano": 193, "Exotica": 55, "Anime": 277, "BlogTalk": 296, "African": 135, "Patriotic": 101, "Blues": 24, "Turntablism": 119, "Chinese": 142, "Garage": 72, "Dance": 66, "Valentine": 273, "Barbershop": 222, "Alternative": 1, "Technology": 294, "Folk": 82, "Klezmer": 152, "Samba": 315, "Turkish": 305, "Trance": 79, "Dub": 245, "Rock": 250, "Polka": 59, "Modern": 39, "Lounge": 57, "Indian": 149, "Hindi": 148, "Brazilian": 139, "Eclectic": 93, "Korean": 153, "Creole": 316, "Dancehall": 244, "Surf": 264, "Reggae": 242, "Goth": 9, "Oldies": 226, "Zouk": 162, "Environmental": 207, "Techno": 78, "Adult": 90, "Rockabilly": 262, "Wedding": 274, "Russian": 157, "Sexy": 104, "Chill": 92, "Opera": 40, "Emo": 8, "Experimental": 94, "Showtunes": 280, "Breakbeat": 65, "Jungle": 76, "Soundtracks": 276, "LoFi": 15, "Metalcore": 202, "Bachata": 178, "Kwanzaa": 272, "Banda": 179, "Americana": 46, "Classical": 32, "German": 302, "Tamil": 160, "Bluegrass": 47, "Halloween": 269, 
"College": 300, "Ambient": 63, "Birthday": 267, "Meditation": 210, "Electronic": 61, "50s": 215, "Chamber": 34, "Heartache": 96, "Britpop": 3, "Soca": 158, "Grindcore": 199, "Reality": 103, "00s": 303, "Symphony": 43, "Pop": 220, "Ranchera": 188, "Electro": 71, "Christmas": 268, "Christian": 123, "Progressive": 77, "Jazz": 163, "Trippy": 108, "Instrumental": 97, "Tropicalia": 194, "Fusion": 170, "Healing": 209, "Glam": 255, "80s": 218, "KPOP": 308, "Worldbeat": 161, "Mixtapes": 117, "60s": 216, "Mariachi": 186, "Soul": 240, "Cumbia": 181, "Inspirational": 122, "Impressionist": 38, "Gospel": 129, "Disco": 68, "Arabic": 136, "Idols": 225, "Ragga": 247, "Demo": 67, "LGBT": 98, "Honeymoon": 271, "Japanese": 150, "Community": 284, "Weather": 317, "Asian": 137, "Hebrew": 151, "Flamenco": 314, "Shuffle": 105}
+    current = ""
+    default = "Alternative"
+    empty = ""
+    
+    # redefine: this plugin's own streams dict (presumably overrides a ChannelPlugin default -- confirm)
+    streams = {}
+    
+        
+    # Extracts the genre list from the shoutcast.com homepage: rebuilds
+    # self.categories as [main, [subgenres...], main, [...], ...], records every
+    # genre's numeric id in self.catmap (title => id), then calls self.save().
+    def update_categories(self):
+        html = http.get(self.base_url)
+        self.categories = []
+
+        # Main genres from mobile dropdown, e.g.
+        #   <option value="1">Alternative</option>
+        # Python's re has no \h (horizontal space) class, hence the explicit [ \t].
+        rx = re.compile(r'<option value="(\d+)">(\w[\w \t]*\w)<')
+        main = set(rx.findall(html))
+        # Genre list in sidebar, e.g.
+        #   <li><a href="#c-genre-2" onclick="loadStationsByGenre(2, true)">Adult Alternative</a></li>
+        rx = re.compile(r'c-genre-(\d+).+?>(\w[\w \t]*\w)<')
+        subs = rx.findall(html)
+
+        # group: a main genre opens a new sublist, every other genre joins the open one
+        current = None
+        for (genre_id, title) in subs:
+            if (genre_id, title) in main:
+                current = []
+                self.categories.append(title)
+                self.categories.append(current)
+            elif current is not None:    # before the first main genre: only map the id
+                current.append(title)
+            self.catmap[title] = int(genre_id)
+        self.save()
+
+
+    # Downloads the station list for genre `cat` from the site's JSON interface;
+    # returns a list of stream dicts, or [] when `cat` has no id in self.catmap.
+    def update_streams(self, cat):
+        if cat not in self.catmap:
+            __print__( dbg.ERR, "nocat" )
+            return []
+
+        # str() raises on non-ASCII unicode under Python 2; use UTF-8 bytes then
+        def text(value):
+            try:
+                return str("" if value is None else value)
+            except UnicodeError:
+                return value.encode("utf-8")
+
+        url = "http://www.shoutcast.com/Home/BrowseByGenre"
+        json = http.get(url, params={ "genreid": int(self.catmap[cat]) }, referer=None, post=1, ajax=1)
+        self.parent.status(0.75)
+
+        # remap JSON station records; the `or` defaults also cover null fields
+        entries = []
+        for e in json_decode(json):
+            entries.append({
+                "id": int(e.get("ID") or 0),
+                "genre": text(e.get("Genre")),
+                "title": text(e.get("Name")),
+                "playing": text(e.get("CurrentTrack")),
+                "bitrate": int(e.get("Bitrate") or 0),
+                "listeners": int(e.get("Listeners") or 0),
+                "url": "http://yp.shoutcast.com/sbin/tunein-station.pls?id=%s" % (e.get("ID") or "0"),
+                "homepage": "",
+                "format": "audio/mpeg"
+            })
+        return entries
+
+
+    # saves channel data via ChannelPlugin.save (presumably .streams), then .catmap under cache/catmap_<module>
+    def save(self):
+        channels.ChannelPlugin.save(self)
+        conf.save("cache/catmap_" + self.module, self.catmap)
+
+    # read cached channel/stream data; without a stored catmap keep (a copy of) the built-in one
+    def cache(self):
+        channels.ChannelPlugin.cache(self)
+        self.catmap = conf.load("cache/catmap_" + self.module) or dict(self.catmap)