Index: channels/shoutcast.py
==================================================================
--- channels/shoutcast.py
+++ channels/shoutcast.py
@@ -9,21 +9,32 @@
# depends: pq, re, http
# author: Mario
# original: Jean-Yves Lefort
#
# Shoutcast is a server software for audio streaming. It automatically spools
-# station information on shoutcast.com, which this plugin can read out.
+# station information on shoutcast.com.
+# It was acquired by Radionomy in 2014; significant changes have taken
+# place since. The former YP got deprecated, now seemingly undeprecated.
+#
+# http://wiki.winamp.com/wiki/SHOUTcast_Radio_Directory_API
+#
+# But neither their Wiki nor Bulletin Board provide concrete information on
+# the eligibility of open source desktop apps for an authhash.
+#
+# Therefore we'll be retrieving stuff from the homepage still. The new
+# interface conveniently uses JSON already, so let's use that:
+#
+# POST http://www.shoutcast.com/Home/BrowseByGenre {genreid: 9}
#
-# After its recent aquisition the layout got slimmed down considerably. So
-# there's not a lot of information to fetch left. And this plugin is now back
-# to defaulting to regex extraction instead of HTML parsing & DOM extraction.
+# We do need a catmap now too, but that's easy to acquire and will be kept
+# within the cache dirs.
#
#
#
-
import ahttp as http
+from json import loads as json_decode
import re
from config import conf, __print__, dbg
from pq import pq
#from channels import * # works everywhere but in this plugin(???!)
import channels
@@ -32,165 +43,102 @@
# SHOUTcast data module ----------------------------------------
class shoutcast(channels.ChannelPlugin):
- # desc
- api = "streamtuner2"
- module = "shoutcast"
- title = "SHOUTcast"
- homepage = "http://www.shoutcast.com/"
- base_url = "http://shoutcast.com/"
- listformat = "audio/x-scpls"
-
- # settings
- config = [
- ]
-
- # categories
- categories = ['Alternative', ['Adult Alternative', 'Britpop', 'Classic Alternative', 'College', 'Dancepunk', 'Dream Pop', 'Emo', 'Goth', 'Grunge', 'Hardcore', 'Indie Pop', 'Indie Rock', 'Industrial', 'Modern Rock', 'New Wave', 'Noise Pop', 'Power Pop', 'Punk', 'Ska', 'Xtreme'], 'Blues', ['Acoustic Blues', 'Chicago Blues', 'Contemporary Blues', 'Country Blues', 'Delta Blues', 'Electric Blues'], 'Classical', ['Baroque', 'Chamber', 'Choral', 'Classical Period', 'Early Classical', 'Impressionist', 'Modern', 'Opera', 'Piano', 'Romantic', 'Symphony'], 'Country', ['Americana', 'Bluegrass', 'Classic Country', 'Contemporary Bluegrass', 'Contemporary Country', 'Honky Tonk', 'Hot Country Hits', 'Western'], 'Decades', ['30s', '40s', '50s', '60s', '70s', '80s', '90s'], 'Easy Listening', ['Exotica', 'Light Rock', 'Lounge', 'Orchestral Pop', 'Polka', 'Space Age Pop'], 'Electronic', ['Acid House', 'Ambient', 'Big Beat', 'Breakbeat', 'Dance', 'Demo', 'Disco', 'Downtempo', 'Drum and Bass', 'Electro', 'Garage', 'Hard House', 'House', 'IDM', 'Jungle', 'Progressive', 'Techno', 'Trance', 'Tribal', 'Trip Hop'], 'Folk', ['Alternative Folk', 'Contemporary Folk', 'Folk Rock', 'New Acoustic', 'Traditional Folk', 'World Folk'], 'Inspirational', ['Christian', 'Christian Metal', 'Christian Rap', 'Christian Rock', 'Classic Christian', 'Contemporary Gospel', 'Gospel', 'Southern Gospel', 'Traditional Gospel'], 'International', ['African', 'Arabic', 'Asian', 'Bollywood', 'Brazilian', 'Caribbean', 'Celtic', 'Chinese', 'European', 'Filipino', 'French', 'Greek', 'Hindi', 'Indian', 'Japanese', 'Jewish', 'Klezmer', 'Korean', 'Mediterranean', 'Middle Eastern', 'North American', 'Russian', 'Soca', 'South American', 'Tamil', 'Worldbeat', 'Zouk'], 'Jazz', ['Acid Jazz', 'Avant Garde', 'Big Band', 'Bop', 'Classic Jazz', 'Cool Jazz', 'Fusion', 'Hard Bop', 'Latin Jazz', 'Smooth Jazz', 'Swing', 'Vocal Jazz', 'World Fusion'], 'Latin', ['Bachata', 'Banda', 'Bossa Nova', 'Cumbia', 'Latin Dance', 'Latin Pop', 
'Latin Rock', 'Mariachi', 'Merengue', 'Ranchera', 'Reggaeton', 'Regional Mexican', 'Salsa', 'Tango', 'Tejano', 'Tropicalia'], 'Metal', ['Black Metal', 'Classic Metal', 'Extreme Metal', 'Grindcore', 'Hair Metal', 'Heavy Metal', 'Metalcore', 'Power Metal', 'Progressive Metal', 'Rap Metal'], 'Misc', [], 'New Age', ['Environmental', 'Ethnic Fusion', 'Healing', 'Meditation', 'Spiritual'], 'Pop', ['Adult Contemporary', 'Barbershop', 'Bubblegum Pop', 'Dance Pop', 'Idols', 'JPOP', 'Oldies', 'Soft Rock', 'Teen Pop', 'Top 40', 'World Pop'], 'Public Radio', ['College', 'News', 'Sports', 'Talk'], 'Rap', ['Alternative Rap', 'Dirty South', 'East Coast Rap', 'Freestyle', 'Gangsta Rap', 'Hip Hop', 'Mixtapes', 'Old School', 'Turntablism', 'West Coast Rap'], 'Reggae', ['Contemporary Reggae', 'Dancehall', 'Dub', 'Ragga', 'Reggae Roots', 'Rock Steady'], 'Rock', ['Adult Album Alternative', 'British Invasion', 'Classic Rock', 'Garage Rock', 'Glam', 'Hard Rock', 'Jam Bands', 'Piano Rock', 'Prog Rock', 'Psychedelic', 'Rockabilly', 'Surf'], 'Soundtracks', ['Anime', 'Kids', 'Original Score', 'Showtunes', 'Video Game Music'], 'Talk', ['BlogTalk', 'Comedy', 'Community', 'Educational', 'Government', 'News', 'Old Time Radio', 'Other Talk', 'Political', 'Scanner', 'Spoken Word', 'Sports', 'Technology'], 'Themes', ['Adult', 'Best Of', 'Chill', 'Eclectic', 'Experimental', 'Female', 'Heartache', 'Instrumental', 'LGBT', 'Party Mix', 'Patriotic', 'Rainy Day Mix', 'Reality', 'Sexy', 'Shuffle', 'Travel Mix', 'Tribute', 'Trippy', 'Work Mix']]
- #["default", [], 'TopTen', [], 'Alternative', ['College', 'Emo', 'Hardcore', 'Industrial', 'Punk', 'Ska'], 'Americana', ['Bluegrass', 'Blues', 'Cajun', 'Folk'], 'Classical', ['Contemporary', 'Opera', 'Symphonic'], 'Country', ['Bluegrass', 'New Country', 'Western Swing'], 'Electronic', ['Acid Jazz', 'Ambient', 'Breakbeat', 'Downtempo', 'Drum and Bass', 'House', 'Trance', 'Techno'], 'Hip Hop', ['Alternative', 'Hardcore', 'New School', 'Old School', 'Turntablism'], 'Jazz', ['Acid Jazz', 'Big Band', 'Classic', 'Latin', 'Smooth', 'Swing'], 'Pop/Rock', ['70s', '80s', 'Classic', 'Metal', 'Oldies', 'Pop', 'Rock', 'Top 40'], 'R&B/Soul', ['Classic', 'Contemporary', 'Funk', 'Smooth', 'Urban'], 'Spiritual', ['Alternative', 'Country', 'Gospel', 'Pop', 'Rock'], 'Spoken', ['Comedy', 'Spoken Word', 'Talk'], 'World', ['African', 'Asian', 'European', 'Latin', 'Middle Eastern', 'Reggae'], 'Other/Mixed', ['Eclectic', 'Film', 'Instrumental']]
- current = ""
- default = "Alternative"
- empty = ""
-
-
- # redefine
- streams = {}
-
-
- # extracts the category list from shoutcast.com,
- # sub-categories are queried per 'AJAX'
- def update_categories(self):
- html = http.get(self.base_url)
- self.categories = []
- #__print__( dbg.DATA, html )
-
- #
Radio Genres
- rx = re.compile(r'[\w\s]+ ', re.S)
- sub = []
- for uu in rx.findall(html):
- #__print__( dbg.DATA, uu )
- (main,name,id) = uu
- name = urllib.unquote(name)
-
- # main category
- if main:
- if sub:
- self.categories.append(sub)
- sub = []
- self.categories.append(name)
- else:
- sub.append(name)
-
- # it's done
- #__print__( dbg.PROC, self.categories )
- conf.save("cache/categories_shoutcast", self.categories)
- pass
-
-
- # downloads stream list from shoutcast for given category
- def update_streams(self, cat):
-
- if (not cat or cat == self.empty):
- __print__( dbg.ERR, "nocat" )
- return []
-
- #/radiolist.cfm?action=sub&string=&cat=Oldies&_cf_containerId=radiolist&_cf_nodebug=true&_cf_nocache=true&_cf_rc=0
- #/radiolist.cfm?start=19&action=sub&string=&cat=Oldies&amount=18&order=listeners
- # page
- url = "http://www.shoutcast.com/radiolist.cfm"
- params = {
- "action": "sub",
- "string": "",
- "cat": cat,
- "order": "listeners",
- "amount": conf.max_streams,
- }
- referer = "http://www.shoutcast.com/?action=sub&cat="+cat
- html = http.get(url, params=params, referer=referer, ajax=1)
- self.parent.status(0.75)
-
- #__print__(dbg.DATA, html)
- #__print__(re.compile("id=(\d+)").findall(html));
- # new html
- """
-
-
- Schlagerhoelle - das Paradies fr Schlager und Discofox
- Oldies
- 955
- 128
- MP3
-
- """
-
- # With the new shallow lists it doesn't make much sense to use
- # the pyquery DOM traversal. There aren't any sensible selectors to
- # extract values; it's just counting the tags.
- #
- # And there's a bug in PyQuery 1.2.4 and CssSelector. So make two
- # attempts, alternate between regex and DOM; user preference first.
- #
- for use_rx in [not conf.pyquery or not pq, conf.pyquery]:
- try:
- entries = (self.with_regex(html) if use_rx else self.with_dom(html))
- if len(entries):
- break
- except Exception as e:
- __print__(dbg.ERR, e)
- continue
- return entries
-
-
- # Extract using regex
- def with_regex(self, html):
- __print__(dbg.PROC, "channels.shoutcast.update_streams: regex scraping mode")
- rx_stream = re.compile(
- """
- ]+ href="http://yp.shoutcast.com/sbin/tunein-station.pls\?
- id=(\d+)"> ([^<>]+)
- \s+ ]+ >([^<>]+)
- \s+ ]+ >(\d+)
- \s+ ]+ >(\d+)
- \s+ ]+ >(\w+)
- """,
- re.S|re.I|re.X
- )
- # extract entries
- entries = []
- for m in rx_stream.findall(html):
- #__print__(m)
- (id, title, genre, listeners, bitrate, fmt) = m
- entries += [{
- "id": id,
- "url": "http://yp.shoutcast.com/sbin/tunein-station.pls?id=" + id,
- "title": self.entity_decode(title),
- #"homepage": http.fix_url(homepage),
- #"playing": self.entity_decode(playing),
- "genre": genre,
- "listeners": int(listeners),
- "max": 0, #int(uu[6]),
- "bitrate": int(bitrate),
- "format": self.mime_fmt(fmt),
- }]
- return entries
-
-
- # Iterate over DOM instead
- def with_dom(self, html):
- __print__(dbg.PROC, "channels.shoutcast.update_streams: attempt PyQuery/DOM traversal")
- entries = []
- for div in (pq(e) for e in pq(html).find("tr")):
- entries.append({
- "title": div.find("a.transition").text(),
- "url": div.find("a.transition").attr("href"),
- "homepage": "",
- "listeners": int(div.find("td:eq(3)").text()),
- "bitrate": int(div.find("td:eq(4)").text()),
- "format": self.mime_fmt(div.find("td:eq(5)").text()),
- "max": 0,
- "genre": cat,
- })
- return entries
-
+ # desc: identification and endpoints of this channel plugin
+ api = "streamtuner2"
+ module = "shoutcast"   # also the suffix of the catmap cache file name
+ title = "SHOUTcast"
+ homepage = "http://www.shoutcast.com/"
+ base_url = "http://shoutcast.com/"   # page fetched by update_categories()
+ listformat = "audio/x-scpls"   # stream urls point at tunein-station.pls playlists
+
+ # settings: this plugin declares none
+ config = [
+ ]
+
+ # categories: emptied and refilled by update_categories()
+ categories = []
+ catmap = {"Choral": 35, "Winter": 275, "JROCK": 306, "Motown": 237, "Political": 290, "Tango": 192, "Ska": 22, "Comedy": 283, "Decades": 212, "European": 143, "Reggaeton": 189, "Islamic": 307, "Freestyle": 114, "French": 145, "Western": 53, "Dancepunk": 6, "News": 287, "Xtreme": 23, "Bollywood": 138, "Celtic": 141, "Kids": 278, "Filipino": 144, "Hanukkah": 270, "Greek": 146, "Punk": 21, "Spiritual": 211, "Industrial": 14, "Baroque": 33, "Talk": 282, "JPOP": 227, "Scanner": 291, "Mediterranean": 154, "Swing": 174, "Themes": 89, "IDM": 75, "40s": 214, "Funk": 236, "Rap": 110, "House": 74, "Educational": 285, "Caribbean": 140, "Misc": 295, "30s": 213, "Anniversary": 266, "Sports": 293, "International": 134, "Tribute": 107, "Piano": 41, "Romantic": 42, "90s": 219, "Latin": 177, "Grunge": 10, "Dubstep": 312, "Government": 286, "Country": 44, "Salsa": 191, "Hardcore": 11, "Afrikaans": 309, "Downtempo": 69, "Merengue": 187, "Psychedelic": 260, "Female": 95, "Bop": 167, "Tribal": 80, "Metal": 195, "70s": 217, "Tejano": 193, "Exotica": 55, "Anime": 277, "BlogTalk": 296, "African": 135, "Patriotic": 101, "Blues": 24, "Turntablism": 119, "Chinese": 142, "Garage": 72, "Dance": 66, "Valentine": 273, "Barbershop": 222, "Alternative": 1, "Technology": 294, "Folk": 82, "Klezmer": 152, "Samba": 315, "Turkish": 305, "Trance": 79, "Dub": 245, "Rock": 250, "Polka": 59, "Modern": 39, "Lounge": 57, "Indian": 149, "Hindi": 148, "Brazilian": 139, "Eclectic": 93, "Korean": 153, "Creole": 316, "Dancehall": 244, "Surf": 264, "Reggae": 242, "Goth": 9, "Oldies": 226, "Zouk": 162, "Environmental": 207, "Techno": 78, "Adult": 90, "Rockabilly": 262, "Wedding": 274, "Russian": 157, "Sexy": 104, "Chill": 92, "Opera": 40, "Emo": 8, "Experimental": 94, "Showtunes": 280, "Breakbeat": 65, "Jungle": 76, "Soundtracks": 276, "LoFi": 15, "Metalcore": 202, "Bachata": 178, "Kwanzaa": 272, "Banda": 179, "Americana": 46, "Classical": 32, "German": 302, "Tamil": 160, "Bluegrass": 47, "Halloween": 269, 
"College": 300, "Ambient": 63, "Birthday": 267, "Meditation": 210, "Electronic": 61, "50s": 215, "Chamber": 34, "Heartache": 96, "Britpop": 3, "Soca": 158, "Grindcore": 199, "Reality": 103, "00s": 303, "Symphony": 43, "Pop": 220, "Ranchera": 188, "Electro": 71, "Christmas": 268, "Christian": 123, "Progressive": 77, "Jazz": 163, "Trippy": 108, "Instrumental": 97, "Tropicalia": 194, "Fusion": 170, "Healing": 209, "Glam": 255, "80s": 218, "KPOP": 308, "Worldbeat": 161, "Mixtapes": 117, "60s": 216, "Mariachi": 186, "Soul": 240, "Cumbia": 181, "Inspirational": 122, "Impressionist": 38, "Gospel": 129, "Disco": 68, "Arabic": 136, "Idols": 225, "Ragga": 247, "Demo": 67, "LGBT": 98, "Honeymoon": 271, "Japanese": 150, "Community": 284, "Weather": 317, "Asian": 137, "Hebrew": 151, "Flamenco": 314, "Shuffle": 105}
+ current = ""
+ default = "Alternative"   # a genre title; presumably the initially shown category -- TODO confirm
+ empty = ""
+
+ # redefine (presumably shadows the ChannelPlugin attribute of the same name -- TODO confirm)
+ streams = {}
+
+
+ # Extracts the category list from www.shoutcast.com,
+ # stores a catmap (title => id)
+ def update_categories(self):
+     """Rebuild self.categories and extend self.catmap from the homepage, then call self.save()."""
+     html = http.get(self.base_url)
+     self.categories = []
+
+     # Main genres from mobile dropdown
+     # NOTE(review): the loop below expects (id, title) pairs from this pattern -- confirm its groups
+     rx = re.compile(r'(\w+[ \t]*\w+)<')
+     main = rx.findall(html)
+     # Genre list in sidebar; `[ \t]` replaces `\h`, which Python's re does
+     # not support, so titles with blanks ("Adult Alternative") match too
+     rx = re.compile(r'c-genre-(\d+).+?>(\w[\w \t]*\w)<')
+     subs = rx.findall(html)
+     __print__(dbg.DATA, main)
+     __print__(dbg.DATA, subs)
+     # group: a main genre (or a stray leading entry) opens a new sub-list, the rest is filed in it
+     current = None
+     for (gid, title) in subs:
+         if current is None or (gid, title) in main:
+             self.categories.append(title)
+             current = []
+             self.categories.append(current)
+         else:
+             current.append(title)
+         self.catmap[title] = int(gid)
+     self.save()
+
+
+ # downloads stream list from shoutcast for given category
+ def update_streams(self, cat):
+     """Return the list of station dicts for genre `cat`; [] if it is unknown or the reply is not JSON."""
+     if cat not in self.catmap:
+         __print__( dbg.ERR, "nocat" )
+         return []
+
+     # page: POST the numeric genre id to the homepage's own JSON endpoint
+     url = "http://www.shoutcast.com/Home/BrowseByGenre"
+     params = { "genreid": int(self.catmap[cat]) }
+     reply = http.get(url, params=params, referer=None, post=1, ajax=1)
+     self.parent.status(0.75)
+
+     # an HTML error page (or no reply at all) means no stations, not a traceback
+     try:
+         rows = json_decode(reply)
+     except (ValueError, TypeError) as err:
+         __print__(dbg.ERR, err)
+         return []
+
+     # remap JSON; NOTE(review): str() may reject non-ASCII names on Python 2 -- confirm
+     return [{
+             "id": int(e.get("ID", 0)),
+             "genre": str(e.get("Genre", "")),
+             "title": str(e.get("Name", "")),
+             "playing": str(e.get("CurrentTrack", "")),
+             "bitrate": int(e.get("Bitrate", 0)),
+             "listeners": int(e.get("Listeners", 0)),
+             "url": "http://yp.shoutcast.com/sbin/tunein-station.pls?id=%s" % e.get("ID", "0"),
+             "homepage": "",
+             "format": "audio/mpeg"
+     } for e in rows]
+
+
+ # saves .streams and .catmap
+ def save(self):   # base-class save first, then the catmap under "cache/catmap_<module>"
+ channels.ChannelPlugin.save(self)
+ conf.save("cache/catmap_" + self.module, self.catmap)
+
+ # read previous channel/stream data, if there is any
+ def cache(self):   # restore the base class's cached data, then the title => id catmap
+     channels.ChannelPlugin.cache(self)
+     self.catmap = conf.load("cache/catmap_" + self.module) or dict(self.catmap)   # nothing cached: keep the bundled map