Internet radio browser GUI for music/video streams from various directory services.

⌈⌋ ⎇ branch:  streamtuner2


Check-in [e11d6b2207]

Overview
Comment:Xiph module: reintroduce YP.XML extraction (still exceedingly slow), simplify JSON cache API usage (may get retired, incurs a delay of its own), and add raw dir.xiph.org website scraping (fastest, and most features).
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: e11d6b2207fba8be8e2b07cbe621bdca2d3806f4
User & Date: mario on 2015-05-02 19:56:23
Other Links: manifest | tags
Context
2015-05-02
20:03
Clean out unneeded xml module references. check-in: 4797dcce8e user: mario tags: trunk
19:56
Xiph module: reintroduce YP.XML extraction (still exceedingly slow), simplify JSON cache API usage (may get retired, incurs a delay of its own), and add raw dir.xiph.org website scraping (fastest, and most features). check-in: e11d6b2207 user: mario tags: trunk
19:54
Added default icons to dialog windows, and st2 logo to main window. Introduce more keyboard shortcuts (F6 favicons, Alt-F5 category reload, etc..) check-in: f18582ae4d user: mario tags: trunk
Changes

Modified channels/xiph.py from [f7d2f8fd13] to [0e535d4458].

1
2
3
4
5
6
7
8
9
10

11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27







28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63

64
65
66
67
68
69
70
71
72
73
74

75
76


77






78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96







97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# encoding: UTF-8
# api: streamtuner2
# title: Xiph.org
# description: ICEcast radio directory. Now utilizes a cached JSON API.
# type: channel
# url: http://dir.xiph.org/
# version: 0.3
# category: radio
# config: 
#    { name: xiph_min_bitrate,  value: 64,  type: int,  description: "minimum bitrate, filter anything below",  category: filter }

# priority: standard
# png:
#   iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABHNCSVQICAgIfAhkiAAAAg5JREFUOI2lk1tIE2AUx3+7CG1tlmlG1rSEHrKgEUF7yO40taQiRj10I4qKkOaT4hIUItuTkC8hpJAQtJCICrFpzEKw
#   h61eQorGNBOTzbEt16ZrnR5Wq3mZD/3heziX//983znngyyov+eSbHEA5WKBhs4BKVy9gsqajqwiCwo0dA5IQX5u2s4moliMPPV1nCeDzxgNBFDHE2wsKMPzsGVefobjcnO7RMfeMuL341ZBrNEGRmPqqjdvsbbf
#   w7irO4Oj+rdywNNNucmERsLUVndR8uYRU13PCew6hpgP8W02xMpIsik++qk5oweW6y3yob8WnXacZDKJWh1Cp4OtRUHsh19TUlUGViv09RGqKAenU5QnLKm+rK88LjgcUnxmr/h8iNO5XYJBRAQZ/qiVeptGWjty
#   5cClDWLwugQRIRiU5UdPCoD6S89jhV6pks9WG6fuwtBtF5v72vC1v+B86SsM+jD56hjnyiM0lRrAbofeXjQJLdE/78jbXSU5166I6f5VeeDdKdq6GtlSd0QkVU+8XsQhlt9W6izbZ5aMKWgtp2WT/yUHd0xSYU7i
#   dsPQ+1WMKIsJD08wEV2HGLeRyNMjawqRxhuKBfdgz1m7fI/4mVX+ZGxmgniOoJv+QZHGAMC7p60ZnHkC8HfzZmLTBCd9af9ccnqMc9HTdmFe4kLkJbH/4h0xVtcu+SP/C78AL6btab6woPcAAAAASUVORK5CYII=
#
# Xiph.org maintains the Ogg streaming standard and Vorbis
# audio compression format, amongst others.  The ICEcast
# server is an alternative to SHOUTcast.
#
# It also provides a directory listing of known internet
# radio stations, only a handful of them using Ogg though.
#
# The category list is hardwired in this plugin.
#









from config import *
from uikit import uikit
import ahttp
from channels import *
#from xml.sax.saxutils import unescape as entity_decode, escape as xmlentities
#import xml.dom.minidom
import json
import re


          
# Xiph via I-O
#
#
# Xiph meanwhile provides a JSOL dump, which is faster to download and process.
# So we'll use that over the older yp.xml. (Sadly it also doesn't output
# homepage URLs, listeners, etc.)
#
# Xiphs JSON is a horrible mysqldump concatenation, not parseable. Thus it's
# refurbished on //api.include-once.org/xiph/cache.php for consumption. Which
# also provides compressed HTTP transfers and category slicing.
#
# Xiph won't be updating the directory for another while. The original feature
# request is now further delayed as summer of code project:
# · https://trac.xiph.org/ticket/1958
# · https://wiki.xiph.org/Summer_of_Code_2015#Stream_directory_API
#
class xiph (ChannelPlugin):

  # attributes
  listformat = "srv"
  has_search = True
  # JSON cache endpoint queried by update_streams()
  json_url = "http://api.include-once.org/xiph/cache.php"
  #xml_url = "http://dir.xiph.org/yp.xml"


  # content
  # placeholder only; __init__ rebuilds this list from self.genres
  categories = [ "pop", "top40" ]
  
  
  # prepare category names
  def __init__(self, parent=None):
      """Build the category tree and the filter map from self.genres.

      self.genres mixes plain strings (top-level genres) with lists of
      strings (sub-genres).  Every entry is split on "|"; the first part,
      title-cased, becomes the category label, and the complete entry is
      stored in self.filter under that (un-cased) first part.

      parent -- handed on unchanged to ChannelPlugin.__init__
      """
      self.categories = []
      self.filter = {}
      for main in self.genres:
          if isinstance(main, str):
              key = main.split("|")[0]
              self.categories.append(key.title())
              self.filter[key] = main
          else:
              # a nested list holds the sub-categories of a genre
              subcats = []
              for sub in main:
                  key = sub.split("|")[0]
                  subcats.append(key.title())
                  self.filter[key] = sub
              self.categories.append(subcats)

      # GUI
      ChannelPlugin.__init__(self, parent)


  # No-op: nothing is fetched or computed here.  self.categories is
  # filled in by __init__ from the static genres list instead.
  def update_categories(self):
      pass


  # downloads stream list from xiph.org for given category







  def update_streams(self, cat, search=None):

      # With the new JSON cache API on I-O, we can load categories individually:
      params = {}
      if cat:
          params["cat"] = cat.lower()
      if search:
          params["search"] = search
      
      #-- get data
      data = ahttp.get(self.json_url, params=params)
      #log.DATA(data)
      
      #-- extract
      l = []
      log.PROC( "processing api.dir.xiph.org JSON (via api.include-once.org cache)" )
      data = json.loads(data)
      for e in data:
          #log.DATA(e)
          bitrate = int(e["bitrate"])
          if conf.xiph_min_bitrate and bitrate and bitrate >= int(conf.xiph_min_bitrate):
              if not len(l) or l[-1]["title"] != e["stream_name"]:
                  l.append({






|


|
>














<


>
>
>
>
>
>
>






<
|





|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<






|
>


|
|
|
|
<
|
|
|
|
>
|
|
>
>
|
>
>
>
>
>
>
|
|
<
<
<
<
<
<
|
<
<
|

<
<
<


|
>
>
>
>
>
>
>
|


|
<
<
<
<
<
<





<







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25

26
27
28
29
30
31
32
33
34
35
36
37
38
39
40

41
42
43
44
45
46
47















48
49
50
51
52
53
54
55
56
57
58
59
60
61

62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79






80


81
82



83
84
85
86
87
88
89
90
91
92
93
94
95
96






97
98
99
100
101

102
103
104
105
106
107
108
# encoding: UTF-8
# api: streamtuner2
# title: Xiph.org
# description: ICEcast radio directory. Now utilizes a cached JSON API.
# type: channel
# url: http://dir.xiph.org/
# version: 0.4
# category: radio
# config: 
#    { name: xiph_min_bitrate, value: 64, type: int, description: "Minimum bitrate; filter lesser quality streams.", category: filter }
#    { name: xiph_source, value: cache, type: select, select: "cache=JSON cache srv|xml=Clunky XML blob|web=Forbidden fruits", description: "Source for station list extraction." }
# priority: standard
# png:
#   iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABHNCSVQICAgIfAhkiAAAAg5JREFUOI2lk1tIE2AUx3+7CG1tlmlG1rSEHrKgEUF7yO40taQiRj10I4qKkOaT4hIUItuTkC8hpJAQtJCICrFpzEKw
#   h61eQorGNBOTzbEt16ZrnR5Wq3mZD/3heziX//983znngyyov+eSbHEA5WKBhs4BKVy9gsqajqwiCwo0dA5IQX5u2s4moliMPPV1nCeDzxgNBFDHE2wsKMPzsGVefobjcnO7RMfeMuL341ZBrNEGRmPqqjdvsbbf
#   w7irO4Oj+rdywNNNucmERsLUVndR8uYRU13PCew6hpgP8W02xMpIsik++qk5oweW6y3yob8WnXacZDKJWh1Cp4OtRUHsh19TUlUGViv09RGqKAenU5QnLKm+rK88LjgcUnxmr/h8iNO5XYJBRAQZ/qiVeptGWjty
#   5cClDWLwugQRIRiU5UdPCoD6S89jhV6pks9WG6fuwtBtF5v72vC1v+B86SsM+jD56hjnyiM0lRrAbofeXjQJLdE/78jbXSU5166I6f5VeeDdKdq6GtlSd0QkVU+8XsQhlt9W6izbZ5aMKWgtp2WT/yUHd0xSYU7i
#   dsPQ+1WMKIsJD08wEV2HGLeRyNMjawqRxhuKBfdgz1m7fI/4mVX+ZGxmgniOoJv+QZHGAMC7p60ZnHkC8HfzZmLTBCd9af9ccnqMc9HTdmFe4kLkJbH/4h0xVtcu+SP/C78AL6btab6woPcAAAAASUVORK5CYII=
#
# Xiph.org maintains the Ogg streaming standard and Vorbis
# audio compression format, amongst others.  The ICEcast
# server is an alternative to SHOUTcast.
#
# It also provides a directory listing of known internet
# radio stations, only a handful of them using Ogg though.

# The category list is hardwired in this plugin.
#
# And there are three fetch-modes now:
#  → "Cache" retrieves a refurbished JSON station list,
#    both sliceable genres and searchable.
#  → "XML" fetches the olden YP.XML once, buffers it,
#    and tries to uncover per-genre categories from it.
#  → "HTML" extracts from the raw dir.xiph.org directory,
#    where homepages and listener/max infos are available.


from config import *
from uikit import uikit
import ahttp
from channels import *

import xml.dom.minidom
import json
import re


          
# Xiph directory service















class xiph (ChannelPlugin):

  # attributes
  listformat = "srv"
  has_search = True
  # JSON cache endpoint, fetched by from_json_cache()
  json_url = "http://api.include-once.org/xiph/cache.php"
  # complete station dump, fetched by from_yp_xml()
  xml_url = "http://dir.xiph.org/yp.xml"
  # directory website; from_raw_html() currently spells out its own URLs
  web_url = "http://dir.xiph.org/"

  # content
  # starts out empty; update_categories() fills it from the static genres list
  categories = []


  # Categories are basically just the static .genre list

  def update_categories(self):
      """Rebuild self.categories as a title-cased copy of self.genres.

      Plain strings become top-level labels, nested lists become lists of
      sub-category labels.  Entries contain no "|" search patterns anymore.
      """
      tree = []
      for entry in self.genres:
          if isinstance(entry, str):
              tree.append(entry.title())
          else:
              tree.append([sub.title() for sub in entry])
      self.categories = tree


  # Switch to JSON, XML or HTML extractor
  def update_streams(self, cat=None, search=None):
      """Hand the request to the extractor chosen by conf.xiph_source.

      "cache"/"json" select the JSON cache, "xml"/"buffy" the yp.xml blob,
      and every other value falls back to the HTML listings.  A non-empty
      category name is lower-cased before it is passed on.
      """
      if cat:
          cat = cat.lower()

      # JSON cache
      if conf.xiph_source in ("cache", "json"):
          log.PROC("Xiph mode: processing api.dir.xiph.org JSON (via api.include-once.org cache)")
          return self.from_json_cache(cat, search)

      # yp.xml blob
      if conf.xiph_source in ("xml", "buffy"):
          log.PROC("Xiph mode: xml.dom.minidom to traverse yp.xml")
          return self.from_yp_xml(cat, search)

      # anything else: website scraping
      log.PROC("Xiph mode: extract from dir.xiph.org HTML listings")
      return self.from_raw_html(cat, search)






  # Retrieve partial stream list from api.include-once.org cache / JSON API wrapper
  #
  # The server interface is specifically designed for streamtuner2. It refurbishes
  # Xiphs JSOL dump (which is impossible to fix in Python, but easier per PHP).
  # It doesn't contain homepage links, etc either.
  # While Xiph.org promised fixing their very own JSON API, it's delayed through
  # summer of code again. <https://trac.xiph.org/ticket/1958>
  #
  def from_json_cache(self, cat, search=None):

      # With the new JSON cache API on I-O, we can load categories individually:
      params = dict(search=search) if search else dict(cat=cat)






      data = ahttp.get(self.json_url, params=params)
      #log.DATA(data)
      
      #-- extract
      l = []

      data = json.loads(data)
      for e in data:
          #log.DATA(e)
          bitrate = int(e["bitrate"])
          if conf.xiph_min_bitrate and bitrate and bitrate >= int(conf.xiph_min_bitrate):
              if not len(l) or l[-1]["title"] != e["stream_name"]:
                  l.append({
129
130
131
132
133
134
135


136







































































































137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
                  })
          
      # send back the list 
      return l













































































































  genres = [
        "pop",
        [
            "top40",
            "90s",
            "80s",
            "britpop",
            "disco",
            "urban",
            "party",
            "mashup",
            "kpop",
            "jpop",
            "lounge",
            "softpop",
            "top",
            "popular",
            "schlager",
        ],
        "rock",
        [
            "alternative",
            "electro",
            "country",
            "mixed",
            "metal",
            "eclectic",
            "folk",
            "anime",
            "hardcore",
            "pure"
            "jrock"
        ],
        "dance",
        [
            "electronic",
            "deephouse",
            "dancefloor",
            "elektro"
            "eurodance"
            "b",
            "r",
        ],
        "hits",
        [
            "russian"
            "hit",
            "star"
        ],
        "radio",
        [
            "live",
            "community",
            "student",
            "internet",
            "webradio",
        ],
        "classic",
        [
             "classical",
             "ebu",
             "vivaldi",
             "piano",
             "opera",
             "classix",
             "chopin",
             "renaissance",
             "classique",
        ],
        "talk",
        [
            "news",
            "politics",
            "medicine",
            "health"
            "sport",
            "education",
            "entertainment",
            "podcast",
        ],
        "various",
        [
            "hits",
            "ruhit",
            "mega"
        ],
        "house",
        [
            "lounge",
            "trance",
            "techno",
            "handsup",
            "gay",
            "breaks",
            "dj",
        "electronica",
        ],
        "trance",
        [
            "clubbing",
            "electronical"
        ],
        "jazz",
        [
            "contemporary"
        ],
        "oldies",
        [
            "golden",
            "decades",
            "info",
            "70s",
            "60s"
        ],
        "religious",
        [
            "spiritual",
            "inspirational",
            "christian",
            "catholic",
            "teaching",
            "christmas",
            "gospel",
        ],
        "music",
        "unspecified",
        "misc",
        "adult",
        "indie",
        [
            "reggae",
            "blues",
            "college",
            "soundtrack"
        ],
        "mixed",
        [
            "disco",
            "mainstream",
            "soulfull"
        ],
        "funk",
        "hiphop",
        [
            "rap",
            "dubstep",
            "hip",
            "hop"
        ],
        "top",
        [
            "urban"
        ],
        "musica",
        "ambient",
        [
            "downtempo",
            "dub"
        ],
        "promodj",
        "world",    # REGIONAL
        [
            "france",
            "greek",
            "german",
            "westcoast",
            "bollywood",
            "indian",
            "nederlands",
            "europa",
            "italia",
            "brazilian",
            "tropical",
            "korea",
            "seychelles",
            "black",
            "japanese",
            "ethnic",
            "country",
            "americana",
            "western",
            "cuba",
            "afrique",
            "paris",
            "celtic",
            "ambiance",
            "francais",
            "liberte",
            "anglais",
            "arabic",
            "hungary",
            "folklore"
            "latin",
            "dutch"
            "italy"
        ],
        "artist",   # ARTIST NAMES
        [
            "mozart",
            "beatles",
            "michael",
            "nirvana",
            "elvis",
            "britney",
            "abba",
            "madonna",
            "depeche",
        ],
        "salsa",
        "love",
        "la",
        "soul",
        "techno",
        [
            "club",
            "progressive",
            "deep"
        "electro",
        ],
        "best",
        "100%",
        "rnb",
        "retro",
        "new",
        "smooth",
        [
            "cool"
        ],
        "easy",
        [
            "lovesongs",
            "relaxmusic"
        ],
        "chillout",
        "slow",
        [
            "soft"
        ],
        "mix",







>
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



|
<
<
<
<
<
<
|
<
<
<
<
<
<




|
<
<
<
<
|
<
<
<
<
<



|
<
<
<
<
|
<



|
<
<



<
|
<
<
<



|
<
<
<
<
<
|
<
<



<
<
<
<
<
|
|
<



|
<
<



|
<
<
<
<
<
<
|



|
<







|
<
<
<
<



|
<
<
<
|
<
<







|
<
<
<



|
<
<




|
<
<
<








|
<




|
<
<
<
<
<
|
<
<
<
<
<
|
<
<
<
<
|
<
<
<
<
<
|
<
<
<
<
|
<
<
<
<



|
<
<
<
<
<
|
<
<







<
|
<
<












|
<







118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234






235






236
237
238
239
240




241





242
243
244
245




246

247
248
249
250


251
252
253

254



255
256
257
258





259


260
261
262





263
264

265
266
267
268


269
270
271
272






273
274
275
276
277

278
279
280
281
282
283
284
285




286
287
288
289



290


291
292
293
294
295
296
297
298



299
300
301
302


303
304
305
306
307



308
309
310
311
312
313
314
315
316

317
318
319
320
321





322





323




324





325




326




327
328
329
330





331


332
333
334
335
336
337
338

339


340
341
342
343
344
345
346
347
348
349
350
351
352

353
354
355
356
357
358
359
                  })
          
      # send back the list 
      return l



  # Extract complete YP.XML, but just filter for genre/cat
  def from_yp_xml(self, cat, search=None, buffy=[]):
      """Return the yp.xml stations whose genre matches `cat`.

      cat    -- category name; lower-cased and used as a regular expression
                that is searched in each station's "genre" text
      search -- accepted for interface parity only; searching is not
                implemented in this mode and yields an empty list
      buffy  -- station buffer.  The mutable default is deliberate: with
                conf.xiph_source == "buffy" it is shared between calls and
                serves as a cache, in any other mode a fresh list is used.

      Stations below conf.xiph_min_bitrate (or with no detectable bitrate)
      are dropped while the XML is read.
      """
      # Theoretically we could really buffer the extracted station lists.
      # But it's a huge waste of memory to keep it around for unused
      # categories.  Extracting all streams{} at once would be worse. Yet
      # enabling this buffer method prevents partial reloading..
      if conf.xiph_source != "buffy":
          buffy = []

      # Download and parse only while nothing is buffered.  Fetching and
      # appending on every call made "buffy" mode pile up duplicate rows
      # without ever sparing a download.
      if not buffy:
          yp = ahttp.get(self.xml_url, statusmsg="Brace yourselves, still downloading the yp.xml blob.")
          log.DATA("returned")
          self.status("Yes, XML parsing isn't much faster either.", timeout=20)
          for entry in xml.dom.minidom.parseString(yp).getElementsByTagName("entry"):
              bits = bitrate(x(entry, "bitrate"))
              if bits and conf.xiph_min_bitrate and bits >= int(conf.xiph_min_bitrate):
                  buffy.append({
                      "title": x(entry, "server_name"),
                      "url": x(entry, "listen_url"),
                      # the first six characters of server_type are cut off
                      "format": self.mime_fmt(x(entry, "server_type")[6:]),
                      "bitrate": bits,
                      "channels": x(entry, "channels"),
                      "samplerate": x(entry, "samplerate"),
                      "genre": x(entry, "genre"),
                      "playing": x(entry, "current_song"),
                      # yp.xml carries neither listener counts nor homepages
                      "listeners": 0,
                      "max": 0,
                      "homepage": "",
                  })
          self.status("This. Is. Happening. Now.")

      # Filter out a single subtree; without a category (e.g. for a search
      # request) there is nothing to return.
      if not cat:
          return []
      rx = re.compile(cat.lower())
      return [row for row in buffy if rx.search(row["genre"])]



  # Fetch directly from website. Which Xiph does not approve of; but
  # hey, it's a fallback option here. And the only way to actually
  # uncover station homepages.
  #@use_rx
  def from_raw_html(self, cat, search=None, use_rx=False):
      """Scrape station rows for `cat` from the dir.xiph.org HTML listings.

      cat    -- genre name, or one of the format names Ogg_Vorbis / NSV /
                WebM / Opus (matched case-insensitively)
      search -- not supported in this mode; any search yields an empty list
      use_rx -- unused, kept for interface compatibility

      Returns a list of station dicts with genre, title, homepage, playing,
      url (an XSPF link on dir.xiph.org), listformat, listeners, bitrate
      and format.  An empty list is returned when there is no category.
      """
      # No search support, and without a category there is no page to fetch
      # (previously that case crashed on the unassigned `url`).
      if search or not cat:
          return []

      # Build request URL.  update_streams() lower-cases the category, so the
      # format names have to be recognised regardless of case and mapped back
      # to the spelling used in the by_format path.
      formats = {"ogg_vorbis": "Ogg_Vorbis", "nsv": "NSV", "webm": "WebM", "opus": "Opus"}
      fmt_name = formats.get(cat.lower())
      if fmt_name:
          url = "http://dir.xiph.org/by_format/{}".format(fmt_name)
      else:
          url = "http://dir.xiph.org/by_genre/{}".format(cat.title())

      # Collect all result pages: the plain URL plus pages 1 to 3
      html = ahttp.get(url)
      for i in range(1, 4):
          self.status(i / 5.1)
          html += ahttp.get(url, {"search": cat.title(), "page": i})
      # Best-effort re-decoding of the text as UTF-8; keep it as-is on failure
      try:
          html = html.encode("raw_unicode_escape").decode("utf-8")
      except UnicodeError:
          pass

      # Find streams (raw string, so the regex escapes reach `re` untouched)
      ls = re.findall(r"""
          <tr\s+class="row[01]">
          .*? class="name">
               <a\s+href="(.*?)"[^>]*>
                (.*?)</a>
          .*? "listeners">\[(\d+)
          .*? "stream-description">(.*?)<
          .*? Tags: (.*?) </div>
          .*? href="(/listen/\d+/listen.xspf)"
          .*? class="format"\s+title="([^"]+)"
          .*? /by_format/([^"]+)
      """, html, re.X|re.S)

      # Assemble
      r = []
      for homepage, title, listeners, playing, tags, listen_path, bits, fmt in ls:
          r.append(dict(
              genre = clean(tags),
              title = clean(title),
              homepage = ahttp.fix_url(homepage),
              playing = clean(playing),
              url = "http://dir.xiph.org{}".format(listen_path),
              listformat = "xspf",
              listeners = int(listeners),
              bitrate = bitrate(bits),
              format = self.mime_fmt(guess_format(fmt)),
          ))
      return r



  # Static list of categories
  genres = [
        "pop",
        [
            "top40", "90s", "80s", "britpop", "disco", "urban", "party",






            "mashup", "kpop", "jpop", "lounge", "softpop", "top", "popular",






            "schlager",
        ],
        "rock",
        [
            "alternative", "electro", "country", "mixed", "metal",




            "eclectic", "folk", "anime", "hardcore", "pure" "jrock"





        ],
        "dance",
        [
            "electronic", "deephouse", "dancefloor", "elektro" "eurodance"




            "rnb",

        ],
        "hits",
        [
            "russian" "hit", "star"


        ],
        "radio",
        [

            "live", "community", "student", "internet", "webradio",



        ],
        "classic",
        [
             "classical", "ebu", "vivaldi", "piano", "opera", "classix",





             "chopin", "renaissance", "classique",


        ],
        "talk",
        [





            "news", "politics", "medicine", "health" "sport", "education",
            "entertainment", "podcast",

        ],
        "various",
        [
            "hits", "ruhit", "mega"


        ],
        "house",
        [
            "lounge", "trance", "techno", "handsup", "gay", "breaks", "dj",






            "electronica",
        ],
        "trance",
        [
            "clubbing", "electronical"

        ],
        "jazz",
        [
            "contemporary"
        ],
        "oldies",
        [
            "golden", "decades", "info", "70s", "60s"




        ],
        "religious",
        [
            "spiritual", "inspirational", "christian", "catholic",



            "teaching", "christmas", "gospel",


        ],
        "music",
        "unspecified",
        "misc",
        "adult",
        "indie",
        [
            "reggae", "blues", "college", "soundtrack"



        ],
        "mixed",
        [
            "disco", "mainstream", "soulfull"


        ],
        "funk",
        "hiphop",
        [
            "rap", "dubstep", "hip", "hop"



        ],
        "top",
        [
            "urban"
        ],
        "musica",
        "ambient",
        [
            "downtempo", "dub"

        ],
        "promodj",
        "world",    # REGIONAL
        [
            "france", "greek", "german", "westcoast", "bollywood", "indian",





            "nederlands", "europa", "italia", "brazilian", "tropical",





            "korea", "seychelles", "black", "japanese", "ethnic", "country",




            "americana", "western", "cuba", "afrique", "paris", "celtic",





            "ambiance", "francais", "liberte", "anglais", "arabic",




            "hungary", "folklore" "latin", "dutch" "italy"




        ],
        "artist",   # ARTIST NAMES
        [
            "mozart", "beatles", "michael", "nirvana", "elvis", "britney",





            "abba", "madonna", "depeche",


        ],
        "salsa",
        "love",
        "la",
        "soul",
        "techno",
        [

            "club", "progressive", "deep", "electro",


        ],
        "best",
        "100%",
        "rnb",
        "retro",
        "new",
        "smooth",
        [
            "cool"
        ],
        "easy",
        [
            "lovesongs", "relaxmusic"

        ],
        "chillout",
        "slow",
        [
            "soft"
        ],
        "mix",
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
        "musica",
        "old",
        "emisora",
        "img",
        "rockabilly",
        "charts",
        [
            "best80",
            "70er",
            "80er",
            "60er"
            "chart",
        ],
        "other",
        [
            "varios"
        ],
        "soulful",
        "listening",







|
<
<
<
<







409
410
411
412
413
414
415
416




417
418
419
420
421
422
423
        "musica",
        "old",
        "emisora",
        "img",
        "rockabilly",
        "charts",
        [
            "best80", "70er", "80er", "60er" "chart",




        ],
        "other",
        [
            "varios"
        ],
        "soulful",
        "listening",
503
504
505
506
507
508
509

510
511



































        ],
        "ska",
        [
            "punkrock",
            "oi"
        ],
        "darkwave",

    ]











































>


>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
        ],
        "ska",
        [
            "punkrock",
            "oi"
        ],
        "darkwave",
        "Ogg_Vorbis", "NSV", "WebM", "Opus",
    ]



# Helper functions for XML extraction mode

# Shortcut to get text content from XML subnode by name
def x(node, name):
    """Return the data of the first child node of the first `name` element
    below `node`, or "" if there is no such element or it has no children."""
    matches = node.getElementsByTagName(name)
    if not matches or not matches[0].childNodes:
        return ""
    return str(matches[0].childNodes[0].data)

# Convert bitrate string or "Quality \d+" to integer
def bitrate(str):
    """Convert a bitrate string or "Quality N" text to an integer.

    The first run of digits in `str` is used.  Values above 10 are returned
    unchanged; values up to 10 are scaled by 25.6.  Text without any digits
    gives 0.
    """
    digits = re.findall(r"(\d+)", str)
    if not digits:
        return 0
    # Convert before comparing: the regex match is text, and comparing or
    # multiplying it as a string is what broke the original implementation.
    value = int(digits[0])
    if value > 10:
        return value
    return int(value * 25.6)


# Extract mime type from text
rx_fmt = re.compile("ogg|mp3|mp4|theora|nsv|webm|opus|mpeg")
def guess_format(str):
    """Return the first known format keyword found in `str` (case-insensitive).

    "mpeg" is appended to the text before matching, so there is always a
    match and "mpeg" is the fallback result.
    """
    haystack = str.lower() + "mpeg"
    return rx_fmt.search(haystack).group(0)

# Clean up HTML text snippets
def clean(str):
    """Run an HTML snippet through strip_tags(), entity_decode() and nl(),
    in that order, and return the result."""
    without_tags = strip_tags(str)
    decoded = entity_decode(without_tags)
    return nl(decoded)