Internet radio browser GUI for music/video streams from various directory services.

⌈⌋ ⎇ branch:  streamtuner2


Check-in [3a5e6068b9]

Overview
Comment: Fix regex parsing for new sparse shoutcast.com
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 3a5e6068b90f37971254d6296cef23b93bd2558d
User & Date: mario on 2014-04-05 23:17:16
Other Links: manifest | tags
Context
2014-04-05
23:32
use new `version` command line tool check-in: 62b4121741 user: mario tags: trunk
23:17
Fix regex parsing for new sparse shoutcast.com check-in: 3a5e6068b9 user: mario tags: trunk
2014-01-06
22:45
prepare for gtk3 check-in: c0702405f8 user: mario tags: trunk
Changes

Modified channels/shoutcast.py from [59a1f308de] to [8c7ec7957d].

72
73
74
75
76
77
78
79

80
81
82
83
84
85



86
87

88
89
90


91

92
93
94
95
96
97




98
99
100
101
102
103
104
72
73
74
75
76
77
78

79
80
81
82



83
84
85
86

87
88
89
90
91
92

93






94
95
96
97
98
99
100
101
102
103
104







-
+



-
-
-
+
+
+

-
+



+
+
-
+
-
-
-
-
-
-
+
+
+
+







        streams = {}
        
            
        # extracts the category list from shoutcast.com,
        # sub-categories are queried per 'AJAX'
        def update_categories(self):
            html = http.get(self.base_url)
            self.categories = ["default"]
            self.categories = []
            __print__( html )

            # <h2>Radio Genres</h2>
	    rx_main = re.compile(r'<li class="prigen" id="(\d+)".+?<a href="/radio/([\w\s]+)">[\w\s]+</a></li>', re.S)
	    rx_sub = re.compile(r'<a href="/radio/([\w\s\d]+)">[\w\s\d]+</a></li>')
            for uu in rx_main.findall(html):
	    rx = re.compile(r'<li((?:\s+id="\d+"\s+class="files")?)><a href="\?action=sub&cat=([\w\s]+)#(\d+)">[\w\s]+</a>', re.S)
            sub = []
            for uu in rx.findall(html):
                __print__(uu)
		(id,name) = uu
		(main,name,id) = uu
                name = urllib.unquote(name)

                # main category
                if main:
                    if sub:
                self.categories.append(name)
                        self.categories.append(sub)

                # sub entries
                html = http.ajax("http://shoutcast.com/genre.jsp", {"genre":name, "id":id})
                __print__(html)
                sub = rx_sub.findall(html)
                self.categories.append(sub)
                        sub = []
                    self.categories.append(name)
                else:
                    sub.append(name)

            # it's done
            __print__(self.categories)
            conf.save("cache/categories_shoutcast", self.categories)
            pass


118
119
120
121
122
123
124
125
126
127
128

129



130
131
132
133




134
135
136

137
138
139













140
141
142
143
144

145
146


147
148
149
150



151
152
153

154
155
156
157
158
159
160


161
162

163

164
165
166
167
168
169






170
171
172
173
174
175
176

177
178
179
180
181
182




183
184
185
186
187
188
189



190
191
192
193
194
195
196
197
198
199
200
201
202
203
204

205
206


207
208
209
210
211
212
213
118
119
120
121
122
123
124

125
126

127
128
129
130
131
132



133
134
135
136
137
138
139
140
141
142

143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161


162
163




164
165
166
167
168
169
170
171
172
173
174
175


176
177
178

179
180
181






182
183
184
185
186
187
188
189
190
191
192
193

194
195
196




197
198
199
200







201
202
203
204
205

206
207
208
209
210
211
212
213




214
215

216
217
218
219
220
221
222
223
224







-


-
+

+
+
+

-
-
-
+
+
+
+



+


-
+
+
+
+
+
+
+
+
+
+
+
+
+





+
-
-
+
+
-
-
-
-
+
+
+



+





-
-
+
+

-
+

+
-
-
-
-
-
-
+
+
+
+
+
+






-
+


-
-
-
-
+
+
+
+
-
-
-
-
-
-
-
+
+
+


-








-
-
-
-
+

-
+
+








            # loop
            entries = []
            next = 0
            max = int(conf.max_streams)
            count = max
            rx_stream = None
            rx_next = re.compile("""onclick="showMoreGenre""")

            try:
               while (next < max):
               if (next < max):


                  #/radiolist.cfm?action=sub&string=&cat=Oldies&_cf_containerId=radiolist&_cf_nodebug=true&_cf_nocache=true&_cf_rc=0
                  #/radiolist.cfm?start=19&action=sub&string=&cat=Oldies&amount=18&order=listeners
                  # page
                  url = "http://www.shoutcast.com/genre-ajax/" + ucat
                  referer = url.replace("/genre-ajax", "/radio")
                  params = { "strIndex":"0", "count":str(count), "ajax":"true", "mode":"listeners", "order":"desc" }
                  url = "http://www.shoutcast.com/radiolist.cfm?action=sub&string=&cat="+ucat+"&order=listeners&amount="+str(count)
                  __print__(url)
                  referer = "http://www.shoutcast.com/?action=sub&cat="+ucat
                  params = {} # "strIndex":"0", "count":str(count), "ajax":"true", "mode":"listeners", "order":"desc" }
                  html = http.ajax(url, params, referer)   #,feedback=self.parent.status)

                  __print__(html)
                  __print__(re.compile("id=(\d+)").findall(html));

                  # regular expressions
                  if not conf.get("pyquery") or not pq:
                  if 1:  #not conf.get("pyquery") or not pq:

                      # new html
                      """ 
                      <tr>
                         <td width="6%"><a href="#" onClick="window.open('player/?radname=Schlagerhoelle%20%2D%20das%20Paradies%20fr%20Schlager%20%20und%20Discofox&stationid=14687&coding=MP3','radplayer','height=232,width=776')"><img class="icon transition" src="/img/icon-play.png" alt="Play"></a></td>
                         <td width="30%"><a class="transition" href="http://yp.shoutcast.com/sbin/tunein-station.pls?id=14687">Schlagerhoelle - das Paradies fr Schlager  und Discofox</a></td>
                         <td width="12%" style="text-align:left;" width="10%">Oldies</td>
                         <td width="12%" style="text-align:left;" width="10%">955</td>
                         <td width="12%" style="text-align:left;" width="10%">128</td>
                         <td width="12%" style="text-align:left;" width="10%">MP3</td>
                      </tr>
                      """
                  
                      # new extraction regex
                      if not rx_stream:
                          rx_stream = re.compile(
                              """
                               <a [^>]+  href="http://yp.shoutcast.com/sbin/tunein-station.pls\?
                              <a\s+class="?playbutton\d?[^>]+id="(\d+)".+?
                              <a\s+class="[\w\s]*title[\w\s]*"[^>]+href="(http://[^">]+)"[^>]*>([^<>]+)</a>.+?
                                         id=(\d+)">   ([^<>]+)   </a>  </td>
                               \s+  <td [^>]+  >([^<>]+)</td>
                              (?:Recently\s*played|Coming\s*soon|Now\s*playing):\s*([^<]*).+?
                              ners">(\d*)<.+?
                              bitrate">(\d*)<.+?
                              type">([MP3AAC]*)
                               \s+  <td [^>]+  >(\d+)</td>
                               \s+  <td [^>]+  >(\d+)</td>
                               \s+  <td [^>]+  >(\w+)</td>
                              """,
                              re.S|re.I|re.X
                          )
                      __print__( rx_stream)

                      # extract entries
                      self.parent.status("parsing document...")
                      __print__("loop-rx")
                      for m in rx_stream.findall(html):
                          (id, homepage, title, playing, ls, bit, fmt) = m
                          __print__(uu)
                          __print__(m)
                          (id, title, genre, listeners, bitrate, fmt) = m
                          entries += [{
                              "title": self.entity_decode(title),
                              "id": id,
                              "url": "http://yp.shoutcast.com/sbin/tunein-station.pls?id=" + id,
                              "title": self.entity_decode(title),
                              "homepage": http.fix_url(homepage),
                              "playing": self.entity_decode(playing),
                              "genre": cat, #self.strip_tags(uu[4]),
                              "listeners": int(ls),
                              "max": 0, #int(uu[6]),
                              "bitrate": int(bit),
                              #"homepage": http.fix_url(homepage),
                              #"playing": self.entity_decode(playing),
                              "genre": genre,
                              "listeners": int(listeners),
                              #"max": 0, #int(uu[6]),
                              "bitrate": int(bitrate),
                              "format": self.mime_fmt(fmt),
                          }]

                  # PyQuery parsing
                  else:
                      # iterate over DOM
                      for div in (pq(e) for e in pq(html).find("div.dirlist")):
                      for div in (pq(e) for e in pq(html).find("tr")):

                          entries.append({
                               "title": div.find("a.playbutton,a.playbutton1").attr("title"),
                               "url": div.find("a.playbutton,a.playbutton1").attr("href"),
                               "homepage": http.fix_url(div.find("a.div_website").attr("href")),
                               "playing": div.find("div.playingtext").attr("title"),
                               "title": div.find("a.transition").text(),
                               "url": div.find("a.transition").attr("href"),
                               "homepage": "",
                               "playing": div.find("td:eq(2)").text(),
   #                            "title": div.find("a.clickabletitleGenre, div.stationcol a").attr("title"),
   #                            "url": div.find("a.playbutton, a.playbutton1, a.playimage").attr("href"),
   #                            "homepage": http.fix_url(div.find("a.playbutton.clickabletitle, a[target=_blank], a.clickabletitleGenre, a.clickabletitle, div.stationcol a, a").attr("href")),
   #                            "playing": div.find("div.playingtextGenre, div.playingtext").attr("title"),
                               "listeners": int(div.find("div.dirlistners").text()),
                               "bitrate": int(div.find("div.dirbitrate").text()),
                               "format": self.mime_fmt(div.find("div.dirtype").text()),
                               "listeners": int(div.find("td:eq(4)").text()),
                               "bitrate": int(div.find("td:eq(5)").text()),
                               "format": self.mime_fmt(div.find("td:eq(6)").text()),
                               "max": 0,
                               "genre": cat,
                              # "title2": e.find("a.playbutton").attr("name"),
                          })


                  # display partial results (not strictly needed anymore, because we fetch just one page)
                  self.parent.status()
                  self.update_streams_partially_done(entries)
                  
                  # more pages to load?
                  if (re.search(rx_next, html)):
                     next += count
                  else:
                     next = 99999
                  next = 99999
                     
            except:
            except Exception as e:
               __print__(e)
               return entries
            
            #fin
            __print__(entries)
            return entries