Internet radio browser GUI for music/video streams from various directory services.

⌈⌋ ⎇ branch:  streamtuner2


Check-in [141c3a22fb]

Overview
Comment: More complicated regex to extract paid stations and even incomplete entries (empty titles or absent homepage URLs), so it once again gives a more complete listing than PyQuery parsing.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | 2.1.1
Files: files | file ages | folders
SHA1: 141c3a22fbc26b79df31bbf0f01dcebca34a1192
User & Date: mario on 2014-05-28 16:31:37
Other Links: manifest | tags
Context
2014-05-28
19:53
English-localized version of SurfMusic.de now supported check-in: 4db7cbeed2 user: mario tags: trunk
16:31
More complicated regex to extract paid stations and even incomplete entries (empty titles or absent homepage URLs), so it once again gives a more complete listing than PyQuery parsing. check-in: 141c3a22fb user: mario tags: trunk, 2.1.1
15:54
Updates for 2.1.1 release check-in: e091ecf510 user: mario tags: trunk
Changes

Modified channels/internet_radio.py from [1aa6aca44b] to [bb1ce6b50e].

199
200
201
202
203
204
205
206
207


208
209
210


211
212
213


214
215
216

217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
        # Break up into <tr> blocks before extracting bits
        rx_tr = re.compile("""<tr[^>]*>(.+?)</tr>""", re.S)
        rx_data = re.compile(r"""
               \?u=(https?://[^'">]+/listen\.pls)       
               .*?
               <div[^>]+10px[^>]+>(.+?)</div>           
               .*?
               (?:href="/station/[^>]+|<b)>\s*([^<>]+)\s*</[ab]>
               .*?


               (?:<br>\s*([^<>]+)\s*<br>)+?                
               .*?
               <a[^>]+class="url"[^>]+href="([^<">]+)"  


               .*?
               (?:(\d+)\s+Kbps \s*<br>\s*)+?                  
               (?:(\d+)\s+Listeners \s*<br>\s*)+?             


        """, re.S|re.X)

        for div in rx_tr.findall(html):

            #__print__(dbg.DATA, len(div))
            uu = rx_data.search(div)
            if uu:
                (url, genres, title, playing, homepage, bitrate, listeners) = uu.groups()
                
                # transform data
                r.append({
                    "url": url,
                    "genre": self.strip_tags(genres),
                    "homepage": http.fix_url(homepage),
                    "title": title.strip(),
                    "playing": playing.strip(),
                    "bitrate": int(bitrate if bitrate else 0),
                    "listeners": int(listeners if listeners else 0),
                    "format": "audio/mpeg", # there is no stream info on that, but internet-radio.org.uk doesn't seem very ogg-friendly anyway, so we assume the default here
                })
            else:
                __print__(dbg.ERR, "rx missed", div)
        return r


    # DOM traversing
    def with_dom(self, html_list):
        __print__(dbg.PROC, "internet-radio, dom")
        rx_numbers = re.compile("(\d+)")







|

>
>
|

|
>
>

|
|
>
>



>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|







199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
        # Break up into <tr> blocks before extracting bits
        rx_tr = re.compile("""<tr[^>]*>(.+?)</tr>""", re.S)
        rx_data = re.compile(r"""
               \?u=(https?://[^'">]+/listen\.pls)       
               .*?
               <div[^>]+10px[^>]+>(.+?)</div>           
               .*?
               listing2
               .*?
               (?:href="/station/[^>]+> | <b>) ([^<>]+) </[ab]>
               (?:\s*</span>\s*)*
               (?:<br>\s*([^<>]+)\s*<br>)?                
               .*?
               (?:<a[^>]+class="url"[^>]+href="([^<">]+)")?  
               .+
               listing1
               .*?
               (?:(\d+)\s+Kbps \s*<br>\s*)?                  
               (?:(\d+)\s+Listeners)?
               (?:\s*<br>\s*)?
               \s*</td>             
        """, re.S|re.X)

        for div in rx_tr.findall(html):
            if div.find('id="pagination"') < 0:
                #__print__(dbg.DATA, len(div))
                uu = rx_data.search(div)
                if uu:
                    (url, genres, title, playing, homepage, bitrate, listeners) = uu.groups()
                    
                    # transform data
                    r.append({
                        "url": url,
                        "genre": self.strip_tags(genres),
                        "homepage": http.fix_url(homepage),
                        "title": (title if title else "").strip(),
                        "playing": (playing if playing else "").strip(),
                        "bitrate": int(bitrate if bitrate else 0),
                        "listeners": int(listeners if listeners else 0),
                        "format": "audio/mpeg", # there is no stream info on that, but internet-radio.org.uk doesn't seem very ogg-friendly anyway, so we assume the default here
                    })
                else:
                    __print__(dbg.ERR, "rx missed", div)
        return r


    # DOM traversing
    def with_dom(self, html_list):
        __print__(dbg.PROC, "internet-radio, dom")
        rx_numbers = re.compile("(\d+)")