Internet radio browser GUI for music/video streams from various directory services.

⌈⌋ branch:  streamtuner2


Check-in [9688154862]

Overview
Comment:Switched radionet plugin to resolve_urn() and speedier
-grep loop.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:9688154862a61e33ba96cb4eefd87da3d39fb1ee
User & Date: mario on 2019-03-24 11:50:24
Other Links: manifest | tags
Context
2019-06-14
18:25
streema: fix url regex (now alphanumeric titles) and urn_resolve patterns for new player layout Leaf check-in: fac6bc374a user: mario tags: trunk
2019-03-24
11:50
Switched radionet plugin to resolve_urn() and speedier
-grep loop.
check-in: 9688154862 user: mario tags: trunk
10:25
Crude fix for new station lookup. Regex still has horrible backtracking. (Should use resolve_urn rather than rnjs playlist workaround.) check-in: 6bfe67e367 user: mario tags: trunk
Changes

Modified contrib/radionet.py from [632ecf0169] to [edd29444df].

1
2
3
4
5
6
7
8
9
10
11
12
13
..
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
..
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
..
86
87
88
89
90
91
92
93
94
95
96
97
98

99
100
101
102
103
104
105
106

107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122

123
124
125
126
127
128
129

130
131
132
133
134
135
136
137
138
139
# encoding: UTF-8
# api: streamtuner2
# title: radio.net
# description: Europe's biggest radio platform
# url: http://radio.net/
# version: 0.7
# type: channel
# category: radio
# png:
#   iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAt0lEQVR42mNgYGD4r+Ar/F/BDwkD+SBxojBMs1mLPBArgGlFqEEENYMNQNLsukIDYkirAvGu
#   ABsA1OC6XOP/5f8nwIaYAg0k2gBFsAsgTgcZkvnfDugFEeK9AFKsCPMG0CU6eZJgQ4R1eP8H7LLEivWyFJANQcQCLPBAmkGG4MJohmA6C6QA5gI5OxEUDNII
#   MwSvASBFIA3ociCxkWQAKMDICkSQIpgh2LDnSmP80YhsCFEJiRIMADpmeUOpqgjRAAAAAElFTkSuQmCC
# priority: optional
................................................................................
# extraction-method: regex
#
# Radio.net lists around 20.000 worldwide radio stations.
# A maximum of three pages from each genre are fetched here,
# some of the empty categories already omitted.
#
# The website heavily depends on JavaScript, a Flash player,
# and some social tracking cookies, but it is still feasible
# to access via a custom JSON extractor.
#
# May require refreshing the station lists once in a while,
# because there's an API key in each JSON station info URL.


import time
import json
import re
from config import *
from channels import *
................................................................................
#   → "streamUrl": and "description": are scanned for.
#
class radionet (ChannelPlugin):

    # control flags
    has_search = False
    audioformat = "audio/mpeg"
    listformat = "rnjs"
    titles = dict(listeners=False, playing="Description")

    # sources
    apiPrefix = "https://api.radio.net/info/v2"
    genre_url = "http://www.radio.net/genre/{}"
    apiKey = None
    
................................................................................

        # category page, get key
        html = ahttp.get(self.genre_url.format(cat))
        for p in range(2, 4):
            if html.find('"?p={}">'.format(p)) >= 0:
                html += ahttp.get(self.genre_url.format(cat) + "?p={}".format(p))
        self.set_key(html)
        log.HTML(html)
        r = []


        # split station blocks
        for row in re.split("""<div class="stationinfo""", html)[1:]:

        
            # extract text fields
            d = re.findall("""
              <a\s+href="(?:https?:)?(//(?:[\w-]+)\.radio\.net/s/([^"]+)/?)" .*?
              <img[^>]+ src="([^<">]+)" .*?
              <strong[^>]*>(.*?)</strong> .*?
              <small[^>]*>\s*(.*?)\s*</small> .*?
            """, row, re.X|re.S)

            
            # refurbish extracted strings
            if d and len(d) and len(d[0]) == 5:
                href, name, img, title, desc = d[0]
                r.append(dict(
                    name = name,
                    genre = cat,
                    title = unhtml(title),
                    playing = unhtml(desc),
                    url = "http:{}".format(href), #self._url(name),
                    homepage = "http:{}".format(href),
                    img = img,
                ));
        return r
    


    # Patch together JSON station info URL
    def _url(self, name):
        return \
        "{}/search/station?apikey={}&pageindex=1&station={}".format(
            self.apiPrefix, self.apiKey, name
        )      # '?_={time}&' is omitted here, only relevant to jQuery/AJAX,
               # and just made bookmarks.is_in() fail due to randomized URLs



    # extract JavaScript key from any HTML blob (needed for station query)
    def set_key(self, html):
        ls = re.findall("""apiKey: '(\w+)'""", html)
        if ls:
            self.apiKey = ls[0]








|







 







|
|
|
<
<







 







|







 







<




|
>






|

>









|




|
<
>
|
|
|
|
|
|
|
>
|









1
2
3
4
5
6
7
8
9
10
11
12
13
..
14
15
16
17
18
19
20
21
22
23


24
25
26
27
28
29
30
..
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
..
84
85
86
87
88
89
90

91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120

121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# encoding: UTF-8
# api: streamtuner2
# title: radio.net
# description: Europe's biggest radio platform
# url: http://radio.net/
# version: 0.8
# type: channel
# category: radio
# png:
#   iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAt0lEQVR42mNgYGD4r+Ar/F/BDwkD+SBxojBMs1mLPBArgGlFqEEENYMNQNLsukIDYkirAvGu
#   ABsA1OC6XOP/5f8nwIaYAg0k2gBFsAsgTgcZkvnfDugFEeK9AFKsCPMG0CU6eZJgQ4R1eP8H7LLEivWyFJANQcQCLPBAmkGG4MJohmA6C6QA5gI5OxEUDNII
#   MwSvASBFIA3ociCxkWQAKMDICkSQIpgh2LDnSmP80YhsCFEJiRIMADpmeUOpqgjRAAAAAElFTkSuQmCC
# priority: optional
................................................................................
# extraction-method: regex
#
# Radio.net lists around 20.000 worldwide radio stations.
# A maximum of three pages from each genre are fetched here,
# some of the empty categories already omitted.
#
# The website heavily depends on JavaScript, a Flash player,
# and some social tracking cookies. It now requires an additional
# page request to get the real streaming URL, but at least there
# is no longer an expiring access key.




import time
import json
import re
from config import *
from channels import *
................................................................................
#   → "streamUrl": and "description": are scanned for.
#
class radionet (ChannelPlugin):

    # control flags
    has_search = False
    audioformat = "audio/mpeg"
    listformat = "href"
    titles = dict(listeners=False, playing="Description")

    # sources
    apiPrefix = "https://api.radio.net/info/v2"
    genre_url = "http://www.radio.net/genre/{}"
    apiKey = None
    
................................................................................

        # category page, get key
        html = ahttp.get(self.genre_url.format(cat))
        for p in range(2, 4):
            if html.find('"?p={}">'.format(p)) >= 0:
                html += ahttp.get(self.genre_url.format(cat) + "?p={}".format(p))
        self.set_key(html)

        r = []


        # split station blocks
        for row in re.findall("""<div class="stationinfo(.+?)</div>""", html, re.S):
            log.FOR_rOW(1)
        
            # extract text fields
            d = re.findall("""
              <a\s+href="(?:https?:)?(//(?:[\w-]+)\.radio\.net/s/([^"]+)/?)" .*?
              <img[^>]+ src="([^<">]+)" .*?
              <strong[^>]*>(.*?)</strong> .*?
              <small[^>]*>\s*(.*?)\s*</small>
            """, row, re.X|re.S)
#            log.DATA_ROW(d)
            
            # refurbish extracted strings
            if d and len(d) and len(d[0]) == 5:
                href, name, img, title, desc = d[0]
                r.append(dict(
                    name = name,
                    genre = cat,
                    title = unhtml(title),
                    playing = unhtml(desc),
                    url = "urn:radionet:"+name, #self._url(name),
                    homepage = "http:{}".format(href),
                    img = img,
                ));
        return r


    # The API search is gone; the streamUrl now has to be fetched from the per-station homepage
    def resolve_urn(self, row):
        if row.get("url", "-").find("urn:radionet:") != 0:
            return
        html = ahttp.get(row["homepage"])
        stream = re.findall('"streamUrl":\s*"([^"]+)"', html, re.S)
        if stream:
            row["url"] = stream[0]
            return row
    

    # extract JavaScript key from any HTML blob (needed for station query)
    def set_key(self, html):
        ls = re.findall("""apiKey: '(\w+)'""", html)
        if ls:
            self.apiKey = ls[0]