Check-in [b6d88bcd1f]
Overview
| Comment: | Document more interna of radio.net extraction | 
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive | 
| Timelines: | family | ancestors | descendants | both | trunk | 
| Files: | files | file ages | folders | 
| SHA1: | 
b6d88bcd1f9fc688519407e7ecf1cdc8 | 
| User & Date: | mario on 2017-02-16 16:56:59 | 
| Other Links: | manifest | tags | 
Context
| 
   2017-02-20 
 | ||
| 19:54 | Add stub parameter -w (used by pydoc) check-in: ff61e15d6a user: mario tags: trunk | |
| 
   2017-02-16 
 | ||
| 16:56 | Document more interna of radio.net extraction check-in: b6d88bcd1f user: mario tags: trunk | |
| 
   2017-02-15 
 | ||
| 21:38 | document recent channel/feature plugins check-in: 7eb6bd6410 user: mario tags: trunk | |
Changes
Modified contrib/radionet.py from [29c966c64c] to [460ab10c07].
| ︙ | ︙ | |||
40 41 42 43 44 45 46  | 
action.extract_playlist.extr_urls["rnjs"] = dict(
    url   = r" (?x) \"streamUrl\" \s*:\s* \"(\w+:\\?/\\?/[^\"]+)\" ",
    title = r" (?x) \"(?:description|seoTitle)\" \s*:\s* \"([^\"]+)\" ",
    unesc = "json",
)
 | | > > > > > > > > > > > | >  | 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67  | 
action.extract_playlist.extr_urls["rnjs"] = dict(
    url   = r" (?x) \"streamUrl\" \s*:\s* \"(\w+:\\?/\\?/[^\"]+)\" ",
    title = r" (?x) \"(?:description|seoTitle)\" \s*:\s* \"([^\"]+)\" ",
    unesc = "json",
)
# Radio.net
#
# · Uses HTML block-wise regex extraction.
#   → <a href="stationname.radio.net"> <img…> <strong>…</strong> <small>…</small>
#
# · There's an API key in each page listing, contained in a script block
#   as `apiKey: '…'?`
#
# · Which is needed for generating the station info JSON urls:
#   → https://api.radio.net/info/v2/search/station?apikey=…&pageindex=1&station=STNAME
#
# · To extract these JSON info targets, a custom extraction recipe is injected
#   into the action module.
#   → "streamUrl": and "description": are scanned for.
#
class radionet (ChannelPlugin):
    # control flags
    has_search = False
    audioformat = "audio/mpeg"
    listformat = "rnjs"
 | 
| ︙ | ︙ | |||
75 76 77 78 79 80 81  | 
        # category page, get key
        html = ahttp.get(self.genre_url.format(cat))
        for p in range(2, 4):
            if html.find('"?p={}">'.format(p)) >= 0:
                html += ahttp.get(self.genre_url.format(cat) + "?p={}".format(p))
        self.set_key(html)
        r = []
 | | < < < < < < < < < < < < < < < < < < |  | 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107  | 
        # category page, get key
        html = ahttp.get(self.genre_url.format(cat))
        for p in range(2, 4):
            if html.find('"?p={}">'.format(p)) >= 0:
                html += ahttp.get(self.genre_url.format(cat) + "?p={}".format(p))
        self.set_key(html)
        r = []
        # split station blocks
        for row in re.split("""<div class="stationinfo""", html)[1:]:
        
            # extract text fields
            d = re.findall("""
              <a\s+href="(?:https?:)?(//([\w-]+)\.radio\.net/?)" .*?
              <img\s+src="([^<">]+)" .*?
              <strong[^>]*>(.*?)</strong> .*?
              <small[^>]*>\s*(.*?)\s*</small> .*?
            """, row, re.X|re.S)
            
            # refurbish extracted strings
            if d and len(d) and len(d[0]) == 5:
 | 
| ︙ | ︙ |