Check-in [b6d88bcd1f]
Overview
| Comment: | Document more interna of radio.net extraction |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | trunk |
| Files: | files | file ages | folders |
| SHA1: |
b6d88bcd1f9fc688519407e7ecf1cdc8 |
| User & Date: | mario on 2017-02-16 16:56:59 |
| Other Links: | manifest | tags |
Context
|
2017-02-20
| ||
| 19:54 | Add stub parameter -w (used by pydoc) check-in: ff61e15d6a user: mario tags: trunk | |
|
2017-02-16
| ||
| 16:56 | Document more interna of radio.net extraction check-in: b6d88bcd1f user: mario tags: trunk | |
|
2017-02-15
| ||
| 21:38 | document recent channel/feature plugins check-in: 7eb6bd6410 user: mario tags: trunk | |
Changes
Modified contrib/radionet.py from [29c966c64c] to [460ab10c07].
| ︙ | ︙ | |||
40 41 42 43 44 45 46 |
action.extract_playlist.extr_urls["rnjs"] = dict(
url = r" (?x) \"streamUrl\" \s*:\s* \"(\w+:\\?/\\?/[^\"]+)\" ",
title = r" (?x) \"(?:description|seoTitle)\" \s*:\s* \"([^\"]+)\" ",
unesc = "json",
)
| | > > > > > > > > > > > | > | 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
action.extract_playlist.extr_urls["rnjs"] = dict(
url = r" (?x) \"streamUrl\" \s*:\s* \"(\w+:\\?/\\?/[^\"]+)\" ",
title = r" (?x) \"(?:description|seoTitle)\" \s*:\s* \"([^\"]+)\" ",
unesc = "json",
)
# Radio.net
#
# · Uses HTML block-wise regex extraction.
# → <a href="stationname.radio.net"> <img…> <strong>…</strong> <small>…</small>
#
# · There's an API key in each page listing, contained in a script block
# as `apiKey: '…'?`
#
# · Which is needed for generating the station info JSON URLs:
# → https://api.radio.net/info/v2/search/station?apikey=…&pageindex=1&station=STNAME
#
# · To extract these JSON info targets, a custom extraction recipe is injected
# into the action module.
# → "streamUrl": and "description": are scanned for.
#
class radionet (ChannelPlugin):
# control flags
has_search = False
audioformat = "audio/mpeg"
listformat = "rnjs"
|
| ︙ | ︙ | |||
75 76 77 78 79 80 81 |
# category page, get key
html = ahttp.get(self.genre_url.format(cat))
for p in range(2, 4):
if html.find('"?p={}">'.format(p)) >= 0:
html += ahttp.get(self.genre_url.format(cat) + "?p={}".format(p))
self.set_key(html)
r = []
| | < < < < < < < < < < < < < < < < < < | | 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
# category page, get key
html = ahttp.get(self.genre_url.format(cat))
for p in range(2, 4):
if html.find('"?p={}">'.format(p)) >= 0:
html += ahttp.get(self.genre_url.format(cat) + "?p={}".format(p))
self.set_key(html)
r = []
# split station blocks
for row in re.split("""<div class="stationinfo""", html)[1:]:
# extract text fields
d = re.findall("""
<a\s+href="(?:https?:)?(//([\w-]+)\.radio\.net/?)" .*?
<img\s+src="([^<">]+)" .*?
<strong[^>]*>(.*?)</strong> .*?
<small[^>]*>\s*(.*?)\s*</small> .*?
""", row, re.X|re.S)
# refurbish extracted strings
if d and len(d) and len(d[0]) == 5:
|
| ︙ | ︙ |