Internet radio browser GUI for music/video streams from various directory services.

⌈⌋ ⎇ branch:  streamtuner2


Check-in [6f314952b9]

Overview
Comment:Add combined unhtml() utility function for raw page extractors.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 6f314952b924f0c34a533a936976cf753ce887dd
User & Date: mario on 2015-05-02 20:03:36
Other Links: manifest | tags
Context
2015-05-02
23:44
Fix xiph search URL and by_format mapping. check-in: 026af5c9fb user: mario tags: trunk
20:03
Add combined unhtml() utility function for raw page extractors. check-in: 6f314952b9 user: mario tags: trunk
20:03
Clean out unneeded xml module references. check-in: 4797dcce8e user: mario tags: trunk
Changes

Modified channels/__init__.py from [90231f2619] to [4aa8b6efaf].

40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import copy
import inspect


# Names exported on star-import: the plugin base classes plus shared helpers
__all__ = [
    "GenericChannel",
    "ChannelPlugin",
    "use_rx",
    "entity_decode",
    "strip_tags",
    "to_int",
    "nl",
]



# generic channel module                            ---------------------------------------
class GenericChannel(object):








|







40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import copy
import inspect


# Names exported on star-import: the plugin base classes plus shared helpers
__all__ = [
    "GenericChannel",
    "ChannelPlugin",
    "use_rx",
    "entity_decode",
    "strip_tags",
    "nl",
    "unhtml",
    "to_int",
]



# generic channel module                            ---------------------------------------
class GenericChannel(object):

720
721
722
723
724
725
726
727
728
729


    return int(i[0])

# Collapse whitespace runs (newlines included) into single spaces
rx_spc = re.compile(r"\s+")   # raw string: "\s" is not a valid str escape
def nl(str):
    """Return `str` with every whitespace run replaced by one space.

    Leading and trailing whitespace is removed from the result.
    """
    return rx_spc.sub(" ", str).strip()


def unhtml(str):
    """Pass `str` through strip_tags(), entity_decode() and nl(), in that order."""
    without_tags = strip_tags(str)
    decoded = entity_decode(without_tags)
    return nl(decoded)









|


>
>
720
721
722
723
724
725
726
727
728
729
730
731
    return int(i[0])

# Collapse whitespace runs (newlines included) into single spaces
rx_spc = re.compile(r"\s+")   # raw string: "\s" is not a valid str escape
def nl(str):
    """Return `str` with every whitespace run replaced by one space.

    Leading and trailing whitespace is removed from the result.
    """
    return rx_spc.sub(" ", str).strip()

# One-stop cleanup for raw page snippets: tags, escapes, whitespace
def unhtml(str):
    """Pass `str` through strip_tags(), entity_decode() and nl(), in that order."""
    without_tags = strip_tags(str)
    decoded = entity_decode(without_tags)
    return nl(decoded)


Modified channels/xiph.py from [0e535d4458] to [10274b4d66].

209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
          .*? class="format"\s+title="([^"]+)"
          .*? /by_format/([^"]+)
      """, html, re.X|re.S)
      
      # Assemble
      for homepage, title, listeners, playing, tags, url, bits, fmt in ls:
          r.append(dict(
              genre = clean(tags),
              title = clean(title),
              homepage = ahttp.fix_url(homepage),
              playing = clean(playing),
              url = "http://dir.xiph.org{}".format(url),
              listformat = "xspf",
              listeners = int(listeners),
              bitrate = bitrate(bits),
              format = self.mime_fmt(guess_format(fmt)),
          ))
      return r







|
|

|







209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
          .*? class="format"\s+title="([^"]+)"
          .*? /by_format/([^"]+)
      """, html, re.X|re.S)
      
      # Assemble
      for homepage, title, listeners, playing, tags, url, bits, fmt in ls:
          r.append(dict(
              genre = unhtml(tags),
              title = unhtml(title),
              homepage = ahttp.fix_url(homepage),
              playing = unhtml(playing),
              url = "http://dir.xiph.org{}".format(url),
              listformat = "xspf",
              listeners = int(listeners),
              bitrate = bitrate(bits),
              format = self.mime_fmt(guess_format(fmt)),
          ))
      return r
515
516
517
518
519
520
521
522
523
524
525
526
527
        return 0


# Extract mime type from text
rx_fmt = re.compile("ogg|mp3|mp4|theora|nsv|webm|opus|mpeg")
def guess_format(str):
    """Return the first known format keyword found in `str`, else "mpeg".

    Matching is case-insensitive (the input is lowercased first).  The
    default is returned explicitly instead of being appended to the input:
    concatenating "mpeg" made text ending in "web" match "webm" by accident.
    """
    found = rx_fmt.search(str.lower())
    return found.group(0) if found else "mpeg"

# Tidy an HTML text snippet: tags, escapes, then whitespace
def clean(str):
    """Pass `str` through strip_tags(), entity_decode() and nl(), in that order."""
    without_tags = strip_tags(str)
    decoded = entity_decode(without_tags)
    return nl(decoded)










<
<
<
<
<
515
516
517
518
519
520
521
522





        return 0


# Extract mime type from text
rx_fmt = re.compile("ogg|mp3|mp4|theora|nsv|webm|opus|mpeg")
def guess_format(str):
    """Return the first known format keyword found in `str`, else "mpeg".

    Matching is case-insensitive (the input is lowercased first).  The
    default is returned explicitly instead of being appended to the input:
    concatenating "mpeg" made text ending in "web" match "webm" by accident.
    """
    found = rx_fmt.search(str.lower())
    return found.group(0) if found else "mpeg"






Modified contrib/delicast.py from [b077b90d19] to [cf6c41823e].

64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
            """, tr, re.X|re.S)
            print ls
            if len(ls):
                homepage, country, title = ls[0]
                r.append(dict(
                    homepage = homepage,
                    playing = country,
                    title = self.entity_decode(title).strip(),
                    url = "urn:delicast",
                    genre = cat,
             #      genre = self.entity_decode(self.strip_tags(tags)).strip(),
                ))
        return r
      

    # Update `url`
    def row(self):
        """Return the current row, replacing a placeholder stream URL.

        Rows whose `url` is the "urn:delicast" placeholder get their
        `homepage` fetched via ahttp.get(); the first `var url = "...";`
        line found in the response becomes the row's `url`.  If no such
        line exists the placeholder is kept, instead of raising IndexError.
        """
        r = ChannelPlugin.row(self)
        if r.get("url") == "urn:delicast":
            html = ahttp.get(r["homepage"])
            ls = re.findall("^var url = \"(.+)\";", html, re.M)
            # Only overwrite when the page actually contained a match
            if ls:
                r["url"] = ls[0]
        return r








|


|













64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
            """, tr, re.X|re.S)
            print ls
            if len(ls):
                homepage, country, title = ls[0]
                r.append(dict(
                    homepage = homepage,
                    playing = country,
                    title = unhtml(title),
                    url = "urn:delicast",
                    genre = cat,
             #      genre = unhtml(tags),
                ))
        return r
      

    # Update `url`
    def row(self):
        """Return the current row, replacing a placeholder stream URL.

        Rows whose `url` is the "urn:delicast" placeholder get their
        `homepage` fetched via ahttp.get(); the first `var url = "...";`
        line found in the response becomes the row's `url`.  If no such
        line exists the placeholder is kept, instead of raising IndexError.
        """
        r = ChannelPlugin.row(self)
        if r.get("url") == "urn:delicast":
            html = ahttp.get(r["homepage"])
            ls = re.findall("^var url = \"(.+)\";", html, re.M)
            # Only overwrite when the page actually contained a match
            if ls:
                r["url"] = ls[0]
        return r