Index: channels/__init__.py ================================================================== --- channels/__init__.py +++ channels/__init__.py @@ -42,11 +42,11 @@ # Only export plugin classes __all__ = [ "GenericChannel", "ChannelPlugin", "use_rx", - "entity_decode", "strip_tags", "to_int", "nl" + "entity_decode", "strip_tags", "nl", "unhtml", "to_int" ] # generic channel module --------------------------------------- @@ -722,8 +722,10 @@ # Strip newlines rx_spc = re.compile("\s+") def nl(str): return rx_spc.sub(" ", str).strip() - +# Combine html tag, escapes and whitespace cleanup def unhtml(str): return nl(entity_decode(strip_tags(str))) + + Index: channels/xiph.py ================================================================== --- channels/xiph.py +++ channels/xiph.py @@ -211,14 +211,14 @@ """, html, re.X|re.S) # Assemble for homepage, title, listeners, playing, tags, url, bits, fmt in ls: r.append(dict( - genre = clean(tags), - title = clean(title), + genre = unhtml(tags), + title = unhtml(title), homepage = ahttp.fix_url(homepage), - playing = clean(playing), + playing = unhtml(playing), url = "http://dir.xiph.org{}".format(url), listformat = "xspf", listeners = int(listeners), bitrate = bitrate(bits), format = self.mime_fmt(guess_format(fmt)), @@ -517,11 +517,6 @@ # Extract mime type from text rx_fmt = re.compile("ogg|mp3|mp4|theora|nsv|webm|opus|mpeg") def guess_format(str): return rx_fmt.findall(str.lower() + "mpeg")[0] - -# Clean up HTML text snippets -def clean(str): - return nl(entity_decode(strip_tags(str))) - Index: contrib/delicast.py ================================================================== --- contrib/delicast.py +++ contrib/delicast.py @@ -66,14 +66,14 @@ if len(ls): homepage, country, title = ls[0] r.append(dict( homepage = homepage, playing = country, - title = self.entity_decode(title).strip(), + title = unhtml(title), url = "urn:delicast", genre = cat, - # genre = self.entity_decode(self.strip_tags(tags)).strip(), + # genre = unhtml(tags), )) return r # Update `url`