Index: channels/__init__.py ================================================================== --- channels/__init__.py +++ channels/__init__.py @@ -41,11 +41,12 @@ import inspect # Only export plugin classes __all__ = [ - "GenericChannel", "ChannelPlugin", "use_rx" + "GenericChannel", "ChannelPlugin", "use_rx", + "entity_decode", "strip_tags", "to_int", "nl" ] # generic channel module --------------------------------------- @@ -165,12 +166,12 @@ # add to main menu uikit.add_menu([parent.channelmenuitems], self.meta["title"], lambda w: parent.channel_switch_by_name(self.module) or 1) # Statusbar stub (defers to parent/main window, if in GUI mode) - def status(self, *v): - if self.parent: self.parent.status(*v) + def status(self, *args, **kw): + if self.parent: self.parent.status(*args, **kw) else: log.INFO("status():", *v) #--------------------- streams/model data accesss --------------------------- @@ -547,13 +548,10 @@ #--------------------------- utility functions ----------------------- - # remove html from string - def strip_tags(self, s): - return re.sub("<.+?>", "", s) # convert audio format nick/shortnames to mime types, e.g. "OGG" to "audio/ogg" def mime_fmt(self, s): # clean string s = s.lower().strip() @@ -574,31 +572,10 @@ if s.find("/") < 1: s = "audio/" + s # return s - # remove SGML/XML entities - def entity_decode(self, str): - return re.sub('&(#?(x?))(\w+);', self._entity, str) - def _entity(self, sym): - num, hex, name = sym.groups() - if hex: - return unichr(int(name, base=16)) - elif num: - return unichr(int(name)) - else: - return unichr(htmlentitydefs.name2codepoint[name]) - - # convert special characters to &xx; escapes - def xmlentities(self, s): - return xml.sax.saxutils.escape(s) - - # Extracts integer from string - def to_int(self, s): - i = re.findall("\d+", s) or [0] - return int(i[0]) - @@ -686,10 +663,11 @@ # add notebook tab tab = parent.notebook_channels.insert_page_menu(vbox, ev_label, plain_label, -1) parent.notebook_channels.set_tab_reorderable(vbox, True) + # WORKAROUND for direct channel module imports, # eases instantiations without GUI a little, # reducing module dependencies (conf. / ahttp. / channels. / parent.) would be better def stub_parent(object): @@ -713,5 +691,39 @@ continue return [] return try_both + +#---------------- utility functions ------------------- +# Used by raw page extraction in channel modules + + +# Strip html from string +def strip_tags(s): + return re.sub("<.+?>", "", s) + +# remove SGML/XML entities +def entity_decode(str): + return re.sub('&(#?(x?))(\w+);', _entity, str) +def _entity(sym): + num, hex, name = sym.groups() + if hex: + return unichr(int(name, base=16)) + elif num: + return unichr(int(name)) + else: + return unichr(htmlentitydefs.name2codepoint[name]) + +# Extracts integer from string +def to_int(s): + i = re.findall("\d+", s) or [0] + return int(i[0]) + +# Strip newlines +rx_spc = re.compile("\s+") +def nl(str): + return rx_spc.sub(" ", str).strip() + + +def unhtml(str): + return nl(entity_decode(strip_tags(str)))