Index: action.py ================================================================== --- action.py +++ action.py @@ -55,10 +55,11 @@ "*/*": "href", # "href" for unknown responses "url/direct": "srv", "url/youtube": "href", "url/http": "href", "audio/x-pn-realaudio": "ram", + "application/json": "json", "application/smil": "smil", "application/vnd.ms-wpl":"smil", "audio/x-ms-wax": "asx", "video/x-ms-asf": "asx", "x-urn/st2-script": "script", # unused @@ -91,11 +92,11 @@ # Playlist format content probing (assert type) playlist_content_map = [ ("pls", r""" (?i)\[playlist\].*NumberOfEntries """), ("xspf", r""" <\?xml .* ]*> .* """), ("html", r""" (?i)<(audio|video)\b[^>]+\bsrc\s*=\s*["']?https?:// """), ("wpl", r""" <\?wpl \s+ version="1\.0" \s* \?> """), ("b4s", r""" """), # http://gonze.com/playlists/playlist-format-survey.html @@ -119,10 +120,11 @@ # Start web browser # def browser(url): bin = conf.play.get("url/http", "sensible-browser") + print url run(bin + " " + quote(url)) # Open help browser, streamtuner2 pages # @@ -147,14 +149,14 @@ # OS shell command escaping # def quote(ins): - if type(ins) is str: - return "%r" % str(ins) + if type(ins) is list: + return " ".join(["%r" % str(s) for s in ins]) else: - return " ".join(["%r" % str(s) for s in ins]) + return "%r" % str(ins) # Convert e.g. 
"text/x-scpls" MIME types to just "pls" monikers # def listfmt(t = "pls"): @@ -232,14 +234,14 @@ probe = listfmt(probe) break # with `probe` set # Check ambiguity (except pseudo extension) if len(set([source, mime, probe])) > 1: - debug(dbg.ERR, "Possible playlist format mismatch:", (source, mime, probe, ext)) + debug(dbg.ERR, "Possible playlist format mismatch:", "listformat={}, http_mime={}, rx_probe={}, ext={}".format(source, mime, probe, ext)) # Extract URLs from content - for fmt in ["pls", "xspf", "asx", "smil", "jspf", "m3u", "json", "asf", "jamj", "raw"]: + for fmt in [id[0] for id in extract_playlist.extr_urls]: if not urls and fmt in (source, mime, probe, ext, "raw"): urls = extract_playlist(cnt).format(fmt) debug(dbg.DATA, "conversion from:", source, " with extractor:", fmt, "got URLs=", urls) # Return original, or asis for srv targets @@ -289,43 +291,53 @@ # Extract URLs from playlist formats: # +# It's entirely regex-based at the moment, because that's more +# resilient against malformed XSPF or JSON. +# Needs proper extractors later for real playlist *imports*. +# class extract_playlist(object): # Content of playlist file src = "" def __init__(self, text): self.src = text # Extract only URLs from given source type def format(self, fmt): - debug(dbg.DATA, "input regex:", fmt, len(self.src)) - # regex - urls = re.findall(self.extr_urls[fmt], self.src, re.X) - # xml entities - urls = [xmlunescape(url) for url in urls] - # json escaping - urls = [url.replace("\\/", "/") for url in urls] - # uniques - urls = list(set(urls)) - return urls - - # Only look out for URLs, not local file paths - extr_urls = { - "pls": r"(?im) ^ \s*File\d* \s*=\s* (\w+://[^\s]+) ", - "m3u": r" (?m) ^( \w+:// [^#\n]+ )", - "xspf": r" (?x) (\w+://[^<>\s]+) ", - "asx": r" (?x) ]+\b href \s*=\s* [\'\"] (\w+://[^\s\"\']+) [\'\"] ", - "smil": r" (?x) <(?:audio|video|media)\b [^>]+ \b src \s*=\s* [^\"\']? 
\s* (\w+://[^\"\'\s]+) ", - "jspf": r" (?x) \"location\" \s*:\s* \"(\w+://[^\"\s]+)\" ", - "jamj": r" (?x) \"audio\" \s*:\s* \"(\w+:\\?/\\?/[^\"\s]+)\" ", - "json": r" (?x) \"url\" \s*:\s* \"(\w+://[^\"\s]+)\" ", - "asf": r" (?m) ^ \s*Ref\d+ = (\w+://[^\s]+) ", - "raw": r" (?i) ( [\w+]+:// [^\s\"\'\>\#]+ ) ", - } + debug(dbg.DATA, "input extractor/regex:", fmt, len(self.src)) + + # find extractor + if fmt in dir(self): + return self.__dict__[fmt]() + + # regex scheme + rx, decode = dict(self.extr_urls)[fmt] + urls = re.findall(rx, self.src, re.X) + # decode urls + if decode in ("xml", "*"): + urls = [xmlunescape(url) for url in urls] + if decode in ("json", "*"): + urls = [url.replace("\\/", "/") for url in urls] + # only uniques + return list(set(urls)) + + # Only look out for URLs, not local file paths, nor titles + extr_urls = ( + ("pls", (r"(?im) ^ \s*File\d* \s*=\s* (\w+://[^\s]+) ", None)), + ("m3u", (r" (?m) ^( \w+:// [^#\n]+ )", None)), + ("xspf", (r" (?x) (\w+://[^<>\s]+) ", "xml")), + ("asx", (r" (?x) ]+\b href \s*=\s* [\'\"] (\w+://[^\s\"\']+) [\'\"] ", "xml")), + ("smil", (r" (?x) <(?:audio|video|media)\b [^>]+ \b src \s*=\s* [^\"\']? \s* (\w+://[^\"\'\s]+) ", "xml")), + ("jspf", (r" (?x) \"location\" \s*:\s* \"(\w+://[^\"\s]+)\" ", "json")), + ("jamj", (r" (?x) \"audio\" \s*:\s* \"(\w+:\\?/\\?/[^\"\s]+)\" ", "json")), + ("json", (r" (?x) \"url\" \s*:\s* \"(\w+://[^\"\s]+)\" ", "json")), + ("asf", (r" (?m) ^ \s*Ref\d+ = (\w+://[^\s]+) ", "xml")), + ("raw", (r" (?i) ( [\w+]+:// [^\s\"\'\>\#]+ ) ", "*")), + ) # Save rows in one of the export formats. # # The export() version uses urls[]+row/title= as input, converts it into