Index: action.py ================================================================== --- action.py +++ action.py @@ -122,20 +122,20 @@ # Exec wrapper # def run(cmd): - log.PROC("Exec:", cmd) + log.EXEC(cmd) try: os.system("start \"%s\"" % cmd if conf.windows else cmd + " &") except: log.ERR("Command not found:", cmd) # Start web browser # def browser(url): bin = conf.play.get("url/http", "sensible-browser") - log.BROWSER(bin) + log.EXEC(bin) run(bin + " " + quote(url)) # Open help browser, streamtuner2 pages # @@ -244,11 +244,11 @@ ext = cnv.probe_ext(url) probe = cnv.probe_fmt() # Check ambiguity (except pseudo extension) if len(set([source, mime, probe])) > 1: - log.ERR("Possible playlist format mismatch:", "listformat={}, http_mime={}, rx_probe={}, ext={}".format(source, mime, probe, ext)) + log.WARN("Possible playlist format mismatch:", "listformat={}, http_mime={}, rx_probe={}, ext={}".format(source, mime, probe, ext)) # Extract URLs from content for fmt in playlist_fmt_prio: if not urls and fmt in (source, mime, probe, ext, "raw"): urls = cnv.urls(fmt) @@ -324,11 +324,11 @@ self.src = open(fn, "rt").read() # Test URL/path "extension" for ".pls" / ".m3u" etc. 
def probe_ext(self, url): - e = re.findall("\.(pls|m3u|xspf|jspf|asx|wpl|wsf|smil|html|url|json|desktop)$", url) + e = re.findall("\.(pls|m3u|xspf|jspf|asx|wpl|wsf|smil|html|url|json|desktop)\d?$", url) if e: return e[0] else: pass # Probe MIME type and content per regex @@ -335,24 +335,29 @@ def probe_fmt(self): for probe,rx in playlist_content_map: if re.search(rx, self.src, re.X|re.M|re.S): return listfmt(probe) return None + # Return just URL list from extracted playlist def urls(self, fmt): return [row["url"] for row in self.rows(fmt)] + # Extract stream rows (url/title dicts) from given source type def rows(self, fmt=None): if not fmt: fmt = self.probe_fmt() log.DATA("input extractor/regex:", fmt, len(self.src)) # specific extractor implementations - if fmt in self.__dict__: - return getattr(self, fmt)() + if fmt in dir(self): + try: + return getattr(self, fmt)() + except Exception as e: + log.WARN("Native {} parser failed on input (improper encoding, etc)".format(fmt), e) # regex scheme rules = self.extr_urls[fmt] rows = [] fields = [name for name in ("url", "title", "homepage", "genre", "playing") if rules.get(name)] @@ -386,19 +391,21 @@ vals = re.findall(rules[name], src_part, re.X) #log.PLS_EXTR_FIELD(name, vals, src_part, rules[name]) return [self.decode(val, rules.get("unesc")) for val in vals] return [None] - # Decoding + + # String decoding def decode(self, val, unesc): if unesc in ("xml", "*"): val = xmlunescape(val) if unesc in ("json", "*"): val = val.replace("\\/", "/") return val - # filter out duplicate urls + + # Filter out duplicate urls def uniq(self, rows): seen = [] filtered = [] for row in rows: if not row or not row.get("url") or row.get("url") in seen: @@ -409,12 +416,12 @@ # These regexps only look out for URLs, not local file paths. 
extr_urls = { "pls": dict( - url = r"(?im) ^ \s*File\d* \s*=\s* (\w+://[^\s]+) ", - title = r"(?m) ^Title\d*=(.+)", + url = r"(?m) ^File\d* \s*=\s* (\w+://[^\s]+) ", + title = r"(?m) ^Title\d* \s*=\s*(.+)", # Notably this extraction method assumes the entries are grouped in associative order ), "m3u": dict( split = r"(?m) (?=^\#)", url = r"(?m) ^( \w+:// [^#\n]+ )", @@ -470,10 +477,23 @@ url = r" (?i) ( [\w+]+:// [^\s\"\'\>\#]+ ) ", title = r"(?i)Title[\W]+(.+)", unesc = "*", ), } + + + # More exact PLS extraction (for the unlikely case entries were misordered) + def pls(self): + fieldmap = dict(file="url", title="title") + rows = {} + for field,num,value in re.findall("^\s* ([a-z_-]+) (\d+) \s*=\s* (.*) $", self.src, re.M|re.I|re.X): + if not num in rows: + rows[num] = {} + field = fieldmap.get(field.lower()) + if field: + rows[num][field] = value.strip() + return [rows[str(i)] for i in sorted(map(int, rows.keys()))] # Add placeholder fields to extracted row def mkrow(self, row, title=None): url = row.get("url", "")