Check-in [0725d3fbc8]
Overview
Comment: Add custom pls extractor (for unordered playlist entries), keep regex method as fallback. More logging.
SHA1: 0725d3fbc8bba8bee7cea85cbe67ae90
User & Date: mario on 2015-04-26 15:34:03
Context
2015-04-26
15:34  More customized log categories/colorization. check-in: 0943cca27e user: mario tags: trunk
15:34  Add custom pls extractor (for unordered playlist entries), keep regex method as fallback. More logging. check-in: 0725d3fbc8 user: mario tags: trunk
2015-04-25
00:39  Replace statusbar with plain gtk.Label, use glib.timeout_add for clearing it up implicitly. check-in: 805dbd5181 user: mario tags: trunk
Changes
Modified action.py from [d35e0e213b] to [18fc32c37f].
︙
]

# Exec wrapper
#
def run(cmd):
    log.EXEC(cmd)
    try:    os.system("start \"%s\"" % cmd if conf.windows else cmd + " &")
    except: log.ERR("Command not found:", cmd)

# Start web browser
#
def browser(url):
    bin = conf.play.get("url/http", "sensible-browser")
    log.EXEC(bin)
    run(bin + " " + quote(url))

# Open help browser, streamtuner2 pages
#
def help(*args):
    run("yelp /usr/share/doc/streamtuner2/help/")
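Below, a minimal standalone sketch of the platform branch in run(), assuming a boolean `windows` flag in place of streamtuner2's conf.windows; the sample command is hypothetical:

import os

# "start" detaches the process on Windows; a trailing "&" backgrounds it
# on Unix shells, so os.system() returns without waiting for the player.
def run(cmd, windows=False):
    try:
        os.system("start \"%s\"" % cmd if windows else cmd + " &")
    except Exception:
        print("Command not found:", cmd)

run("echo http://example.org/stream.pls")  # hypothetical player invocation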
︙
    # Deduce likely content format
    cnv = extract_playlist(cnt)
    ext = cnv.probe_ext(url)
    probe = cnv.probe_fmt()

    # Check ambiguity (except pseudo extension)
    if len(set([source, mime, probe])) > 1:
        log.WARN("Possible playlist format mismatch:", "listformat={}, http_mime={}, rx_probe={}, ext={}".format(source, mime, probe, ext))

    # Extract URLs from content
    for fmt in playlist_fmt_prio:
        if not urls and fmt in (source, mime, probe, ext, "raw"):
            urls = cnv.urls(fmt)
            log.DATA("conversion from:", source, " with extractor:", fmt, "got URLs=", urls)
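A rough illustration of the hint-reconciliation step above: the independent format guesses are collapsed into a set to detect disagreement, then extractors are tried in a fixed priority order until one yields URLs (the hint values and priority tuple here are invented samples, not the module's exact data):

# Sample hint values standing in for listformat/MIME/content-probe results
source, mime, probe, ext = "pls", "audio/x-scpls", "m3u", "pls"
if len(set([source, mime, probe])) > 1:
    print("Possible playlist format mismatch:", source, mime, probe, ext)

urls = []
for fmt in ("pls", "m3u", "raw"):          # hypothetical priority order
    if not urls and fmt in (source, mime, probe, ext, "raw"):
        print("extractor picked:", fmt)    # stand-in for urls = cnv.urls(fmt)
        urls = ["http://example.org/stream"]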
︙
        if fn and self.probe_ext(fn):
            self.fn = fn
            self.src = open(fn, "rt").read()

    # Test URL/path "extension" for ".pls" / ".m3u" etc.
    def probe_ext(self, url):
        e = re.findall("\.(pls|m3u|xspf|jspf|asx|wpl|wsf|smil|html|url|json|desktop)\d?$", url)
        if e: return e[0]
        else: pass

    # Probe MIME type and content per regex
    def probe_fmt(self):
        for probe, rx in playlist_content_map:
            if re.search(rx, self.src, re.X|re.M|re.S):
                return listfmt(probe)
        return None

    # Return just URL list from extracted playlist
    def urls(self, fmt):
        return [row["url"] for row in self.rows(fmt)]

    # Extract only URLs from given source type
    def rows(self, fmt=None):
        if not fmt:
            fmt = self.probe_fmt()
        log.DATA("input extractor/regex:", fmt, len(self.src))

        # specific extractor implementations
        if fmt in dir(self):
            try:
                return getattr(self, fmt)()
            except Exception as e:
                log.WARN("Native {} parser failed on input (improper encoding, etc)".format(fmt), e)

        # regex scheme
        rules = self.extr_urls[fmt]
        rows = []
        fields = [name for name in ("url", "title", "homepage", "genre", "playing") if rules.get(name)]

        # Block-wise processing
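The fallback logic above hinges on the format name doubling as a method name: `fmt in dir(self)` locates a native parser such as pls(), and any exception from it drops through to the generic regex scheme. A condensed sketch of that pattern, with illustrative class and helper names rather than the module's API:

# Dispatch-with-fallback: look up a native parser by format name,
# fall back to a regex-based extractor if it is absent or fails.
class extractor:
    def rows(self, fmt):
        if fmt in dir(self):              # e.g. fmt == "pls" finds self.pls
            try:
                return getattr(self, fmt)()
            except Exception as e:
                print("Native {} parser failed:".format(fmt), e)
        return self.regex_rows(fmt)       # hypothetical regex fallback

    def pls(self):
        return [{"url": "http://example.org/stream"}]

    def regex_rows(self, fmt):
        return []

print(extractor().rows("pls"))            # -> [{'url': 'http://example.org/stream'}]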
︙
    def field(self, name, rules, src_part):
        if name in rules:
            vals = re.findall(rules[name], src_part, re.X)
            #log.PLS_EXTR_FIELD(name, vals, src_part, rules[name])
            return [self.decode(val, rules.get("unesc")) for val in vals]
        return [None]

    # String decoding
    def decode(self, val, unesc):
        if unesc in ("xml", "*"):
            val = xmlunescape(val)
        if unesc in ("json", "*"):
            val = val.replace("\\/", "/")
        return val

    # Filter out duplicate urls
    def uniq(self, rows):
        seen = []
        filtered = []
        for row in rows:
            if not row or not row.get("url") or row.get("url") in seen:
                continue
            seen.append(row.get("url"))
            filtered.append(row)
        return filtered

    # These regexps only look out for URLs, not local file paths.
    extr_urls = {
        "pls": dict(
            url   = r"(?m) ^File\d* \s*=\s* (\w+://[^\s]+) ",
            title = r"(?m) ^Title\d* \s*=\s*(.+)",
            # Notably this extraction method assumes the entries are grouped in associative order
        ),
        "m3u": dict(
            split = r"(?m) (?=^\#)",
            url   = r"(?m) ^( \w+:// [^#\n]+ )",
            title = r"(?m) ^ \#EXTINF [-:\d,]* (.+)",
        ),
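The comment on the "pls" rules marks the weakness this check-in works around: re.findall() returns File and Title values in document order, so the regex method pairs them correctly only when entries are grouped. A small demonstration with an invented, shuffled playlist:

import re

src = "File2=http://b.example/\nTitle1=Station A\nFile1=http://a.example/\nTitle2=Station B\n"
urls   = re.findall(r"(?m)^File\d*\s*=\s*(\w+://\S+)", src)
titles = re.findall(r"(?m)^Title\d*\s*=\s*(.+)", src)
# File2's URL gets paired with Title1 -- the ordering assumption breaks:
print(list(zip(urls, titles)))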
︙
        ),
        "raw": dict(
            url   = r" (?i) ( [\w+]+:// [^\s\"\'\>\#]+ ) ",
            title = r"(?i)Title[\W]+(.+)",
            unesc = "*",
        ),
    }

    # More exact PLS extraction (for the unlikely case entries were misordered)
    def pls(self):
        fieldmap = dict(file="url", title="title")
        rows = {}
        for field, num, value in re.findall("^\s* ([a-z_-]+) (\d+) \s*=\s* (.*) $", self.src, re.M|re.I|re.X):
            if not num in rows:
                rows[num] = {}
            field = fieldmap.get(field.lower())
            if field:
                rows[num][field] = value.strip()
        return [rows[str(i)] for i in sorted(map(int, rows.keys()))]

    # Add placeholder fields to extracted row
    def mkrow(self, row, title=None):
        url = row.get("url", "")
        comb = {
            "title": row.get("title") or re.sub("\.\w+$", "", os.path.basename(self.fn)),
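A quick usage sketch of the new numbered-key extractor against a deliberately shuffled playlist; this is a standalone re-implementation for illustration, since the real pls() lives on extract_playlist and reads self.src:

import re

def pls_rows(src):
    # Collect File1/Title1 pairs per trailing index, then emit in numeric order
    fieldmap = dict(file="url", title="title")
    rows = {}
    for field, num, value in re.findall(r"(?m)^\s*([a-z_-]+)(\d+)\s*=\s*(.*)$", src, re.I):
        rows.setdefault(num, {})
        key = fieldmap.get(field.lower())
        if key:
            rows[num][key] = value.strip()
    return [rows[str(i)] for i in sorted(map(int, rows.keys()))]

src = "Title2=Station B\nFile1=http://a.example/\nFile2=http://b.example/\nTitle1=Station A\n"
print(pls_rows(src))  # entries come back as 1, 2 despite the shuffled input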
︙