Internet radio browser GUI for music/video streams from various directory services.

⌈⌋ branch:  streamtuner2


Check-in [0725d3fbc8]

Overview
Comment:Add custom pls extractor (for unordered playlist entries), keep regex method as fallback. More logging.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 0725d3fbc8bba8bee7cea85cbe67ae9081bd6876
User & Date: mario on 2015-04-26 15:34:03
Other Links: manifest | tags
Context
2015-04-26
15:34
More customized log categories/colorization. check-in: 0943cca27e user: mario tags: trunk
15:34
Add custom pls extractor (for unordered playlist entries), keep regex method as fallback. More logging. check-in: 0725d3fbc8 user: mario tags: trunk
2015-04-25
00:39
Replace statusbar with plain gtk.Label, use glib.timeout_add for clearing it up implicitly. check-in: 805dbd5181 user: mario tags: trunk
Changes

Modified action.py from [d35e0e213b] to [18fc32c37f].

120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
]



# Exec wrapper
#
def run(cmd):
    log.PROC("Exec:", cmd)
    try:    os.system("start \"%s\"" % cmd if conf.windows else cmd + " &")
    except: log.ERR("Command not found:", cmd)


# Start web browser
#
def browser(url):
    bin = conf.play.get("url/http", "sensible-browser")
    log.BROWSER(bin)
    run(bin + " " + quote(url))


# Open help browser, streamtuner2 pages
#
def help(*args):
    run("yelp /usr/share/doc/streamtuner2/help/")







|








|







120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
]



# Exec wrapper
#
def run(cmd):
    log.EXEC(cmd)
    try:    os.system("start \"%s\"" % cmd if conf.windows else cmd + " &")
    except: log.ERR("Command not found:", cmd)


# Start web browser
#
def browser(url):
    bin = conf.play.get("url/http", "sensible-browser")
    log.EXEC(bin)
    run(bin + " " + quote(url))


# Open help browser, streamtuner2 pages
#
def help(*args):
    run("yelp /usr/share/doc/streamtuner2/help/")
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
    # Deduce likely content format
    cnv = extract_playlist(cnt)
    ext = cnv.probe_ext(url)
    probe = cnv.probe_fmt()

    # Check ambiguity (except pseudo extension)
    if len(set([source, mime, probe])) > 1:
        log.ERR("Possible playlist format mismatch:", "listformat={}, http_mime={}, rx_probe={}, ext={}".format(source, mime, probe, ext))

    # Extract URLs from content
    for fmt in playlist_fmt_prio:
        if not urls and fmt in (source, mime, probe, ext, "raw"):
            urls = cnv.urls(fmt)
            log.DATA("conversion from:", source, " with extractor:", fmt, "got URLs=", urls)
            







|







242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
    # Deduce likely content format
    cnv = extract_playlist(cnt)
    ext = cnv.probe_ext(url)
    probe = cnv.probe_fmt()

    # Check ambiguity (except pseudo extension)
    if len(set([source, mime, probe])) > 1:
        log.WARN("Possible playlist format mismatch:", "listformat={}, http_mime={}, rx_probe={}, ext={}".format(source, mime, probe, ext))

    # Extract URLs from content
    for fmt in playlist_fmt_prio:
        if not urls and fmt in (source, mime, probe, ext, "raw"):
            urls = cnv.urls(fmt)
            log.DATA("conversion from:", source, " with extractor:", fmt, "got URLs=", urls)
            
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339

340
341
342
343

344
345
346
347
348
349
350
351
352

353


354
355
356
357
358
359
360
        if fn and self.probe_ext(fn):
            self.fn = fn
            self.src = open(fn, "rt").read()


    # Test URL/path "extension" for ".pls" / ".m3u" etc.
    def probe_ext(self, url):
        e = re.findall("\.(pls|m3u|xspf|jspf|asx|wpl|wsf|smil|html|url|json|desktop)$", url)
        if e: return e[0]
        else: pass


    # Probe MIME type and content per regex
    def probe_fmt(self):
        for probe,rx in playlist_content_map:
            if re.search(rx, self.src, re.X|re.M|re.S):
                return listfmt(probe)
        return None


    # Return just URL list from extracted playlist
    def urls(self, fmt):
        return [row["url"] for row in self.rows(fmt)]

        
    # Extract only URLs from given source type
    def rows(self, fmt=None):
        if not fmt:
            fmt = self.probe_fmt()
        log.DATA("input extractor/regex:", fmt, len(self.src))

        # specific extractor implementations
        if fmt in self.__dict__:

            return getattr(self, fmt)()



        # regex scheme
        rules = self.extr_urls[fmt]
        rows = []
        fields = [name for name in ("url", "title", "homepage", "genre", "playing") if rules.get(name)]

        # Block-wise processing







|










>




>








|
>
|
>
>







322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
        if fn and self.probe_ext(fn):
            self.fn = fn
            self.src = open(fn, "rt").read()


    # Test URL/path "extension" for ".pls" / ".m3u" etc.
    def probe_ext(self, url):
        e = re.findall("\.(pls|m3u|xspf|jspf|asx|wpl|wsf|smil|html|url|json|desktop)\d?$", url)
        if e: return e[0]
        else: pass


    # Probe MIME type and content per regex
    def probe_fmt(self):
        for probe,rx in playlist_content_map:
            if re.search(rx, self.src, re.X|re.M|re.S):
                return listfmt(probe)
        return None


    # Return just URL list from extracted playlist
    def urls(self, fmt):
        return [row["url"] for row in self.rows(fmt)]

        
    # Extract only URLs from given source type
    def rows(self, fmt=None):
        if not fmt:
            fmt = self.probe_fmt()
        log.DATA("input extractor/regex:", fmt, len(self.src))

        # specific extractor implementations
        if fmt in dir(self):
            try:
                return getattr(self, fmt)()
            except Exception as e:
                log.WARN("Native {} parser failed on input (improper encoding, etc)".format(fmt), e)

        # regex scheme
        rules = self.extr_urls[fmt]
        rows = []
        fields = [name for name in ("url", "title", "homepage", "genre", "playing") if rules.get(name)]

        # Block-wise processing
384
385
386
387
388
389
390

391
392
393
394
395
396
397
398

399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
    def field(self, name, rules, src_part):
        if name in rules:
            vals = re.findall(rules[name], src_part, re.X)
            #log.PLS_EXTR_FIELD(name, vals, src_part, rules[name])
            return [self.decode(val, rules.get("unesc")) for val in vals]
        return [None]


    # Decoding
    def decode(self, val, unesc):
        if unesc in ("xml", "*"):
            val = xmlunescape(val)
        if unesc in ("json", "*"):
            val = val.replace("\\/", "/")
        return val


    # filter out duplicate urls
    def uniq(self, rows):
        seen = []
        filtered = []
        for row in rows:
            if not row or not row.get("url") or row.get("url") in seen:
                continue;
            seen.append(row.get("url"))
            filtered.append(row)
        return rows


    # These regexps only look out for URLs, not local file paths.
    extr_urls = {
        "pls": dict(
            url   = r"(?im) ^ \s*File\d* \s*=\s* (\w+://[^\s]+) ",
            title = r"(?m) ^Title\d*=(.+)",
            # Notably this extraction method assumes the entries are grouped in associative order
        ),
        "m3u": dict(
            split = r"(?m) (?=^\#)",
            url   = r"(?m) ^( \w+:// [^#\n]+ )",
            title = r"(?m) ^ \#EXTINF [-:\d,]* (.+)",
        ),







>
|







>
|














|
|







389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
    def field(self, name, rules, src_part):
        if name in rules:
            vals = re.findall(rules[name], src_part, re.X)
            #log.PLS_EXTR_FIELD(name, vals, src_part, rules[name])
            return [self.decode(val, rules.get("unesc")) for val in vals]
        return [None]


    # String decoding
    def decode(self, val, unesc):
        if unesc in ("xml", "*"):
            val = xmlunescape(val)
        if unesc in ("json", "*"):
            val = val.replace("\\/", "/")
        return val


    # Filter out duplicate urls
    def uniq(self, rows):
        seen = []
        filtered = []
        for row in rows:
            if not row or not row.get("url") or row.get("url") in seen:
                continue;
            seen.append(row.get("url"))
            filtered.append(row)
        return rows


    # These regexps only look out for URLs, not local file paths.
    extr_urls = {
        "pls": dict(
            url   = r"(?m) ^File\d* \s*=\s* (\w+://[^\s]+) ",
            title = r"(?m) ^Title\d* \s*=\s*(.+)",
            # Notably this extraction method assumes the entries are grouped in associative order
        ),
        "m3u": dict(
            split = r"(?m) (?=^\#)",
            url   = r"(?m) ^( \w+:// [^#\n]+ )",
            title = r"(?m) ^ \#EXTINF [-:\d,]* (.+)",
        ),
468
469
470
471
472
473
474













475
476
477
478
479
480
481
        ),
        "raw": dict(
            url   = r" (?i) ( [\w+]+:// [^\s\"\'\>\#]+ ) ",
            title = r"(?i)Title[\W]+(.+)",
            unesc = "*",
        ),
    }















    # Add placeholder fields to extracted row
    def mkrow(self, row, title=None):
        url = row.get("url", "")
        comb = {
            "title": row.get("title") or re.sub("\.\w+$", "", os.path.basename(self.fn)),







>
>
>
>
>
>
>
>
>
>
>
>
>







475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
        ),
        "raw": dict(
            url   = r" (?i) ( [\w+]+:// [^\s\"\'\>\#]+ ) ",
            title = r"(?i)Title[\W]+(.+)",
            unesc = "*",
        ),
    }
    
    
    # More exact PLS extraction (for the unlikely case entries were misordered)
    def pls(self):
        fieldmap = dict(file="url", title="title")
        rows = {}
        for field,num,value in re.findall("^\s* ([a-z_-]+) (\d+) \s*=\s* (.*) $", self.src, re.M|re.I|re.X):
            if not num in rows:
                rows[num] = {}
            field = fieldmap.get(field.lower())
            if field:
                rows[num][field] = value.strip()
        return [rows[str(i)] for i in sorted(map(int, rows.keys()))]


    # Add placeholder fields to extracted row
    def mkrow(self, row, title=None):
        url = row.get("url", "")
        comb = {
            "title": row.get("title") or re.sub("\.\w+$", "", os.path.basename(self.fn)),