Index: channels/__init__.py ================================================================== --- channels/__init__.py +++ channels/__init__.py @@ -32,11 +32,10 @@ import gtk from uikit import uikit, ver as gtk_ver from config import * import ahttp import action -import favicon import os.path import xml.sax.saxutils import re import copy import inspect @@ -373,23 +372,18 @@ if row.get("favourite"): streams[i]["state"] = gtk.STOCK_ABOUT if conf.retain_deleted and row.get("deleted"): streams[i]["state"] = gtk.STOCK_DELETE - # favicons? - if conf.show_favicons: - # entry provides its own image - if "img" in row: - favicon_url = row["img"] - streams[i]["favicon"] = favicon.localcopy(favicon_url) - - # get actual homepage favicon.png - elif "homepage" in row: - homepage_url = row.get("homepage") - # check for availability of PNG file, inject local icons/ filename - if homepage_url and favicon.available(homepage_url): - streams[i]["favicon"] = favicon.file(homepage_url) + # Favicons? construct local cache filename, basically reimplements favicon.row_to_fn() + if conf.show_favicons and "favicon" in self.parent.features: + url = row.get("img") or row.get("homepage") + if url: + # Normalize by stripping proto:// and non-alphanumeric chars + url = re.sub("[^\w._-]", "_", re.sub("^\w+://|/$", "", url.lower())) + streams[i]["favicon"] = "{}/icons/{}.png".format(conf.dir, url) + return streams # data preparations directly after reload # ADDED channels/favicon.py Index: channels/favicon.py ================================================================== --- channels/favicon.py +++ channels/favicon.py @@ -0,0 +1,329 @@ +# encoding: utf-8 +# api: streamtuner2 +# title: Favicons +# description: Display station favicons/logos. Instantly download them when ▸playing. +# config: +# { name: favicon_google_first, type: bool, value: 1, description: "Prefer faster Google favicon to PNG conversion service." } +# { name: favicon_delete_stub , type: bool, value: 1, description: "Don't accept any placeholder favicons." } +# [ main-name: google_homepage ] +# [ main-name: load_favicon ] +# type: feature +# category: ui +# version: 1.7 +# depends: streamtuner2 >= 2.1.9, python:pil +# priority: standard +# +# This module fetches a favicon for each station, or a small banner +# or logo for some channel modules. It converts .ico image files and +# sanitizes .png or .jpeg images even prior display. +# +# It prepares cache files in ~/.config/streamtuner2/icons/ in agreement +# with the station list display logic. Either uses station "homepage" +# or "img" URLs from entry rows{}. +# +# While it can often discover favicons directly from station homepages, +# it's often speedier to user the Google PNG conversion service. Both +# depend on a recent Pillow2 python module (superseding the PIL module). +# Else may display images with fragments if converted from ICO files. +# +# Has recently been rewritten, is somewhat less entangled with other +# modules now: +# · GenericChannel presets row["favicon"] with cache image filename +# in any case, uses row["homepage"] or row["img"] as template +# · the filename shortening functionality must be shared between +# favicon and genericchannel.prepare() code +# · uikit.columns() merely checks row["favicon"] for file existence +# on redraws +# · main.play() only calls .update_playing() or .update_all() +# · urllib is no longer required, uses the main ahttp/requests API +# · still might need unhtml() from channels/__init__ later +# · Reduce config options → move main favicon options here? + + +import os, os.path +from compat2and3 import StringIO +import re +from config import * +import ahttp +from PIL import Image +from uikit import gtk + + +# Ensure that we don't try to download a single favicon twice per session. +# If it's not available the first time, we won't get it after switching +# stations back and forth either. So URLs are skipped simply. +tried_urls = [] + + + +# Hook up as feature plugin +# +class favicon(object): + + # plugin attributes + module = "favicon" + meta = plugin_meta() + + + # Register with main + def __init__(self, parent): + + # Reference main, and register hook + self.parent, self.main = parent, parent + parent.hooks["play"].append(self.update_playing) + + # Prepare favicon cache directory + conf.icon_dir = conf.dir + "/icons" + if not os.path.exists(conf.icon_dir): + os.mkdir(conf.icon_dir) + open(icon_dir+"/.nobackup", "a").close() + + + + # Main callback: update favicon cache for complete list of station rows + def update_all(self, *args, **kwargs): + #kwargs[pixstore] = self.parent.channel()._ls, ... + self.parent.thread(self.update_rows, *args, **kwargs) + + + # Main callback for a single play() event + def update_playing(self, row, pixstore=None): + + # Homepage search + if conf.google_homepage and not len(row.get("homepage", "")): + google_find_homepage(row) + + # Favicon only for currently playing station + if conf.load_favicon: + if row.get("homepage") or row.get("img"): + self.update_all([row], pixstore=pixstore) + + + # Run through rows[] to update "favicon" from "homepage" or "img", + # optionally display new image right away in ListStore + def update_rows(self, entries, pixstore=None): + for i,row in enumerate(entries): + ok = False + + # Try just once + if row.get("homepage") in tried_urls: + continue + # Ignore existing ["favicon"] filename + if row.get("favicon") and False: + pass + + # Cache image filename: have or can't have + favicon_fn = row_to_fn(row) + if not favicon_fn: + continue + if os.path.exists(favicon_fn): + continue + + # Custom "img" banner/logo as favicon + if row.get("img"): + tried_urls.append(row["img"]) + ok = banner_localcopy(row["img"], favicon_fn) + + # Homepage to favicon + elif row.get("homepage"): + tried_urls.append(row["homepage"]) + if conf.favicon_google_first: + ok = fav_google_ico2png(row["homepage"], favicon_fn) + else: + ok = fav_from_homepage(row["homepage"], favicon_fn) + + # Update TreeView + if ok: + self.update_pixstore(row, pixstore, i) + pass + + + # Update favicon in treeview/liststore + def update_pixstore(self, row, pixstore=None, row_i=None): + log.FAVICON_UPDATE_PIXSTORE(pixstore, row_i) + if not pixstore: + return + + # Unpack ListStore, pixbuf column no, preset rowno + ls, pix_entry, i = pixstore + # Else rows-iteration rowno + if i is None: + i = row_i + + # Existing "favicon" cache filename + if row.get("favicon"): + fn = row["favicon"] + else: + fn = row_to_fn(row) + + # Update pixbuf in active station liststore + if fn and os.path.exists(fn): + try: + p = gtk.gdk.pixbuf_new_from_file(fn) + ls[i][pix_entry] = p + except Exception as e: + log.ERR("Update_pixstore image", fn, "error:", e) + + + + +#--- somewhat unrelated --- +# +# Should become a distinct feature plugin. - It just depends on correct +# invocation order for plugins to work. +# Googling is often blocked anyway, because this is clearly a bot request. +# Tag requests with ?client=streamtuner2 purposefully still. +# +def google_find_homepage(self, row): + """ Searches for missing homepage URL via Google. """ + if row.get("url") not in tried_urls: + tried_urls.append(row.get("url")) + + if row.get("title"): + rx_t = re.compile('^(([^-:]+.?){1,2})') + rx_u = re.compile(r'/url\?q=(https?://[^"&/]+)') + + # Use literal station title now + title = row["title"] + #title = title.group(0).replace(" ", "%20") + + # Do 'le google search + html = ahttp.get("http://www.google.com/search", params=dict(hl="en", q=title, client="streamtuner2"), ajax=1) + + # Find first URL hit + url = rx_u.findall(html) + if url: + row["homepage"] = ahttp.fix_url(url[0]) + pass +#----------------- + + + + +# Convert row["img"] or row["homepage"] into local favicon cache filename +def row_to_fn(row): + url = row.get("img") or row.get("homepage") or None + if url: + url = url.lower() + url = re.sub("^\w+://|/$", "", url) # strip proto:// and trailing / + url = re.sub("[^\w._-]", "_", url) # remove any non-word characters + url = "{}/{}.png".format(conf.icon_dir, url) + return url + + + +# Copy banner row["img"] into icons/ directory +def banner_localcopy(url, fn): + + # Check URL and target filename + if not re.match("^https?://[\w.-]{10}", url): + return False + + # Fetch and save + imgdata = ahttp.get(url, binary=1, verify=False) + if imgdata: + return store_image(imgdata, fn) + + + +# Check valid image, possibly convert, and save to cache filename +def store_image(imgdata, fn, resize=None): + + # Convert accepted formats -- even PNG for filtering now + if re.match(br'^(.PNG|GIF\d+|.{0,15}JFIF|\x00\x00\x01\x00|.{0,255}]+svg)', imgdata): + try: + # Read from byte/str + image = Image.open(StringIO(imgdata)) + log.FAVICON_IMAGE_TO_PNG(image, resize) + + # Resize + if resize and image.size[0] > resize: + image.resize((resize, resize), Image.ANTIALIAS) + + # Convert to PNG via string buffer + out = StringIO() + image.save(out, "PNG", quality=98) + imgdata = out.getvalue() + + except Exception as e: + return log.ERR("favicon/logo conversion error:", e) and False + else: + log.WARN("couldn't detect mime type") + + # PNG already? + if re.match(b"^.(PNG)", imgdata): + try: + with open(fn, "wb") as f: + f.write(imgdata) + return True + except Exception as e: + log.ERR("favicon.store_image() failure:", e) + + + +# PNG via Google ico2png +def fav_google_ico2png(url, fn): + log.FAVICON("google ico2png") + + # Download from service + domain = re.sub("^\w+://|/.*$", "", url).lower() + geturl = "http://www.google.com/s2/favicons?domain={}".format(domain) + imgdata = ahttp.get(geturl, binary=1, timeout=2.5) + + # Check for stub sizes + if conf.favicon_delete_stub and len(imgdata) in (726,896): # google_placeholder_filesizes + log.FAVICON("placeholder size, skipping") + return False + # Save + else: + return store_image(imgdata, fn) + + + +# Peek at homepage URL, download favicon.ico , convert to PNG file, resize to 16x16 +def fav_from_homepage(url, fn): + + # Check for + img = html_link_icon(url) + if not img: + return False + + # Fetc image, verify MIMEE type + r = ahttp.get(img, binary=1, content=0, timeout=2.75) + if not re.match('image/(png|jpe?g|png|ico|x-ico|vnd.microsoft.ico)', r.headers["content-type"], re.I): + log.WARN("content-type wrong", r.headers) + return False + + # Convert, resize and save + return store_image(r.content, fn, resize=16) + + + +# Download HTML, look for favicon name in . +# +# Very rough, doesn't respect any and manually patches +# icon path to homepage url; nor does any entity decoding. +# +def html_link_icon(url, href="/favicon.png"): + html = ahttp.get(url, encoding="iso-8859-1", timeout=3.5) + # Extract + for link in re.findall(r""" ]+) > """, html, re.X): + pair = re.findall(r""" \b(rel|href) \s*=\s* ["']? ([^<>"']+) ["']? """, link, re.X) + pair = { name: val for name, val in pair } + for name in ("shortcut icon", "favicon", "icon", "icon shortcut"): + if name == pair.get("rel", "ignore") and pair.get("href"): + href = pair["href"] # unhtml() + break + # Patch URL together (strip double proto/domain, or double slash) + return re.sub("^(https?://\w[^/]+\w)?/?(https?://\w[^/]+\w)/?(/.+)$", "\g<2>\g<3>", url+href) + + + + + +#-- test +if __name__ == "__main__": + import sys + favicon(None).download(sys.argv[1]) + + DELETED favicon.py Index: favicon.py ================================================================== --- favicon.py +++ favicon.py @@ -1,312 +0,0 @@ -# encoding: utf-8 -# api: streamtuner2 -# title: favicon download -# description: retrieves favicons for station homepages, plus utility code for display preparation -# config: -# { name: favicon_google_first, type: bool, value: 1, description: "always use google favicon to png conversion service" } -# { name: favicon_google_only, type: bool, value: 1, description: "don't try other favicon retrieval methods, if google service fails" } -# { name: favicon_delete_stub , type: bool, value: 1, description: "delete placeholder favicons" } -# type: function -# category: ui -# priority: standard -# -# This module fetches favicon.ico files and prepares .png images for each domain -# in the stations list. Homepage URLs are used for this. -# -# Files end up in: -# /home/user/.config/streamtuner2/icons/www.example.org.png -# -# Currently relies on Google conversion service, because urllib+PIL conversion -# method is still flaky, and a bit slower. Future version might use imagemagick. - - -always_google = 1 # use favicon service for speed -only_google = 1 # if that fails, try our other/slower methods? -delete_google_stub = 1 # don't keep placeholder images -google_placeholder_filesizes = (726,896) - - -import os, os.path -from compat2and3 import xrange, urllib -import re -from config import * -from threading import Thread -import ahttp -import compat2and3 -from PIL import Image -from uikit import gtk - - - -# ensure that we don't try to download a single favicon twice per session, -# if it's not available the first time, we won't get it after switching stations back and forth -tried_urls = [] - - - - -# walk through entries -def download_all(*args, **kwargs): - t = Thread(target=download_thread, args=args, kwargs=kwargs) - t.start() -def download_thread(entries, pixstore=None): - for i,e in enumerate(entries): - # try just once - if e.get("homepage") in tried_urls: - continue - - # retrieve specific img url as favicon - elif e.get("img"): - localcopy(e["img"], True) - tried_urls.append(e.get("img")) - # favicon from homepage URL - elif e.get("homepage"): - download(e["homepage"]) - tried_urls.append(e.get("homepage")) - - # Update TreeView - update_pixstore(e, pixstore, i) - pass - -# download a single favicon for currently playing station -def download_playing(row, pixstore=None): - if conf.google_homepage and not row.get("homepage"): - google_find_homepage(row) - if conf.load_favicon and row.get("homepage"): - download_all([row], pixstore=pixstore) - pass - - -# Update favicon in treeview/liststore -def update_pixstore(row, pixstore=None, row_i=None): - log.PIXSTORE(pixstore, row_i) - if pixstore: - ls, pix_entry, i = pixstore - if i is None: - i = row_i - fn = None - if row.get("favicon"): - fn = row["favicon"] - elif row.get("img"): - fn = localcopy(row["img"], False) - elif row.get("homepage"): - fn = file(row["homepage"]) - if fn and os.path.exists(fn): - p = gtk.gdk.pixbuf_new_from_file(fn) - ls[i][pix_entry] = p - - -#--- unrelated --- -def google_find_homepage(row): - """ Searches for missing homepage URL via Google. """ - if row.get("url") not in tried_urls: - tried_urls.append(row.get("url")) - - rx_t = re.compile('^(([^-:]+.?){1,2})') - rx_u = re.compile('"(http://[^"]+)" class=l') - - # extract first title parts - title = rx_t.search(row["title"]) - if title: - title = title.group(0).replace(" ", "%20") - - # do a google search - html = ahttp.get("http://www.google.de/search?hl=de&q="+title, params={}, ajax=1) - - # find first URL hit - url = rx_u.search(html) - if url: - row["homepage"] = ahttp.fix_url(url.group(1)) - pass -#----------------- - - - -# extract domain name -def domain(url): - if url.startswith("http://"): - return url[7:url.find("/", 8)] # we assume our URLs are fixed already (http://example.org/ WITH trailing slash!) - else: - return "null" - -# local filename -def name(url): - return domain(url) + ".png" - -# local filename -def file(url): - icon_dir = conf.dir + "/icons" - if not os.path.exists(icon_dir): - os.mkdir(icon_dir) - open(icon_dir+"/.nobackup", "w").close() - return icon_dir + "/" + name(url) - -# does the favicon exist -def available(url): - return os.path.exists(file(url)) - - -# copy image from url into icons/ directory -def localcopy(url, download=False): - if url and url.startswith("http"): - fn = re.sub("[:/]", "_", url) - fn = conf.dir + "/icons/" + fn - if os.path.exists(fn): - return fn - elif download: - imgdata = ahttp.get(url, binary=1, verify=False) - with open(fn, "wb") as f: - f.write(imgdata) - f.close() - if os.path.exists(fn): - return fn - else: - return url - - - - -# download favicon for given URL -def download(url): - - # skip if .png for domain already exists - if available(url): - return - - - # fastest method, so default to google for now - if always_google: - google_ico2png(url) - if available(url) or only_google: - return - - try: # look for /favicon.ico first - log.FAVICON("try /favicon.ico") - direct_download("http://"+domain(url)+"/favicon.ico", file(url)) - - except: - try: # extract facicon filename from website - log.FAVICON("html ") - html_download(url) - - except Exception as e: # fallback - log.ERR(e) - google_ico2png(url) - - - - -# retrieve PNG via Google ico2png -def google_ico2png(url): - log.FAVICON("google ico2png") - - GOOGLE = "http://www.google.com/s2/favicons?domain=" - (fn, headers) = urllib.urlretrieve(GOOGLE+domain(url), file(url)) - - # test for stub image - if delete_google_stub and (filesize(fn) in google_placeholder_filesizes): - os.remove(fn) - - -def filesize(fn): - return os.stat(fn).st_size - - - -# mime magic -def filetype(fn): - f = open(fn, "rb") - bin = f.read(4) - f.close() - if bin[1:3] == "PNG": - return "image/png" - else: - return "*/*" - - - -# favicon.ico -def direct_download(favicon, fn): - - # URL download - r = urllib.urlopen(favicon) - headers = r.info() - log.HTTP(headers) - - # abort on - if r.getcode() >= 300: - raise Exception("HTTP error %s" % r.getcode()) - if not headers["Content-Type"].lower().find("image/") == 0: - raise Exception("can't use text/* content") - - # save file - fn_tmp = fn+".tmp" - f = open(fn_tmp, "wb") - f.write(r.read(32768)) - f.close() - - # check type - if headers["Content-Type"].lower()=="image/png" and favicon.find(".png") and filetype(fn)=="image/png": - pngresize(fn_tmp) - os.mv(fn_tmp, fn) - else: - ico2png(fn_tmp, fn) - os.remove(fn_tmp) - - - -# peek at URL, download favicon.ico -def html_download(url): - - - # - #try: - # download html, look for @href in - r = urllib.urlopen(url) - html = r.read(4096) - r.close() - rx = re.compile("""]+rel\s*=\s*"?\s*(?:shortcut\s+|fav)?icon[^<>]+href=["'](?P[^<>"']+)["'<>\s].""") - favicon = "".join(rx.findall(html)) - log.DATA(favicon) - - # url or - if favicon.startswith("http://"): - None - # just /pathname - else: - favicon = compat2and3.urlparse.urljoin(url, favicon) - log.FAVICON(favicon) - #favicon = "http://" + domain(url) + "/" + favicon - - # download - direct_download(favicon, file(url)) - - - -# convert .ico file to .png format -def ico2png(ico, png_fn): - image = Image.open(ico) - log.FAVICON_ICO2PNG(ico, png, image) - # resize - if image.size[0] > 16: - image.resize((16, 16), Image.ANTIALIAS) - # .png format - image.save(png_fn, "PNG", quality=98) - - -# resize an image -def pngresize(fn, x=16, y=16): - image = Image.open(fn) - if image.size[0] > x: - image.resize((x, y), Image.ANTIALIAS) - image.save(fn, "PNG", quality=98) - - - - -#-- test -if __name__ == "__main__": - import sys - download(sys.argv[1]) - - Index: st2.py ================================================================== --- st2.py +++ st2.py @@ -54,11 +54,10 @@ # custom modules import ahttp import action import logo -import favicon import channels import channels.bookmarks import channels.configwin import channels.streamedit import channels.search @@ -73,11 +72,11 @@ widgets = {} # non-glade widgets (any manually instantiated ones) channels = {} # channel modules features = {} # non-channel plugins working = [] # threads hooks = { - "play": [favicon.download_playing], # observers queue here + "play": [], # observers queue here "record": [], "init": [], "quit": [action.cleanup_tmp_files], "config_load": [], "config_save": [], @@ -120,12 +119,14 @@ [callback(self) for callback in self.hooks["init"]] # display current open channel/notebook tab gui_startup(18/20.0) self.current_channel = self.current_channel_gtk() - try: self.channel().first_show() - except: log.INIT("main.__init__: current_channel.first_show() initialization error") + try: + self.channel().first_show() + except Exception as e: + log.INIT("main.__init__: current_channel.first_show() initialization error:", e) # bind gtk/glade event names to functions gui_startup(19.75/20.0) self.connect_signals({ @@ -295,14 +296,14 @@ #@TODO: should get a wrapper, for HTTP errors, and optionalize bookamrks lambda: ( self.channel().load(category,reload), reload and self.bookmarks.heuristic_update(self.current_channel,category) ) ) # Thread a function, add to worker pool (for utilizing stop button) - def thread(self, target, *args): + def thread(self, target, *args, **kwargs): if conf.nothreads: - return target(*args) - thread = Thread(target=target, args=args) + return target(*args, **kwargs) + thread = Thread(target=target, args=args, kwargs=kwargs) thread.start() self.working.append(thread) # Click in category list @@ -323,12 +324,13 @@ def update_categories(self, widget): Thread(target=self.channel().reload_categories).start() # Menu invocation: refresh favicons for all stations in current streams category def update_favicons(self, widget): - ch = self.channel() - favicon.download_all(entries=ch.stations(), pixstore=[ch._ls, ch._pix_entry, None]) + if "favicon" in self.features: + ch = self.channel() + self.features["favicon"].update_all(entries=ch.stations(), pixstore=[ch._ls, ch._pix_entry, None]) # Save stream to file (.m3u) def save_as(self, widget): row = self.row() default_fn = row["title"] + ".m3u"