@@ -1,312 +1,329 @@ # encoding: utf-8 # api: streamtuner2 -# title: favicon download -# description: retrieves favicons for station homepages, plus utility code for display preparation +# title: Favicons +# description: Display station favicons/logos. Instantly download them when ▸playing. # config: -# { name: favicon_google_first, type: bool, value: 1, description: "always use google favicon to png conversion service" } -# { name: favicon_google_only, type: bool, value: 1, description: "don't try other favicon retrieval methods, if google service fails" } -# { name: favicon_delete_stub , type: bool, value: 1, description: "delete placeholder favicons" } -# type: function +# { name: favicon_google_first, type: bool, value: 1, description: "Prefer faster Google favicon to PNG conversion service." } +# { name: favicon_delete_stub , type: bool, value: 1, description: "Don't accept any placeholder favicons." } +# [ main-name: google_homepage ] +# [ main-name: load_favicon ] +# type: feature # category: ui +# version: 1.7 +# depends: streamtuner2 >= 2.1.9, python:pil # priority: standard # -# This module fetches favicon.ico files and prepares .png images for each domain -# in the stations list. Homepage URLs are used for this. -# -# Files end up in: -# /home/user/.config/streamtuner2/icons/www.example.org.png -# -# Currently relies on Google conversion service, because urllib+PIL conversion -# method is still flaky, and a bit slower. Future version might use imagemagick. - - -always_google = 1 # use favicon service for speed -only_google = 1 # if that fails, try our other/slower methods? -delete_google_stub = 1 # don't keep placeholder images -google_placeholder_filesizes = (726,896) +# This module fetches a favicon for each station, or a small banner +# or logo for some channel modules. It converts .ico image files and +# sanitizes .png or .jpeg images even prior display. 
+# +# It prepares cache files in ~/.config/streamtuner2/icons/ in agreement +# with the station list display logic. Either uses station "homepage" +# or "img" URLs from entry rows{}. +# +# While it can often discover favicons directly from station homepages, +# it's often speedier to use the Google PNG conversion service. Both +# depend on a recent Pillow2 python module (superseding the PIL module). +# Else may display images with fragments if converted from ICO files. +# +# Has recently been rewritten, is somewhat less entangled with other +# modules now: +# · GenericChannel presets row["favicon"] with cache image filename +# in any case, uses row["homepage"] or row["img"] as template +# · the filename shortening functionality must be shared between +# favicon and genericchannel.prepare() code +# · uikit.columns() merely checks row["favicon"] for file existence +# on redraws +# · main.play() only calls .update_playing() or .update_all() +# · urllib is no longer required, uses the main ahttp/requests API +# · still might need unhtml() from channels/__init__ later +# · Reduce config options → move main favicon options here? import os, os.path -from compat2and3 import xrange, urllib +from compat2and3 import StringIO import re from config import * -from threading import Thread import ahttp -import compat2and3 from PIL import Image from uikit import gtk - -# ensure that we don't try to download a single favicon twice per session, -# if it's not available the first time, we won't get it after switching stations back and forth +# Ensure that we don't try to download a single favicon twice per session. +# If it's not available the first time, we won't get it after switching +# stations back and forth either. So URLs are skipped simply. 
tried_urls = []


# Hook up as feature plugin
#
class favicon(object):
    """Feature plugin that keeps the station favicon cache up to date.

    On construction it appends `update_playing` to the parent's "play"
    hook list and sets `conf.icon_dir`, the directory the cache files
    are written to.
    """

    # plugin attributes
    module = "favicon"
    meta = plugin_meta()

    # Register with main
    def __init__(self, parent):
        """Hook into the main object and prepare the icon cache directory.

        parent -- main application object; `parent.hooks["play"]` must be
                  a list, and `parent.thread()` is used by update_all().
        """
        # Reference main, and register hook
        self.parent, self.main = parent, parent
        parent.hooks["play"].append(self.update_playing)

        # Prepare favicon cache directory
        conf.icon_dir = conf.dir + "/icons"
        if not os.path.exists(conf.icon_dir):
            os.mkdir(conf.icon_dir)
            # Marker file; previously referenced an undefined bare
            # `icon_dir` name, which raised NameError on first run.
            open(conf.icon_dir + "/.nobackup", "a").close()

    # Main callback: update favicon cache for complete list of station rows
    def update_all(self, *args, **kwargs):
        """Run update_rows() through `self.parent.thread()`.

        All arguments are passed on unchanged, so the signature is that
        of update_rows(entries, pixstore=None).
        """
        self.parent.thread(self.update_rows, *args, **kwargs)

    # Main callback for a single play() event
    def update_playing(self, row, pixstore=None):
        """Update the favicon of the station row that is being played.

        row      -- station dict; may gain a "homepage" key as side effect
                    of the Google lookup
        pixstore -- optional (liststore, pixbuf column, row number) tuple,
                    handed through to update_pixstore()
        """
        # Homepage search; google_find_homepage() is declared with a
        # leading `self` parameter, so it has to be passed explicitly.
        if conf.google_homepage and not row.get("homepage"):
            google_find_homepage(self, row)

        # Favicon only for currently playing station
        if conf.load_favicon and (row.get("homepage") or row.get("img")):
            self.update_all([row], pixstore=pixstore)

    # Run through rows[] to update "favicon" from "homepage" or "img",
    # optionally display new image right away in ListStore
    def update_rows(self, entries, pixstore=None):
        """Download missing cache images for each row in `entries`.

        A row is skipped when one of its URLs was already attempted in
        this session, when no cache filename can be derived from it, or
        when the cache file exists already. Every attempted URL is
        recorded in the module-level `tried_urls` list.
        """
        for i, row in enumerate(entries):
            ok = False

            # Try just once. Both URL kinds are checked (only "homepage"
            # was before, so failed "img" downloads got retried), and
            # empty values never count as tried: google_find_homepage()
            # may have appended None to tried_urls.
            if any(row.get(key) and row[key] in tried_urls
                   for key in ("homepage", "img")):
                continue

            # Cache image filename: have or can't have
            favicon_fn = row_to_fn(row)
            if not favicon_fn or os.path.exists(favicon_fn):
                continue

            # Custom "img" banner/logo as favicon
            if row.get("img"):
                tried_urls.append(row["img"])
                ok = banner_localcopy(row["img"], favicon_fn)

            # Homepage to favicon
            elif row.get("homepage"):
                tried_urls.append(row["homepage"])
                if conf.favicon_google_first:
                    ok = fav_google_ico2png(row["homepage"], favicon_fn)
                else:
                    ok = fav_from_homepage(row["homepage"], favicon_fn)

            # Update TreeView
            if ok:
                self.update_pixstore(row, pixstore, i)

    # Update favicon in treeview/liststore
    def update_pixstore(self, row, pixstore=None, row_i=None):
        """Load the row's cache image into the given liststore cell.

        row      -- station dict with "favicon", or "img"/"homepage"
        pixstore -- (liststore, pixbuf column, row number) tuple; nothing
                    happens when it is empty or None
        row_i    -- fallback row number, used when the tuple's row number
                    is None
        """
        log.FAVICON_UPDATE_PIXSTORE(pixstore, row_i)
        if not pixstore:
            return

        # Unpack ListStore, pixbuf column no, preset rowno
        ls, pix_entry, i = pixstore
        # Else rows-iteration rowno
        if i is None:
            i = row_i

        # Existing "favicon" cache filename, else derive it from the row
        fn = row.get("favicon") or row_to_fn(row)

        # Update pixbuf in active station liststore
        if fn and os.path.exists(fn):
            try:
                ls[i][pix_entry] = gtk.gdk.pixbuf_new_from_file(fn)
            except Exception as e:
                log.ERR("Update_pixstore image", fn, "error:", e)


#--- somewhat unrelated ---
#
# Should become a distinct feature plugin. - It just depends on correct
# invocation order for plugins to work.
# Googling is often blocked anyway, because this is clearly a bot request.
# Tag requests with ?client=streamtuner2 purposefully still.
#
def google_find_homepage(self, row):
    """ Searches for missing homepage URL via Google.

    self -- unused; the function is declared at module level but keeps a
            leading placeholder argument
    row  -- station dict; row["homepage"] is set when the result page
            contains a /url?q=http... link

    The lookup runs at most once per row["url"] value, which is tracked
    in the module-level `tried_urls` list.
    """
    # NOTE(review): nesting of the title lookup below the tried_urls check
    # is assumed from the previous layout of this function -- confirm.
    if row.get("url") in tried_urls:
        return
    tried_urls.append(row.get("url"))

    if not row.get("title"):
        return

    rx_u = re.compile(r'/url\?q=(https?://[^"&/]+)')

    # Use literal station title now
    title = row["title"]

    # Do 'le google search
    html = ahttp.get(
        "http://www.google.com/search",
        params=dict(hl="en", q=title, client="streamtuner2"),
        ajax=1
    )
    if not html:
        # Nothing to scan; findall() would raise on a None result
        return

    # Find first URL hit
    url = rx_u.findall(html)
    if url:
        row["homepage"] = ahttp.fix_url(url[0])


#-----------------
# Convert row["img"] or row["homepage"] into local favicon cache filename
def row_to_fn(row):
    """Return the cache filename for a station row, or None.

    Uses row["img"] if set, else row["homepage"]. The URL is lowercased,
    stripped of its scheme and one trailing slash, every character other
    than word characters, ".", "_" and "-" is replaced by "_", and the
    result is placed in conf.icon_dir with a ".png" suffix.
    """
    url = row.get("img") or row.get("homepage") or None
    if url:
        url = url.lower()
        url = re.sub(r"^\w+://|/$", "", url)   # strip proto:// and trailing /
        url = re.sub(r"[^\w._-]", "_", url)    # remove any non-word characters
        url = "{}/{}.png".format(conf.icon_dir, url)
    return url


# Copy banner row["img"] into icons/ directory
def banner_localcopy(url, fn):
    """Download the image at `url` and store it as PNG in `fn`.

    Returns the store_image() result, or False when the URL is rejected
    or nothing was downloaded.
    """
    # Check URL: http(s) scheme followed by at least ten host characters
    if not url or not re.match(r"^https?://[\w.-]{10}", url):
        return False

    # Fetch and save
    imgdata = ahttp.get(url, binary=1, verify=False)
    if imgdata:
        return store_image(imgdata, fn)
    return False


# Check valid image, possibly convert, and save to cache filename
def store_image(imgdata, fn, resize=None):
    """Convert raw image bytes to PNG and write them to `fn`.

    imgdata -- raw bytes as downloaded
    fn      -- target cache filename
    resize  -- optional edge length; images wider than this are scaled
               to resize x resize pixels

    Returns True when the file was written, False otherwise.
    """
    if not imgdata:
        return False

    # Convert accepted formats -- even PNG for filtering now
    if re.match(br'^(.PNG|GIF\d+|.{0,15}JFIF|\x00\x00\x01\x00|.{0,255}<svg[^>]+svg)', imgdata):
        try:
            # Read from byte/str
            image = Image.open(StringIO(imgdata))
            log.FAVICON_IMAGE_TO_PNG(image, resize)

            # Resize. Image.resize() returns a new image, so the result
            # has to be kept (it was discarded before). Newer Pillow has
            # no ANTIALIAS constant; LANCZOS is the same filter.
            if resize and image.size[0] > resize:
                resample = getattr(Image, "LANCZOS", None) or Image.ANTIALIAS
                image = image.resize((resize, resize), resample)

            # Convert to PNG via string buffer
            out = StringIO()
            image.save(out, "PNG", quality=98)
            imgdata = out.getvalue()

        except Exception as e:
            log.ERR("favicon/logo conversion error:", e)
            return False
    else:
        log.WARN("couldn't detect mime type")

    # PNG already?
    if re.match(b"^.(PNG)", imgdata):
        try:
            with open(fn, "wb") as f:
                f.write(imgdata)
            return True
        except Exception as e:
            log.ERR("favicon.store_image() failure:", e)
    return False


# PNG via Google ico2png
def fav_google_ico2png(url, fn, placeholder_sizes=(726, 896)):
    """Fetch the favicon for the domain of `url` from Google's service.

    url               -- station homepage; only its host part is used
    fn                -- target cache filename
    placeholder_sizes -- byte lengths treated as Google's stub image when
                         conf.favicon_delete_stub is enabled

    Returns the store_image() result, or False for empty and placeholder
    responses.
    """
    log.FAVICON("google ico2png")

    # Download from service
    domain = re.sub(r"^\w+://|/.*$", "", url).lower()
    geturl = "http://www.google.com/s2/favicons?domain={}".format(domain)
    imgdata = ahttp.get(geturl, binary=1, timeout=2.5)
    if not imgdata:
        return False

    # Check for stub sizes
    if conf.favicon_delete_stub and len(imgdata) in placeholder_sizes:
        log.FAVICON("placeholder size, skipping")
        return False

    # Save
    return store_image(imgdata, fn)


# Peek at homepage URL, download favicon.ico, convert to PNG file, resize to 16x16
def fav_from_homepage(url, fn):
    """Locate the favicon referenced by the homepage and cache it.

    Returns the store_image() result, or False when no icon URL was
    found or the response does not carry an accepted image content-type.
    """
    # Look for an icon reference in the homepage HTML
    img = html_link_icon(url)
    if not img:
        return False

    # Fetch image, verify MIME type
    r = ahttp.get(img, binary=1, content=0, timeout=2.75)
    if r is None:
        return False
    mime = r.headers.get("content-type", "")
    if not re.match('image/(png|jpe?g|png|ico|x-ico|vnd.microsoft.ico)', mime, re.I):
        log.WARN("content-type wrong", r.headers)
        return False

    # Convert, resize and save
    return store_image(r.content, fn, resize=16)


# Download HTML, look for favicon name in <link rel=...> tags.
#
# Very rough, doesn't respect any base URL declaration and manually
# patches icon path to homepage url; nor does any entity decoding.
#
def html_link_icon(url, href="/favicon.png"):
    """Return the favicon URL for the homepage at `url`.

    url  -- homepage URL to download and scan
    href -- icon path used when the page declares no icon link

    When several link tags qualify, the last one in the page is used.
    """
    html = ahttp.get(url, encoding="iso-8859-1", timeout=3.5) or ""
    icon_rels = ("shortcut icon", "favicon", "icon", "icon shortcut")

    # Extract attributes of each link tag
    for link in re.findall(r""" <link ([^<>]+) > """, html, re.X):
        attrs = dict(re.findall(
            r""" \b(rel|href) \s*=\s* ["']? ([^<>"']+) ["']? """, link, re.X
        ))
        if attrs.get("rel") in icon_rels and attrs.get("href"):
            href = attrs["href"]  # unhtml()

    # Patch URL together (strip double proto/domain, or double slash)
    return re.sub(
        r"^(https?://\w[^/]+\w)?/?(https?://\w[^/]+\w)/?(/.+)$",
        r"\g<2>\g<3>",
        url + href
    )


#-- test
if __name__ == "__main__":
    import sys
    # Fetch the favicon for the homepage URL given on the command line.
    # This used to call a download() method that the plugin class does
    # not have, so the cache directory is set up here directly.
    conf.icon_dir = conf.dir + "/icons"
    if not os.path.exists(conf.icon_dir):
        os.mkdir(conf.icon_dir)
    homepage = sys.argv[1]
    print(fav_from_homepage(homepage, row_to_fn({"homepage": homepage})))