1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312 | # encoding: utf-8
# api: streamtuner2
# title: favicon download
# description: retrieves favicons for station homepages, plus utility code for display preparation
# config:
# { name: favicon_google_first, type: bool, value: 1, description: "always use google favicon to png conversion service" }
# { name: favicon_google_only, type: bool, value: 1, description: "don't try other favicon retrieval methods, if google service fails" }
# { name: favicon_delete_stub , type: bool, value: 1, description: "delete placeholder favicons" }
# type: function
# category: ui
# priority: standard
#
# This module fetches favicon.ico files and prepares .png images for each domain
# in the stations list. Homepage URLs are used for this.
#
# Files end up in:
# /home/user/.config/streamtuner2/icons/www.example.org.png
#
# Currently relies on Google conversion service, because urllib+PIL conversion
# method is still flaky, and a bit slower. Future version might use imagemagick.
# Ask the Google favicon service first, because it is the fastest method.
always_google = 1
# When set, download() returns after the Google attempt instead of falling
# back to the other/slower retrieval methods.
only_google = 1
# Don't keep placeholder images delivered by the Google service.
delete_google_stub = 1
# File sizes (in bytes) that identify a downloaded icon as a placeholder.
google_placeholder_filesizes = (726, 896)
import os, os.path
from compat2and3 import xrange, urllib
import re
from config import *
from threading import Thread
import ahttp
import compat2and3
from PIL import Image
from uikit import gtk
# Session-wide list of URLs for which a download was already attempted.
# Ensures that we don't try to download a single favicon twice per session:
# if it's not available the first time, we won't get it after switching
# stations back and forth either.
tried_urls = []
# Start a background favicon download for a list of station entries
def download_all(*args, **kwargs):
    """Run download_thread() with the given arguments in a new thread.

    All positional and keyword arguments are handed through unchanged.
    The thread is started but not joined, so this returns right away.
    """
    Thread(target=download_thread, args=args, kwargs=kwargs).start()
def download_thread(entries, pixstore=None):
    """Fetch a favicon for every row in `entries` and refresh the display.

    entries  -- iterable of station dicts; the "img" and "homepage" keys
                are consulted
    pixstore -- optional (liststore, pixbuf column, row number) tuple that
                is handed on to update_pixstore()

    Each URL is recorded in `tried_urls` *before* the download is started,
    so a URL that fails is still only tried once per session.  A failure
    for one entry is logged and does not stop the remaining entries.
    """
    for i, e in enumerate(entries):
        homepage = e.get("homepage")
        img = e.get("img")
        # try just once
        if homepage in tried_urls:
            continue
        try:
            # retrieve specific img url as favicon
            if img:
                tried_urls.append(img)
                localcopy(img, True)
            # favicon from homepage URL
            elif homepage:
                tried_urls.append(homepage)
                download(homepage)
            # Update TreeView
            update_pixstore(e, pixstore, i)
        except Exception as err:
            # thread boundary: report, then carry on with the next station
            log.ERR(err)
# Download a single favicon for the currently playing station
def download_playing(row, pixstore=None):
    """Look up homepage and favicon for the station `row` being played.

    If conf.google_homepage is set and the row has no "homepage", a
    homepage search is run first.  If conf.load_favicon is set and the row
    has a "homepage" afterwards, the favicon download is started for it.
    """
    if conf.google_homepage:
        if not row.get("homepage"):
            google_find_homepage(row)
    if not conf.load_favicon:
        return
    if row.get("homepage"):
        download_all([row], pixstore=pixstore)
# Update favicon in treeview/liststore
def update_pixstore(row, pixstore=None, row_i=None):
    """Load the cached icon for `row` into the liststore, if there is one.

    pixstore -- (liststore, pixbuf column, row number) tuple; nothing is
                done when it is empty
    row_i    -- row number to use when the tuple carries None for it

    The icon file is taken from row["favicon"], else from the local copy of
    row["img"], else from the cache file for row["homepage"].
    """
    log.PIXSTORE(pixstore, row_i)
    if not pixstore:
        return
    ls, pix_entry, i = pixstore
    if i is None:
        i = row_i
    # pick the icon file name from the first usable source
    if row.get("favicon"):
        fn = row["favicon"]
    elif row.get("img"):
        fn = localcopy(row["img"], False)
    elif row.get("homepage"):
        fn = file(row["homepage"])
    else:
        fn = None
    if fn and os.path.exists(fn):
        ls[i][pix_entry] = gtk.gdk.pixbuf_new_from_file(fn)
#--- unrelated ---
def google_find_homepage(row):
    """ Searches for missing homepage URL via Google.

    Uses the leading part of row["title"] as search term and stores the
    first hit in row["homepage"].  The station's row["url"] is recorded in
    `tried_urls`, and a station that was already searched for is skipped,
    so each station triggers at most one search per session.
    """
    # local import keeps this block self-contained on Python 2 and 3
    try:
        from urllib.parse import quote
    except ImportError:
        from urllib import quote

    # search just once per station
    station_url = row.get("url")
    if station_url in tried_urls:
        return
    tried_urls.append(station_url)

    rx_t = re.compile('^(([^-:]+.?){1,2})')
    rx_u = re.compile('"(http://[^"]+)" class=l')
    # extract first title parts
    title = rx_t.search(row.get("title") or "")
    if not title:
        return
    term = title.group(0)
    if not isinstance(term, str):
        # Python 2 unicode object: quote() needs a byte string there
        term = term.encode("utf-8")
    # percent-encode everything, not just spaces ("&" or "#" would
    # otherwise cut the query short)
    query = quote(term)
    # do a google search
    html = ahttp.get("http://www.google.de/search?hl=de&q="+query, params={}, ajax=1)
    # find first URL hit
    url = rx_u.search(html or "")
    if url:
        row["homepage"] = ahttp.fix_url(url.group(1))
#-----------------
# extract domain name
def domain(url):
    """Return the host part of an http:// or https:// URL.

    Works with or without a trailing slash or path.  Returns the string
    "null" for any other kind of URL, and for URLs with an empty host.
    """
    for scheme in ("http://", "https://"):
        if url.startswith(scheme):
            # everything up to the first slash after the scheme
            host = url[len(scheme):].split("/", 1)[0]
            return host or "null"
    return "null"
# local file name, without directory
def name(url):
    """Return the icon file name for `url`: its domain() plus ".png"."""
    return "%s.png" % domain(url)
# local filename
def file(url):
    """Return the full path of the cached icon file for `url`.

    The icons/ directory below conf.dir is created on first use, together
    with an empty ".nobackup" marker file.  Creation tolerates another
    download thread having made the directory in the meantime.
    """
    icon_dir = conf.dir + "/icons"
    if not os.path.isdir(icon_dir):
        try:
            os.mkdir(icon_dir)
        except OSError:
            # lost the race against a parallel download thread?
            if not os.path.isdir(icon_dir):
                raise
        else:
            open(icon_dir+"/.nobackup", "w").close()
    return icon_dir + "/" + name(url)
# does the favicon exist
def available(url):
    """Return True if the cached icon file for `url` is present on disk."""
    icon_path = file(url)
    return os.path.exists(icon_path)
# copy image from url into icons/ directory
def localcopy(url, download=False):
    """Return the local cache file name for the image at `url`.

    url      -- image URL; anything not starting with "http" (including
                None and "") is returned unchanged
    download -- fetch the image if it is not cached yet

    Returns the cache file name if the file exists (or was just fetched),
    otherwise None.
    """
    if not (url and url.startswith("http")):
        return url
    icon_dir = conf.dir + "/icons"
    fn = icon_dir + "/" + re.sub("[:/]", "_", url)
    if os.path.exists(fn):
        return fn
    if not download:
        return None
    imgdata = ahttp.get(url, binary=1, verify=False)
    if not imgdata:
        # nothing received: don't leave an empty cache file behind
        return None
    # the cache directory may not exist yet when only "img" URLs were seen
    if not os.path.isdir(icon_dir):
        try:
            os.mkdir(icon_dir)
            open(icon_dir + "/.nobackup", "w").close()
        except OSError:
            if not os.path.isdir(icon_dir):
                raise
    with open(fn, "wb") as f:
        f.write(imgdata)
    if os.path.exists(fn):
        return fn
    return None
# download favicon for given URL
def download(url):
    """Fetch the favicon for homepage `url` into the icon cache.

    Order of attempts: Google conversion service (if `always_google`),
    then /favicon.ico on the homepage's domain, then the <link rel> icon
    named in the homepage HTML, and finally Google again as fallback.
    Nothing is done if the icon is already cached; with `only_google` set
    no other method is tried after the first Google attempt.
    """
    # skip if .png for domain already exists
    if available(url):
        return
    # fastest method, so default to google for now
    if always_google:
        google_ico2png(url)
        if available(url) or only_google:
            return
    try:
        # look for /favicon.ico first
        log.FAVICON("try /favicon.ico")
        direct_download("http://"+domain(url)+"/favicon.ico", file(url))
    except Exception:
        # not a bare except: Ctrl-C / interpreter exit must still get through
        try:
            # extract favicon filename from website <link rel>
            log.FAVICON("html <rel favicon>")
            html_download(url)
        except Exception as e:
            # fallback
            log.ERR(e)
            google_ico2png(url)
# retrieve PNG via Google ico2png
def google_ico2png(url):
    """Download the icon for `url`'s domain from the Google favicon service.

    The image is stored under the cache file name for `url`.  If
    `delete_google_stub` is set and the stored file has one of the known
    placeholder sizes, it is removed again.
    """
    log.FAVICON("google ico2png")
    GOOGLE = "http://www.google.com/s2/favicons?domain="
    saved_fn, _headers = urllib.urlretrieve(GOOGLE+domain(url), file(url))
    # test for stub image
    if not delete_google_stub:
        return
    if filesize(saved_fn) in google_placeholder_filesizes:
        os.remove(saved_fn)
def filesize(fn):
    """Return the size of the file `fn` in bytes."""
    return os.path.getsize(fn)
# mime magic
def filetype(fn):
    """Return "image/png" if file `fn` starts with a PNG signature.

    Only the first four bytes are inspected; any other content, including
    an empty file, yields "*/*".
    """
    with open(fn, "rb") as f:
        magic = f.read(4)
    # bytes 1..3 of the PNG signature spell "PNG"
    if magic[1:4] == b"PNG":
        return "image/png"
    return "*/*"
# favicon.ico
def direct_download(favicon, fn):
    """Download the image at URL `favicon` and store it as PNG file `fn`.

    At most 32768 bytes are read.  The data goes to "<fn>.tmp" first; a
    genuine PNG is resized in place and renamed to `fn`, anything else is
    converted with ico2png().  The temporary file is gone afterwards.

    Raises Exception on an HTTP status >= 300 or when the response is not
    announced as an image/* type.
    """
    # URL download
    r = urllib.urlopen(favicon)
    try:
        headers = r.info()
        log.HTTP(headers)
        # abort on HTTP errors
        if r.getcode() >= 300:
            raise Exception("HTTP error %s" % r.getcode())
        # a missing header counts as "not an image"
        content_type = (headers.get("Content-Type") or "").lower()
        if not content_type.startswith("image/"):
            raise Exception("can't use text/* content")
        data = r.read(32768)
    finally:
        r.close()
    # save file
    fn_tmp = fn+".tmp"
    with open(fn_tmp, "wb") as f:
        f.write(data)
    # check type -- inspect the temp file, `fn` does not exist yet
    if content_type == "image/png" and ".png" in favicon and filetype(fn_tmp) == "image/png":
        pngresize(fn_tmp)
        os.rename(fn_tmp, fn)
    else:
        try:
            ico2png(fn_tmp, fn)
        finally:
            # don't leave the temp file behind if the conversion fails
            os.remove(fn_tmp)
# peek at URL, download favicon.ico <link rel>
def html_download(url):
    """Download the favicon that the homepage `url` names in a <link rel>.

    Reads the first 4096 bytes of the page, takes the href of the first
    matching <link rel=icon> tag, resolves it against `url` and passes it
    to direct_download().

    Raises Exception if the page names no icon, plus whatever
    direct_download() raises.
    """
    # download html, look for @href in <link rel=shortcut icon>
    r = urllib.urlopen(url)
    try:
        html = r.read(4096)
    finally:
        r.close()
    if not isinstance(html, str):
        # Python 3 delivers bytes; latin-1 maps every byte to one character
        html = html.decode("iso-8859-1")
    rx = re.compile(r"""<link[^<>]+rel\s*=\s*"?\s*(?:shortcut\s+|fav)?icon[^<>]+href=["'](?P<href>[^<>"']+)["'<>\s].""")
    found = rx.findall(html)
    if not found:
        raise Exception("no <link rel=icon> in " + url)
    # only the first hit; several <link> tags must not be glued together
    favicon = found[0]
    log.DATA(favicon)
    # full url is used as is, just /pathname gets resolved against homepage
    if not favicon.startswith("http://"):
        favicon = compat2and3.urlparse.urljoin(url, favicon)
    log.FAVICON(favicon)
    # download
    direct_download(favicon, file(url))
# convert .ico file to .png format
def ico2png(ico, png_fn):
    """Convert the image file `ico` into the PNG file `png_fn`.

    Images wider than 16 pixels are scaled down to 16x16 first.
    """
    image = Image.open(ico)
    log.FAVICON_ICO2PNG(ico, png_fn, image)
    # resize -- Image.resize() returns a new image, it does not work in place
    if image.size[0] > 16:
        # ANTIALIAS was renamed to LANCZOS and later removed from Pillow
        resample = getattr(Image, "LANCZOS", None)
        if resample is None:
            resample = Image.ANTIALIAS
        image = image.resize((16, 16), resample)
    # .png format
    image.save(png_fn, "PNG", quality=98)
# resize an image
def pngresize(fn, x=16, y=16):
    """Scale the image file `fn` down to `x` by `y` pixels, in place.

    Files that are not wider than `x` are left untouched.
    """
    image = Image.open(fn)
    if image.size[0] > x:
        # ANTIALIAS was renamed to LANCZOS and later removed from Pillow
        resample = getattr(Image, "LANCZOS", None)
        if resample is None:
            resample = Image.ANTIALIAS
        # Image.resize() returns a new image, it does not work in place
        image = image.resize((x, y), resample)
        image.save(fn, "PNG", quality=98)
#-- test
if __name__ == "__main__":
    import sys
    # expects the homepage URL as the only argument
    if len(sys.argv) < 2:
        sys.exit("usage: %s <homepage-url>" % sys.argv[0])
    download(sys.argv[1])
|
|
|
|
<
|
>
>
|
>
>
|
<
>
>
|
<
|
>
>
>
|
<
|
>
|
>
>
>
>
>
|
>
|
<
|
>
>
>
|
<
<
<
|
|
>
>
|
>
|
>
>
>
|
|
|
|
|
>
>
>
|
>
>
>
>
>
|
|
>
>
>
>
|
|
>
>
|
>
>
>
|
>
|
|
>
|
|
>
>
>
>
>
|
<
>
>
>
>
|
>
>
>
>
>
>
|
>
>
>
>
|
>
|
>
|
>
>
>
|
>
>
|
|
|
|
|
|
>
>
>
>
|
>
|
|
|
<
>
>
|
|
>
>
>
>
|
>
>
>
>
>
>
|
>
|
<
|
|
|
|
|
|
|
|
|
|
|
|
>
|
>
|
<
<
>
>
>
>
|
<
<
<
|
<
<
<
<
<
<
<
|
<
>
|
<
|
|
<
<
<
|
<
<
|
|
>
|
<
|
<
<
|
<
<
|
|
|
>
<
<
<
<
>
>
|
|
>
>
|
|
|
<
<
>
>
|
<
<
|
<
<
<
<
|
|
<
|
|
|
<
<
<
|
<
<
|
<
<
<
|
|
<
|
|
>
<
<
<
<
<
<
<
<
<
|
|
|
<
|
<
|
<
|
<
|
<
<
<
|
|
<
<
<
<
|
>
>
|
<
<
<
|
<
|
|
<
|
|
<
<
|
>
|
<
<
<
<
|
|
<
<
<
>
>
>
|
|
>
>
|
|
|
<
|
|
>
|
|
|
<
<
|
>
>
|
|
>
>
|
<
|
>
|
<
<
<
<
<
<
|
| 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329 | # encoding: utf-8
# api: streamtuner2
# title: Favicons
# description: Display station favicons/logos. Instantly download them when ▸playing.
# config:
# { name: favicon_google_first, type: bool, value: 1, description: "Prefer faster Google favicon to PNG conversion service." }
# { name: favicon_delete_stub , type: bool, value: 1, description: "Don't accept any placeholder favicons." }
# [ main-name: google_homepage ]
# [ main-name: load_favicon ]
# type: feature
# category: ui
# version: 1.7
# depends: streamtuner2 >= 2.1.9, python:pil
# priority: standard
#
# This module fetches a favicon for each station, or a small banner
# or logo for some channel modules. It converts .ico image files and
# sanitizes .png or .jpeg images even prior display.
#
# It prepares cache files in ~/.config/streamtuner2/icons/ in agreement
# with the station list display logic. Either uses station "homepage"
# or "img" URLs from entry rows{}.
#
# While it can often discover favicons directly from station homepages,
# it's often speedier to use the Google PNG conversion service. Both
# depend on a recent Pillow2 python module (superseding the PIL module).
# Else may display images with fragments if converted from ICO files.
#
# Has recently been rewritten, is somewhat less entangled with other
# modules now:
# · GenericChannel presets row["favicon"] with cache image filename
# in any case, uses row["homepage"] or row["img"] as template
# · the filename shortening functionality must be shared between
# favicon and genericchannel.prepare() code
# · uikit.columns() merely checks row["favicon"] for file existence
# on redraws
# · main.play() only calls .update_playing() or .update_all()
# · urllib is no longer required, uses the main ahttp/requests API
# · still might need unhtml() from channels/__init__ later
# · Reduce config options → move main favicon options here?
import os, os.path
from compat2and3 import StringIO
import re
from config import *
import ahttp
from PIL import Image
from uikit import gtk
# Session-wide list of URLs for which a lookup was already attempted.
# Ensures that we don't try to download a single favicon twice per session.
# If it's not available the first time, we won't get it after switching
# stations back and forth either. So URLs that were already attempted are
# simply skipped.
tried_urls = []
# Hook up as feature plugin
#
class favicon(object):
    """Feature plugin that fills the favicon cache for station rows.

    On construction it registers update_playing() with the parent's "play"
    hook and prepares the icon cache directory (conf.icon_dir).
    """

    # plugin attributes
    module = "favicon"
    meta = plugin_meta()

    # Register with main
    def __init__(self, parent):
        """Hook into `parent` (the main object) and set up conf.icon_dir."""
        # Reference main, and register hook
        self.parent, self.main = parent, parent
        parent.hooks["play"].append(self.update_playing)

        # Prepare favicon cache directory
        conf.icon_dir = conf.dir + "/icons"
        if not os.path.exists(conf.icon_dir):
            os.mkdir(conf.icon_dir)
            open(conf.icon_dir + "/.nobackup", "a").close()

    # Main callback: update favicon cache for complete list of station rows
    def update_all(self, *args, **kwargs):
        """Run update_rows() with the given arguments via parent.thread()."""
        #kwargs[pixstore] = self.parent.channel()._ls, ...
        self.parent.thread(self.update_rows, *args, **kwargs)

    # Main callback for a single play() event
    def update_playing(self, row, pixstore=None):
        """Look up homepage and favicon for the station `row` being played."""
        # Homepage search (a None or empty "homepage" both count as missing)
        if conf.google_homepage and not row.get("homepage"):
            self.google_find_homepage(row)

        # Favicon only for currently playing station
        if conf.load_favicon:
            if row.get("homepage") or row.get("img"):
                self.update_all([row], pixstore=pixstore)

    # Run through rows[] to update "favicon" from "homepage" or "img",
    # optionally display new image right away in ListStore
    def update_rows(self, entries, pixstore=None):
        """Download missing cache images for all rows in `entries`.

        entries  -- iterable of station dicts ("img" / "homepage" keys)
        pixstore -- optional (liststore, pixbuf column, row number) tuple,
                    handed on to update_pixstore() after a successful fetch
        """
        for i, row in enumerate(entries):
            ok = False
            img = row.get("img")

            # Try just once, be it the homepage or the banner URL
            if row.get("homepage") in tried_urls:
                continue
            if img and img in tried_urls:
                continue

            # Cache image filename: have or can't have
            favicon_fn = row_to_fn(row)
            if not favicon_fn:
                continue
            if os.path.exists(favicon_fn):
                continue

            # Custom "img" banner/logo as favicon
            if img:
                tried_urls.append(img)
                ok = banner_localcopy(img, favicon_fn)

            # Homepage to favicon
            elif row.get("homepage"):
                tried_urls.append(row["homepage"])
                if conf.favicon_google_first:
                    ok = fav_google_ico2png(row["homepage"], favicon_fn)
                else:
                    ok = fav_from_homepage(row["homepage"], favicon_fn)

            # Update TreeView
            if ok:
                self.update_pixstore(row, pixstore, i)

    # Update favicon in treeview/liststore
    def update_pixstore(self, row, pixstore=None, row_i=None):
        """Load the cache image for `row` into the liststore, if present.

        pixstore -- (liststore, pixbuf column, row number) tuple; nothing
                    is done when it is empty
        row_i    -- row number to use when the tuple carries None for it
        """
        log.FAVICON_UPDATE_PIXSTORE(pixstore, row_i)
        if not pixstore:
            return

        # Unpack ListStore, pixbuf column no, preset rowno
        ls, pix_entry, i = pixstore
        # Else rows-iteration rowno
        if i is None:
            i = row_i

        # Existing "favicon" cache filename
        if row.get("favicon"):
            fn = row["favicon"]
        else:
            fn = row_to_fn(row)

        # Update pixbuf in active station liststore
        if fn and os.path.exists(fn):
            try:
                p = gtk.gdk.pixbuf_new_from_file(fn)
                ls[i][pix_entry] = p
            except Exception as e:
                log.ERR("Update_pixstore image", fn, "error:", e)

    #--- somewhat unrelated ---
    #
    # Should become a distinct feature plugin. - It just depends on correct
    # invocation order for plugins to work.
    # Googling is often blocked anyway, because this is clearly a bot request.
    # Tag requests with ?client=streamtuner2 purposefully still.
    #
    def google_find_homepage(self, row):
        """ Searches for missing homepage URL via Google.

        The literal row["title"] is used as search term; the first hit is
        stored in row["homepage"].  The station's row["url"] is recorded in
        `tried_urls`, and a station that was already searched for is
        skipped, so each station triggers at most one search per session.
        """
        station_url = row.get("url")
        if station_url in tried_urls:
            return
        tried_urls.append(station_url)

        if row.get("title"):
            rx_u = re.compile(r'/url\?q=(https?://[^"&/]+)')

            # Use literal station title now
            title = row["title"]

            # Do 'le google search
            html = ahttp.get("http://www.google.com/search", params=dict(hl="en", q=title, client="streamtuner2"), ajax=1)

            # Find first URL hit
            url = rx_u.findall(html or "")
            if url:
                row["homepage"] = ahttp.fix_url(url[0])
#-----------------
# Convert row["img"] or row["homepage"] into local favicon cache filename
def row_to_fn(row):
    """Return the cache file name for a station row, or None.

    Uses row["img"] if set, else row["homepage"].  The URL is lowercased,
    stripped of its scheme and one trailing slash, and every character
    other than word characters, ".", "_" and "-" becomes "_".  The result
    is "<conf.icon_dir>/<name>.png".  Returns None if the row has neither
    URL.
    """
    url = row.get("img") or row.get("homepage") or None
    if url:
        url = url.lower()
        url = re.sub(r"^\w+://|/$", "", url)   # strip proto:// and trailing /
        url = re.sub(r"[^\w._-]", "_", url)    # remove any non-word characters
        url = "{}/{}.png".format(conf.icon_dir, url)
    return url
# Copy banner row["img"] into icons/ directory
def banner_localcopy(url, fn):
    """Download the banner/logo at `url` and store it as cache file `fn`.

    Returns False if `url` is not an http(s) URL whose host part starts
    with at least ten name characters, or if nothing was received;
    otherwise the result of store_image().
    """
    # Check URL and target filename
    if not re.match(r"^https?://[\w.-]{10}", url):
        return False

    # Fetch and save
    imgdata = ahttp.get(url, binary=1, verify=False)
    if not imgdata:
        return False
    return store_image(imgdata, fn)
# Check valid image, possibly convert, and save to cache filename
def store_image(imgdata, fn, resize=None):
    """Convert raw image data to PNG and write it to `fn`.

    imgdata -- raw image bytes as downloaded
    fn      -- target cache file name
    resize  -- if set, images wider than this are scaled to resize x resize

    Data with a recognized signature (PNG, GIF, JFIF, ICO, SVG) is run
    through PIL and re-encoded as PNG.  Only data that is PNG afterwards
    gets written.  Returns True if the file was written, False otherwise.
    """
    # Convert accepted formats -- even PNG for filtering now
    if re.match(br'^(.PNG|GIF\d+|.{0,15}JFIF|\x00\x00\x01\x00|.{0,255}<svg[^>]+svg)', imgdata):
        try:
            # Read from byte/str
            image = Image.open(StringIO(imgdata))
            log.FAVICON_IMAGE_TO_PNG(image, resize)

            # Resize -- Image.resize() returns a new image, not in-place
            if resize and image.size[0] > resize:
                # ANTIALIAS was renamed to LANCZOS, later removed from Pillow
                resample = getattr(Image, "LANCZOS", None)
                if resample is None:
                    resample = Image.ANTIALIAS
                image = image.resize((resize, resize), resample)

            # Convert to PNG via string buffer
            out = StringIO()
            image.save(out, "PNG", quality=98)
            imgdata = out.getvalue()

        except Exception as e:
            log.ERR("favicon/logo conversion error:", e)
            return False
    else:
        log.WARN("couldn't detect mime type")

    # PNG already?
    if re.match(b"^.(PNG)", imgdata):
        try:
            with open(fn, "wb") as f:
                f.write(imgdata)
            return True
        except Exception as e:
            log.ERR("favicon.store_image() failure:", e)
    return False
# PNG via Google ico2png
def fav_google_ico2png(url, fn):
    """Fetch the favicon for `url`'s domain from the Google service.

    Returns False if nothing was received, or if conf.favicon_delete_stub
    is set and the image has one of the known placeholder sizes; otherwise
    the result of store_image() for cache file `fn`.
    """
    log.FAVICON("google ico2png")

    # Download from service
    domain = re.sub(r"^\w+://|/.*$", "", url).lower()
    geturl = "http://www.google.com/s2/favicons?domain={}".format(domain)
    imgdata = ahttp.get(geturl, binary=1, timeout=2.5)

    # Failed or empty response
    if not imgdata:
        return False

    # Check for stub sizes
    if conf.favicon_delete_stub and len(imgdata) in (726, 896):  # google_placeholder_filesizes
        log.FAVICON("placeholder size, skipping")
        return False

    # Save
    return store_image(imgdata, fn)
# Peek at homepage URL, download favicon.ico <link rel>, convert to PNG file, resize to 16x16
def fav_from_homepage(url, fn):
    """Fetch the favicon named by homepage `url` into cache file `fn`.

    Returns False if no icon URL could be determined or the response is
    not announced as a PNG, JPEG or ICO image; otherwise the result of
    store_image() with a 16 pixel size limit.
    """
    # Check for <link rel=icon>
    img = html_link_icon(url)
    if not img:
        return False

    # Fetch image, verify MIME type
    r = ahttp.get(img, binary=1, content=0, timeout=2.75)
    # NOTE(review): assumes r.headers is dict-like with .get() -- confirm
    # against ahttp. A missing header counts as the wrong type.
    content_type = r.headers.get("content-type") or ""
    if not re.match('image/(png|jpe?g|png|ico|x-ico|vnd.microsoft.ico)', content_type, re.I):
        log.WARN("content-type wrong", r.headers)
        return False

    # Convert, resize and save
    return store_image(r.content, fn, resize=16)
# Download HTML, look for favicon name in <link rel=shortcut icon>.
#
# Very rough, doesn't respect any <base href=>; nor does any entity
# decoding.
#
def html_link_icon(url, href="/favicon.png"):
    """Return the absolute favicon URL for the homepage `url`.

    url  -- homepage URL to download and scan
    href -- icon path to assume if the page names none

    The href of the last <link> tag whose rel is an icon type is used and
    resolved against `url`, so absolute, root-relative and relative icon
    paths all come out as a full URL.
    """
    # local import keeps this block self-contained on Python 2 and 3
    try:
        from urllib.parse import urljoin
    except ImportError:
        from urlparse import urljoin

    # a failed request is treated like a page without any <link>
    html = ahttp.get(url, encoding="iso-8859-1", timeout=3.5) or ""
    # Extract
    for link in re.findall(r""" <link ([^<>]+) > """, html, re.X):
        pair = re.findall(r""" \b(rel|href) \s*=\s* ["']? ([^<>"']+) ["']? """, link, re.X)
        pair = {name: val for name, val in pair}
        if pair.get("rel", "ignore") in ("shortcut icon", "favicon", "icon", "icon shortcut") and pair.get("href"):
            href = pair["href"].strip()  # unhtml()
    # Resolve icon path against homepage URL
    return urljoin(url, href)
#-- test
if __name__ == "__main__":
    import sys
    # expects the homepage URL as the only argument
    if len(sys.argv) < 2:
        sys.exit("usage: %s <homepage-url>" % sys.argv[0])
    # The plugin class needs the main object for its hooks, so the
    # module-level fetchers are called directly here instead.
    conf.icon_dir = conf.dir + "/icons"
    if not os.path.exists(conf.icon_dir):
        os.mkdir(conf.icon_dir)
    homepage = sys.argv[1]
    target = row_to_fn({"homepage": homepage})
    if conf.favicon_google_first:
        ok = fav_google_ico2png(homepage, target)
    else:
        ok = fav_from_homepage(homepage, target)
    print(target if ok else "no favicon retrieved")
|