81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
|
# Is there a next page?
if str(page+1) not in rx_pages.findall(html[-1]):
break
self.parent.status(float(page)/float(max_pages+1), timeout=1)
# Alternatively try regex or pyquery parsing
#log.HTTP(html)
for use_rx in [not conf.pyquery, conf.pyquery]:
try:
entries = (self.with_regex(html) if use_rx else self.with_dom(html))
if len(entries):
break
except Exception as e:
log.ERR(e)
continue
# fin
log.FINISHED("internet_radio.update_streams")
return entries
# Regex extraction
def with_regex(self, html):
log.PROC("internet-radio, regex")
r = []
html = "\n".join(html)
|
<
<
<
|
<
<
<
<
>
>
>
>
>
>
>
>
>
|
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
|
# Is there a next page?
if str(page+1) not in rx_pages.findall(html[-1]):
break
self.parent.status(float(page)/float(max_pages+1), timeout=1)
# Alternatively try regex or pyquery parsing
#log.HTTP(html)
entries = self.from_html(html)
# fin
log.FINISHED("internet_radio.update_streams")
return entries
# Switch update method
@use_rx
def from_html(self, html, use_rx):
    """Run one of the two HTML extractors over the fetched pages.

    html is handed through unchanged to the selected extractor.
    A truthy use_rx selects with_regex(); anything else selects
    with_dom(). The extractor's return value is returned as-is.

    NOTE(review): the visible caller passes only html, so use_rx is
    presumably injected by the @use_rx decorator (defined outside
    this view) -- confirm.
    """
    # Choose the bound extractor first, then make a single call.
    extractor = self.with_regex if use_rx else self.with_dom
    return extractor(html)
# Regex extraction
def with_regex(self, html):
log.PROC("internet-radio, regex")
r = []
html = "\n".join(html)
|