124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150 | """
# With the new shallow <td> lists it doesn't make much sense to use
# the pyquery DOM traversal. There aren't any sensible selectors to
# extract values; it's just counting the tags.
# And there's a bug in PyQuery 1.2.4 and CssSelector. So make two
# attempts, alternate between regex and DOM; user preference first.
use_regex = not conf.get("pyquery") or not pq
retry = 2
while retry:
retry -= 1
try:
if use_regex:
return self.with_regex(html)
else:
return self.with_dom(html)
except Exception as e:
use_regex ^= 1
__print__(dbg.ERR, e)
return []
# Extract using regex
def with_regex(self, html):
__print__(dbg.PROC, "channels.shoutcast.update_streams: regex scraping mode")
rx_stream = re.compile(
""" |
>
|
|
|
<
>
|
| 124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151 | """
# With the new shallow <td> lists it doesn't make much sense to use
# the pyquery DOM traversal. There aren't any sensible selectors to
# extract values; it's just counting the tags.
# And there's a bug in PyQuery 1.2.4 and CssSelector. So make two
# attempts, alternate between regex and DOM; user preference first.
entries = []
use_regex = not conf.get("pyquery") or not pq
retry = 2
while retry and not entries:
retry -= 1
try:
if use_regex:
entries = self.with_regex(html)
else:
entries = self.with_dom(html)
except Exception as e:
__print__(dbg.ERR, e)
use_regex ^= 1
return entries
# Extract using regex
def with_regex(self, html):
__print__(dbg.PROC, "channels.shoutcast.update_streams: regex scraping mode")
rx_stream = re.compile(
""" |