77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
|
"/" + ("page"+str(page) if page>1 else "")
)
)
# Is there a next page?
if str(page+1) not in rx_pages.findall(html[-1]):
break
self.parent.status(float(page)/float(max_pages+1))
# Alternatively try regex or pyquery parsing
#log.HTTP(html)
for use_rx in [not conf.pyquery, conf.pyquery]:
try:
entries = (self.with_regex(html) if use_rx else self.with_dom(html))
if len(entries):
|
|
|
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
|
"/" + ("page"+str(page) if page>1 else "")
)
)
# Is there a next page?
if str(page+1) not in rx_pages.findall(html[-1]):
break
self.parent.status(float(page)/float(max_pages+1), timeout=1)
# Alternatively try regex or pyquery parsing
#log.HTTP(html)
for use_rx in [not conf.pyquery, conf.pyquery]:
try:
entries = (self.with_regex(html) if use_rx else self.with_dom(html))
if len(entries):
|