# Web Crawling
## Mechanize
# Source: http://stockrt.github.io/p/emulating-a-browser-in-python-with-mechanize/
import mechanize
import cookielib
# Cookie jar (LWP file format) that will hold the session's cookies
cj = cookielib.LWPCookieJar()
# Browser instance, configured to keep its cookies in the jar above
br = mechanize.Browser()
br.set_cookiejar(cj)
# Browser options: each set_handle_* call switches one response-handling
# feature of the browser on or off.
br.set_handle_equiv(True)     # treat HTML http-equiv headers like HTTP headers
br.set_handle_gzip(True)      # gzip-encoded responses
br.set_handle_redirect(True)  # HTTP redirects
br.set_handle_referer(True)   # send a Referer header
br.set_handle_robots(False)   # do not consult robots.txt
# Follow Refresh headers only when the delay is at most 1 second, so the
# browser never hangs waiting on a long refresh timer.
# The first argument is an on/off flag: passing True lets the browser build
# its own refresh handler with max_time=1, instead of constructing a
# throwaway instance from the private mechanize._http module just to get a
# truthy value.
br.set_handle_refresh(True, max_time=1)
# Uncomment any of the following lines to enable debugging output:
#br.set_debug_http(True)
#br.set_debug_redirects(True)
#br.set_debug_responses(True)
# Headers sent with every request: a single User-agent entry carrying a
# desktop Firefox identification string.
useragent = [
    (
        'User-agent',
        "Mozilla/5.0 (Windows NT 6.1; rv:7.0.1) Gecko/20100101 Firefox/7.0.1",
    ),
]
br.addheaders = useragent