How to follow meta refreshes in Python
Here is a solution using BeautifulSoup and httplib2 (and certificate-based authentication):

    import BeautifulSoup
    import httplib2

    def meta_redirect(content):
        soup = BeautifulSoup.BeautifulSoup(content)
        result=soup.find("meta",attrs={"http-equiv":"Refresh"})
        if result:
            wait,text=result["content"].split(";")
            if text.strip().lower().startswith("url="):
                url=text.strip()[4:]
                return url
        return None

    def get_content(url, key, cert):
        h=httplib2.Http(".cache")
        h.add_certificate(key,cert,"")
        resp, content = h.request(url,"GET")
        # follow the chain of redirects
        while meta_redirect(content):
            resp, content = h.request(meta_redirect(content),"GET")
        return

… Read more