How to follow meta refreshes in Python

Here is a solution using BeautifulSoup and httplib2 (with certificate-based authentication):

import BeautifulSoup
import httplib2

def meta_redirect(content):
    """Return the target URL of an HTML meta refresh, or None.

    Parses ``content`` with BeautifulSoup and looks for a
    ``<meta http-equiv="refresh" content="<delay>; url=<target>">`` tag.

    Args:
        content: HTML markup of the page to inspect.

    Returns:
        The redirect target as a string, or None when the page has no
        meta refresh that names a URL.
    """
    soup = BeautifulSoup.BeautifulSoup(content)

    for tag in soup.findAll("meta"):
        # Attribute values are matched case-insensitively: pages write
        # "refresh", "Refresh" and "REFRESH" interchangeably.
        if (tag.get("http-equiv") or "").strip().lower() != "refresh":
            continue

        # Split on the FIRST ";" only. The target URL may itself contain
        # semicolons, and a bare delay such as content="5" has none at all;
        # both cases made a two-way unpack of split(";") raise ValueError.
        _wait, _sep, target = (tag.get("content") or "").partition(";")
        target = target.strip()
        if target.lower().startswith("url="):
            # Drop the "url=" prefix and any quotes wrapped around the URL.
            url = target[4:].strip().strip("'\"")
            if url:
                return url
    return None

def get_content(url, key, cert, max_redirects=10):
    """Fetch ``url`` with a client certificate, following meta refreshes.

    Args:
        url: Address of the first page to request.
        key: Client key, passed to ``httplib2.Http.add_certificate``.
        cert: Client certificate, passed to
            ``httplib2.Http.add_certificate``.
        max_redirects: Maximum number of meta refreshes to follow before
            giving up. Guards against pages that refresh in a cycle.

    Returns:
        The body of the first response that contains no meta refresh.

    Raises:
        httplib2.RedirectLimit: If more than ``max_redirects`` meta
            refreshes are encountered.
    """
    h = httplib2.Http(".cache")
    h.add_certificate(key, cert, "")

    resp, content = h.request(url, "GET")

    # Follow the chain of meta refreshes. Each page is parsed exactly once
    # and the result reused for both the loop test and the next request.
    # NOTE: the target is requested verbatim; relative refresh targets are
    # not resolved against the current page URL.
    target = meta_redirect(content)
    hops = 0
    while target:
        if hops >= max_redirects:
            raise httplib2.RedirectLimit(
                "Followed more than %d meta refreshes." % max_redirects,
                resp,
                content,
            )
        resp, content = h.request(target, "GET")
        hops += 1
        target = meta_redirect(content)

    return content

Leave a Comment