Parse HTML content in VBA

Just a couple things that hopefully will get you in the right direction:

  • clean up a bit: remove the readystate property testing loop. The value returned by the readystate property will never change in this context – code will pause after the send instruction, to resume only once the server response is received, or has failed to do so. The readystate property will be set accordingly, and the code will resume execution. You should still test for the ready state, but the loop is just unnecessary

  • target the right HTML elements: you are searching through the tr elements – while the logic of how you use these elements in your code actually looks to point to td elements

  • make sure the properties are actually available for the objects you are using them on: to help you with this, try and declare all your variable as specific objects instead of the generic Object. This will activate intellisense. If you have a difficult time finding the actual name of your object as defined in the relevant library in a first place, declare it as the generic Object, run your code, and then inspect the type of the object – by printing typename(your_object) to the debug window for instance. This should put you on your way

I have also included some code below that may help. If you still can’t get this to work and you can share your urls – plz do that.

Sub getInfoWeb()

    Dim cell As Integer
    Dim xhr As MSXML2.XMLHTTP60
    Dim doc As MSHTML.HTMLDocument
    Dim table As MSHTML.HTMLTable
    Dim tableCells As MSHTML.IHTMLElementCollection
    
    Set xhr = New MSXML2.XMLHTTP60
   
    For cell = 1 To 5
        
        ItemNbr = Cells(cell, 3).Value
        
        With xhr
        
            .Open "GET", "http://www.example.com/?item=" & ItemNbr, False
            .send
            
            If .readyState = 4 And .Status = 200 Then
                Set doc = New MSHTML.HTMLDocument
                doc.body.innerHTML = .responseText
            Else
                MsgBox "Error" & vbNewLine & "Ready state: " & .readyState & _
                vbNewLine & "HTTP request status: " & .Status
            End If
            
        End With
        
        Set table = doc.getElementById("list-table")
        Set tableCells = table.getElementsByTagName("td")
        
        For Each tableCell In tableCells
            If tableCell.getAttribute("title") = "Material" Then
                Cells(cell, 14).Value = tableCell.NextSibling.innerHTML
            End If
        Next tableCell
    
    Next cell
    
End Sub

EDIT: as a follow-up to the further information you provided in the comment below – and the additional comments I have added

'Determine your product number
    'Open an xhr for your source url, and retrieve the product number from there - search for the tag which
    'text include the "productnummer:" substring, and extract the product number from the outerstring
    'OR
    'if the product number consistently consists of the fctkeywords you are entering in your source url
    'with two "0" appended - just build the product number like that
'Open an new xhr for this url "http://www.pfconcept.com/cgi-bin/wspd_pcdb_cgi.sh/y/y2productspec-ajax.p?itemc=" & product_number & "&_search=false&rows=-1&page=1&sidx=&sord=asc"
'Load the response in an XML document, and retrieve the material information

Sub getInfoWeb()

    Dim xhr As MSXML2.XMLHTTP60
    Dim doc As MSXML2.DOMDocument60
    Dim xmlCell As MSXML2.IXMLDOMElement
    Dim xmlCells As MSXML2.IXMLDOMNodeList
    Dim materialValueElement As MSXML2.IXMLDOMElement
    
    Set xhr = New MSXML2.XMLHTTP60
        
        With xhr
            
            .Open "GET", "http://www.pfconcept.com/cgi-bin/wspd_pcdb_cgi.sh/y/y2productspec-ajax.p?itemc=10031700&_search=false&rows=-1&page=1&sidx=&sord=asc", False
            .send
            
            If .readyState = 4 And .Status = 200 Then
                Set doc = New MSXML2.DOMDocument60
                doc.LoadXML .responseText
            Else
                MsgBox "Error" & vbNewLine & "Ready state: " & .readyState & _
                vbNewLine & "HTTP request status: " & .Status
            End If
            
        End With
        
        Set xmlCells = doc.getElementsByTagName("cell")

        For Each xmlCell In xmlCells
            If xmlCell.Text = "Materiaal" Then
                Set materialValueElement = xmlCell.NextSibling
            End If
        Next
        
        MsgBox materialValueElement.Text
    
End Sub

EDIT2: an alternative automating IE

Sub searchWebViaIE()
    Dim ie As SHDocVw.InternetExplorer
    Dim doc As MSHTML.HTMLDocument
    Dim anchors As MSHTML.IHTMLElementCollection
    Dim anchor As MSHTML.HTMLAnchorElement
    Dim prodSpec As MSHTML.HTMLAnchorElement
    Dim tableCells As MSHTML.IHTMLElementCollection
    Dim materialValueElement As MSHTML.HTMLTableCell
    Dim tableCell As MSHTML.HTMLTableCell
    
    Set ie = New SHDocVw.InternetExplorer
    
    With ie
        .navigate "http://www.pfconcept.com/cgi-bin/wspd_pcdb_cgi.sh/y/y2facetmain.p?fctkeywords=100317&world=general#tabs-4"
        .Visible = True
        
        Do While .readyState <> READYSTATE_COMPLETE Or .Busy = True
            DoEvents
        Loop
        
        Set doc = .document
        
        Set anchors = doc.getElementsByTagName("a")
        
        For Each anchor In anchors
            If InStr(anchor.innerHTML, "Product Specificatie") <> 0 Then
                anchor.Click
                Exit For
            End If
        Next anchor
        
        Do While .readyState <> READYSTATE_COMPLETE Or .Busy = True
            DoEvents
        Loop
    
    End With
        
    For Each anchor In anchors
        If InStr(anchor.innerHTML, "Product Specificatie") <> 0 Then
            Set prodSpec = anchor
        End If
    Next anchor
    
    Set tableCells = doc.getElementById("list-table").getElementsByTagName("td")
    
    If Not tableCells Is Nothing Then
        For Each tableCell In tableCells
            If tableCell.innerHTML = "Materiaal" Then
                Set materialValueElement = tableCell.NextSibling
            End If
        Next tableCell
    End If
    
    MsgBox materialValueElement.innerHTML
    
End Sub

Leave a Comment