How do I parse an HTML table with Nokogiri?

#!/usr/bin/ruby1.8

require 'nokogiri'
require 'pp'

html = <<-EOS
  (The HTML from the question goes here)
EOS

# Pairs of [result key, XPath]. Each XPath is relative and is evaluated
# against one table row at a time. Defined once, outside the row loop.
field_xpaths = [
  [:title, 'td[3]/div[1]/a/text()'],
  [:name, 'td[3]/div[2]/span/a/text()'],
  [:date, 'td[4]/text()'],
  [:time, 'td[4]/span/text()'],
  [:number, 'td[5]/a/text()'],
  [:views, 'td[6]/text()'],
]

doc = Nokogiri::HTML(html)

# Every <tr> directly under the <tbody> whose id is "threadbits_forum_251".
rows = doc.xpath('//table/tbody[@id="threadbits_forum_251"]/tr')

# Build one hash per row, mapping each key to the stripped text of the first
# node matched by its XPath.
details = rows.map do |row|
  detail = {}
  field_xpaths.each do |name, xpath|
    node = row.at_xpath(xpath)
    # Read the node's text content instead of calling #to_s: #to_s serializes
    # the node as HTML, so "&", "<" and ">" would come back entity-escaped
    # (e.g. "Tom &amp; Jerry"). A missing node still yields an empty string.
    detail[name] = node ? node.text.strip : ''
  end
  detail
end
pp details

# => [{:time=>"23:35",
# =>   :title=>"Vb4 Gold Released",
# =>   :number=>"24",
# =>   :date=>"06 Jan 2010",
# =>   :views=>"1,320",
# =>   :name=>"Paul M"}]

Leave a Comment