#!/usr/bin/env ruby
require "net/http"
require "uri"
require "cgi"
require "yaml"
# Discovers category ids by requesting /browse/<id> for a range of ids and
# writes what it finds to config/pirate_categories.yml next to this script.
#
# The scan treats ids as blocks of 100. An empty response body is taken to
# mean "nothing more in this direction within the current block", so the scan
# jumps to the neighbouring block instead of probing every id.
module PirateCategories
  SITE = URI.parse("http://thepiratebay.org/")
  # Both scan directions share this pattern so their keys agree. The /m flag
  # lets the title span several lines.
  TITLE_PATTERN = %r{<title>(.*?)</title>}m
  TITLE_SUFFIX = " - TPB"
  FIRST_ID = 100
  LAST_ID = 699
  BLOCK_SIZE = 100

  module_function

  # Returns the body of /browse/<id> as a String ("" when the response
  # carries no body at all).
  def browse_body(http, id)
    http.get("/browse/#{id}").body.to_s
  end

  # Stores id under the page's raw <title> text. Pages without a usable
  # title are ignored instead of being stored under a junk key.
  def record(found, body, id)
    title = body[TITLE_PATTERN, 1]
    return if title.nil? || title.strip.empty?

    found[title] = id
  end

  # Walks upwards from FIRST_ID. On an empty page, skips to the start of the
  # next block of 100. Prints the next id to probe after every request.
  def scan_up(http, found)
    id = FIRST_ID
    while id <= LAST_ID
      body = browse_body(http, id)
      if body.empty?
        id = (id / BLOCK_SIZE + 1) * BLOCK_SIZE
      else
        record(found, body, id)
        id += 1
      end
      puts id
    end
  end

  # Walks downwards from LAST_ID to pick up ids at the top end of each block
  # that the upward scan never reaches. On an empty page, jumps to the last
  # id of the block directly below (e.g. 698 -> 599), so no block is skipped.
  def scan_down(http, found)
    id = LAST_ID
    while id > 0
      body = browse_body(http, id)
      if body.empty?
        id = (id / BLOCK_SIZE) * BLOCK_SIZE - 1
      else
        record(found, body, id)
        id -= 1
      end
      puts id
    end
  end

  # Turns { raw_title => id } into { "_RAW" => raw, group => { name => id } }.
  # A raw title is HTML-unescaped and split on ">": the first part is the
  # group, the last part (minus the " - TPB" suffix) is the category name.
  def group_by_title(raw)
    grouped = { "_RAW" => raw }
    raw.each do |raw_title, id|
      parts = CGI.unescapeHTML(raw_title).split(">")
      next if parts.empty? # title consisted solely of ">" separators

      group = parts.first.strip
      name = parts.last.strip.sub(TITLE_SUFFIX, "")
      (grouped[group] ||= {})[name] = id
    end
    grouped
  end

  # Runs both scans over one HTTP session and returns { raw_title => id }.
  def discover
    found = {}
    Net::HTTP.start(SITE.host, SITE.port) do |http|
      scan_up(http, found)
      scan_down(http, found)
    end
    found
  end

  # Writes the grouped categories as YAML to <script_dir>/config/pirate_categories.yml,
  # creating the config directory when it does not exist yet.
  def write_yaml(script_dir, categories)
    config_dir = File.join(script_dir, "config")
    Dir.mkdir(config_dir) unless File.directory?(config_dir)
    File.open(File.join(config_dir, "pirate_categories.yml"), "w") do |f|
      f.puts(YAML.dump(categories))
    end
  end
end

PirateCategories.write_yaml(
  File.dirname(__FILE__),
  PirateCategories.group_by_title(PirateCategories.discover)
)