require 'rubygems'
# Needs hpricot >= 0.5.00.
# (Original author's note, kept verbatim: "works with version 6.0 ruby".)
require 'hpricot'
require 'open-uri'
require 'uri'
require 'pp'
require "rexml/document"
require "date"
require "yaml"
require "time"

# Example search URL:
#   http://thepiratebay.org/s/?q=cow&searchTitle=on&page=0&orderby=99

# Background for the patch below:
#   http://code.whytheluckystiff.net/hpricot/ticket/37
# next_node simply steps N siblings forward; node_at did not suit the author.
module Hpricot
  module Traverse
    # Returns the node neighboring this node to the south: just below it.
    # This method includes text nodes and comments and such.
    #
    # loop - how many siblings to step forward (default 1).
    #
    # Returns nil when the node has no parent, is not found among its
    # parent's children, or the requested position is out of range.
    def next_node(loop = 1)
      return nil unless parent

      siblings = parent.children
      position = siblings.index(self)
      return nil unless position

      target = position + loop
      # A negative index would silently wrap around to the end of the array.
      target < 0 ? nil : siblings[target]
    end
  end
end

# Scraper for thepiratebay.org listings built on Hpricot and open-uri.
module PirateBay
  BASE_URL = URI.parse("http://thepiratebay.org/")

  # Not implemented yet: empty body, returns nil.
  def self.find(search_term, type)
  end

  # Loads the browse categories from a YAML file.
  #
  # yaml_file - path to the YAML file; defaults to
  #             config/pirate_categories.yml next to this source file.
  #
  # Returns the loaded Hash with every entry whose key starts with "_"
  # removed. Raises RuntimeError ("No categories file") when the file does
  # not exist.
  def self.get_browse_categories(yaml_file = nil)
    file = yaml_file || File.join(File.dirname(__FILE__), "config", "pirate_categories.yml")
    raise "No categories file" unless File.exist?(file)

    # Block form closes the handle; the previous code left it open.
    loaded = File.open(file) { |io| YAML.load(io) }
    (loaded || {}).delete_if { |key, _value| key.to_s =~ /^_/ }
  end

  # Not implemented yet: empty body, returns nil.
  def self.top_100
  end

  # Fetches one page of the "recent" listing.
  #
  # page - 1-based page number (default 1); values below 1 are treated as 1.
  #
  # Returns an Array of Torrent, one per row of the searchResult table,
  # skipping the row that holds the "Next" pagination image.
  def self.recent(page = 1)
    index = [page.to_i - 1, 0].max # the site numbers pages from 0
    uri = BASE_URL.clone
    uri.path = "/recent/#{index}"

    # URI#open (from open-uri) works on old and new Rubies, unlike
    # Kernel#open with a URL string, and the block form closes the stream.
    html = uri.open { |io| Hpricot(io) }

    rows = html.search("table[@id='searchResult']/tr")
    rows.reject { |tr| tr.search("img[@alt='Next']").first }.
         map { |tr| Torrent.process_from_table(tr) }
  end

  # Not implemented yet: empty body, returns nil.
  def self.tag_cloud
  end

  # Not implemented yet: empty body, returns nil.
  def self.search_cloud
  end

  # Not implemented yet: empty body, returns nil.
  def self.legal_threats
  end

  # One torrent entry as scraped from a listing row.
  class Torrent
    ATTRIBUTES = [
      :link, :uploaded_time, :category, :size, :seeders, :leechers,
      :comments, :file_name, :display_name, :details_link
    ]

    SECONDS_PER_DAY = 24 * 60 * 60

    # Multipliers that turn a value in the given unit into MiB.
    SIZE_FACTORS_TO_MIB = {
      "b"   => 1.0 / (1024 * 1024),
      "kib" => 1.0 / 1024,
      "mib" => 1.0,
      "gib" => 1024.0,
      "tib" => 1024.0 * 1024
    }

    attr_accessor(*ATTRIBUTES)

    # Builds a Torrent from one <tr> of the listing table.
    #
    # html - Hpricot element for the row. It must contain the category,
    #        download and detail links plus three right-aligned cells
    #        (size, seeders, leechers, in that order); a missing element
    #        raises NoMethodError.
    #
    # Returns a Torrent. :comments and :file_name are not filled in (TODO).
    def self.process_from_table(html)
      category_link = html.search("td/a[@title='More from this category']").first
      download_link = html.search("td/a[@title='Download this torrent']").first
      details_link  = html.search("td/a[@class='detLink']").first
      stats         = html.search("td[@align='right']")

      category_id = category_link.attributes["href"].split("/").last
      by_number = PirateBay::Categories::CATEGORIES_BY_NUMBER
      # NOTE(review): the id taken from the URL is a String; the YAML file may
      # load ids as Integers (schema not visible here), so retry with to_i.
      category = by_number[category_id] || by_number[category_id.to_i]

      # The upload date sits in the node just before the download link's parent.
      # The text arrives with "?" or non-breaking spaces where blanks belong
      # (original note: "we get 0?mins?ago bs not sure why").
      raw_date = download_link.parent.previous_sibling.inner_text
      clean_date = raw_date.gsub(/\?|&nbsp;|[[:space:]]/, " ")

      # TODO file_name comments
      Torrent.new(
        :category      => category,
        :link          => download_link.attributes["href"],
        :details_link  => details_link.attributes["href"],
        :display_name  => details_link.inner_text,
        :size          => normalize_size(stats.first.inner_html),
        :seeders       => stats[1].inner_html.to_i,
        :leechers      => stats[2].inner_html.to_i,
        :uploaded_time => date_string_to_date_object(clean_date)
      )
    end

    # Not implemented yet: empty body, returns nil.
    def self.process_from_details_page(html)
    end

    # torrent_hash - Hash keyed by the symbols in ATTRIBUTES; keys that are
    #                absent leave the attribute nil.
    def initialize(torrent_hash)
      ATTRIBUTES.each do |name|
        instance_variable_set("@#{name}", torrent_hash[name])
      end
    end

    # Orders torrents by the numeric prefix of #size (MiB after normalisation).
    def <=>(other)
      size.to_f <=> other.size.to_f
    end

    # Converts the site's date text into a Time. Clock times on the page are
    # treated as GMT, as the original code did.
    #
    # Recognised forms (after "?"/nbsp have been turned into spaces):
    #   "12 mins ago", "Today 14:05", "Y-day 03:10",
    #   "MM-DD HH:MM" (current year) and "MM-DD YYYY".
    #
    # Returns a Time (local zone for the relative forms, UTC for the absolute
    # ones), or nil when the text matches none of the forms.
    #
    # The original file placed a bare `private` above this definition, which
    # has no effect on `def self.` methods, so the method stays callable.
    def self.date_string_to_date_object(string)
      now = Time.now
      utc_now = now.getutc # getutc returns a copy; Time#utc would mutate `now`

      case string
      when /^(\d{1,2}) mins ago/i
        # A relative age needs no zone correction.
        Time.at(now.to_i - $1.to_i * 60)
      when /^today (\d\d):(\d\d)/i
        gmt_clock_time(utc_now, $1, $2).localtime
      when /^y-day (\d\d):(\d\d)/i
        (gmt_clock_time(utc_now, $1, $2) - SECONDS_PER_DAY).localtime
      when /^(\d\d)-(\d\d) (\d\d):(\d\d)/
        Time.gm(utc_now.year, $1.to_i, $2.to_i, $3.to_i, $4.to_i, 0, 0)
      when /^(\d\d)-(\d\d) (\d\d\d\d)/
        Time.gm($3.to_i, $1.to_i, $2.to_i, 0, 0, 0, 0)
      end
    end

    # Expresses a size such as "1.37 GiB" in MiB so sizes compare numerically.
    # B, KiB, GiB and TiB are converted with binary (1024-based) factors;
    # MiB values and unrecognised text are returned with whitespace tidied only.
    def self.normalize_size(raw)
      text = raw.to_s.gsub(/&nbsp;|[[:space:]]+/, " ").strip
      match = /\A([\d.]+) ?([kmgt]i)?b\z/i.match(text)
      return text unless match

      unit = "#{match[2]}b".downcase
      return text if unit == "mib"

      "#{match[1].to_f * SIZE_FACTORS_TO_MIB[unit]} MiB"
    end

    # Builds the UTC Time for HH:MM on the calendar day of `day`.
    def self.gmt_clock_time(day, hour, min)
      Time.gm(day.year, day.month, day.day, hour.to_i, min.to_i, 0, 0)
    end

    private_class_method :normalize_size, :gmt_clock_time
  end

  # Category lookup tables built once, at load time, from
  # config/pirate_categories.yml next to this source file. When that file is
  # missing the constants are still defined, as empty collections.
  class Categories
    file = File.join(File.dirname(__FILE__), "config", "pirate_categories.yml")
    categories = File.exist?(file) ? PirateBay.get_browse_categories(file) : {}

    # Keys of the top level of the YAML document.
    TOP_LEVEL_CATEGORIES = categories.keys
    # Every second-level hash merged into one.
    ALL_CATEGORIES = categories.values.inject({}) { |merged, group| merged.merge(group) }
    # ALL_CATEGORIES with keys and values swapped.
    CATEGORIES_BY_NUMBER = ALL_CATEGORIES.invert
  end
end

if $0 == __FILE__
  pp PirateBay.get_browse_categories
  pp PirateBay::Categories::ALL_CATEGORIES
  pp PirateBay::Categories::TOP_LEVEL_CATEGORIES
  pp PirateBay::Categories::CATEGORIES_BY_NUMBER
  recents = PirateBay.recent
  pp recents
  pp recents.sort
  # Seeder/leecher ratio; torrents without leechers sort as 0, as before.
  pp recents.sort_by { |t| t.leechers.to_i.zero? ? 0 : t.seeders.to_f / t.leechers }
end