C0 code coverage information

Generated on Sat Jul 07 22:37:29 -0400 2007 with rcov 0.8.0


Code reported as executed by Ruby looks like this...
and this: this line is also marked as covered.
Lines considered as run by rcov, but not reported by Ruby, look like this,
and this: these lines were inferred by rcov (using simple heuristics).
Finally, here's a line marked as not executed.
Name Total lines Lines of code Total coverage Code coverage
IMDB.rb 505 306
95.6% 
94.1% 
  1 #This class takes a name of a movie and gets data from imdb
  2 # Author::    Stephen Becker IV  (mailto:sbecker@x.y@gmail )
  3 # Copyright:: Copyright
  4 # License::   Distributes under the same terms as Ruby
  5 # version:: High Plains Drifter (0.5)
  6 ### IMDB class
  7 ##thank you why for hpricot!
  8 ## thank you for the text()!
  9 require 'rubygems'
 10 #needs version >= 0.5.00 of hpricot
 11 #works with version 6.0 ruby
 12 require 'hpricot'
 13 require 'open-uri'
 14 require 'uri'
 15 require 'pp'
 16 require "rexml/document"
 17 
 18 #read about my suggestion @ http://code.whytheluckystiff.net/hpricot/ticket/37
 19 #this will just run next node X times
 20 #node_at just did not float my boat.
 21 module Hpricot
 22 	module Traverse
 23 		# Returns the node neighboring this node to the south: just below it.
 24 		# This method includes text nodes and comments and such.
 25 		def next_node(loop=1)
 26 			sib = parent.children
 27 			sib[sib.index(self) + loop] if parent
 28 		end
 29 	end
 30 end
 31 
 32 class IMDB
 33 	class << self
 34 		#returns a hash of the different title groups and the titles in each
 35 		#   {"Titles (Exact Matches)"=>["Office Space"],
 36 		#"Titles (Partial Matches)"=>["'Office Space': Out of the Office"],
 37 		#"Popular Titles"=>["Office Space"],
 38 		#"Titles (Approx Matches)"=>["Spice Girls: One Hour of Girl Power"]}
 39 		def title_search(title)
 40 			movie_name = title
 41 			movie_name.downcase!
 42 			#_ is used in the folder names
 43 			movie_name.gsub!("_","+")
 44 			movie_name.gsub!(" ","+")
 45 			#first search
 46 			#create a hpricot object
 47 			doc = Hpricot(open(URI.encode("http://www.imdb.com/find?s=all&q=#{movie_name}")))
 48 			z = []
 49 			doc.search("p").each{|x| z.push(x) if  !x.search("table").nil? && !x.search("b").first.nil? &&  x.search("b").first.inner_html.to_s.downcase.include?("title") }
 50 			p = {}
 51 			z.each{|x| p.merge!({x.search("b").first.inner_html => x.search("a").collect{|z| z.inner_html}.delete_if{|u| u.include?("<img")}.uniq})}
 52 			return p
 53 		end
 54 
 55 	end
 56 
 57 	#We save some things from a single load of the page.
 58 	#Other things do not need to be saved, and I will try
 59 	#benchmarking the difference one day.
	# Sets up an IMDB lookup; nothing is fetched here.
	#
	# movie::            the name to search for (not necessarily the real title).
	# interactive_load:: when true, a command line menu is used to pick the movie.
	def initialize(movie="",interactive_load=false)
		# what the caller asked for
		@movie_name = movie
		@interactive_load = interactive_load
		# upper bound on the number of links offered when guessing
		@MAXCOUNT = 15
		# page level data, filled in by a single load of the page
		@imdb_link = @page_html = @html_info_tags = @short_details = nil
		# per movie fields, all empty to begin with
		@movie_length = @movie_plot = @cast_html = nil
		@movie_title = @movie_year = nil
	end
 82 
 83 	##Complete html of page
	# Returns the complete html of the movie page, loading it if needed.
	def page_html
		doc = load_page
		doc.to_html
	end
 87 	alias_method :to_html,:page_html
 88 	alias_method :html,:page_html
 89 
 90 	# the link to the poster
	# Returns the src of the poster image, i.e. the <img> inside the
	# <a name='poster'> anchor, or nil when the page has no such image.
	def poster_link
		poster = (load_page/"//a[@name='poster']/img").first
		# Pages without a poster used to blow up with NoMethodError on nil.
		return nil if poster.nil?
		poster["src"]
	end
 96 
 97 	#the link
	# Returns the current value of @imdb_link.
	# initialize sets it to nil and load_page assigns the url, so this is
	# nil until the page has been loaded at least once.
	def imdb_link
		@imdb_link
	end
101 
102 
103 	def to_xml
104 		data = REXML::Document.new("<?xml version='1.0' encoding='ISO-8859-1'?>")
105 		base = data.add_element("movie")
106 		base.attributes["name"]=self.title
107 		base.attributes["api_version"]="0.2"
108 		a=base.add_element("cast")
109 		actors.each{|key,value|
110 			b=a.add_element("actor")
111 			b.add_element("name").text=key
112 			b.add_element("role").text=value
113 		}
114 		base.add_element("run_time").text=runtime
115 		base.add_element("plot").text=plot
116 		base.add_element("director").text=directors.join(",")
117 		base.add_element("writer").text=writers.join(",")
118 		#dont think the link plays nice with xml, need to use
119 		#base.add_element("poster link").text=poster
120 		#CGI::escapeHTML(string) or ERB::Util.html_escape
121 		base.add_element("link").text=imdb_link
122 		base.add_element("title").text=title
123 		base.add_element("date").text=date.to_s
124 		base.add_element("rating").text=mpaa
125         base.add_element("user_comments").text=user_comments
126         base.add_element("tag_line").text=tagline
127         base.add_element("plot").text=plot
128         #base.add_element("keywords").text=unescapeHTML(keywords.join(","))
129 		g=base.add_element("genres")
130 		genres.each{|value|
131 			g.add_element("type").text=value
132 		}
133 		result = ""
134 		data.write(result)
135 		return result
136 	end
137 
138 	#returns a hash of the possible titles for a search.
139 	#if the movie is not in the list, try a different name
140 	def title_search
141 		return IMDB.title_search(@movie_name)
142 	end
143 	###############olds
144 
145 
146 	def tagline
147 		#"Work Sucks."
148 		z = page_info_tags.select{|x| text_clean(x.search("h5").text())=="tagline"}
149 		return z.first.search("h5").first.next_node.to_s.strip unless z.first.nil?
150 		""
151 	end
152 
153 	#returns an array of genres
154 	def genre
155 		#["Comedy", "Crime"]
156 		z = page_info_tags.select{|x| text_clean(x.search("h5").text())=="genre"}
157 		return  z.first.search("a").select{|z| !z.classes.include?("inline")}.collect{|b| b.inner_html} unless z.first.nil?
158 		[]
159 	end
160 	alias_method :genres,:genre
161 
162 	#returns a date object with the release date
163 	def release_date
164 		#19 February 1999 (USA)
165 		z = page_info_tags.select{|x| text_clean(x.search("h5").text())=="release_date"}
166 		return Date.new if z.first.nil?
167 		date2 = z.first.search("h5").first.next_node.to_s.strip.split
168 		day = date2[0]
169 		month = Date::MONTHNAMES.index(date2[2])
170 		year = date2[3]
171 		x=Date.parse(date2[0...3].join(" "),"%d %B %Y")
172 		x
173 	end
174 	alias_method :date,:release_date
175 
176 	#returns an array of writers
177 	def writer
178 		#["Mike Judge"]
179 		z = page_info_tags.select{|x| text_clean(x.search("h5").text())=="writers"}
180 		return z.first.search("a").select{|z| !z.inner_html.include?('(WGA)') && !z.classes.include?("tn15more")}.collect{|b| b.inner_html.to_s.strip}.uniq unless z.first.nil?
181 		[]
182 	end
183 	alias_method :writers,:writer
184 
185 	#returns an array of directors
186 	def director
187 		#["Mike Judge"]
188 		z = page_info_tags.select{|x| text_clean(x.search("h5").text())=="director"}
189 		return z.first.search("a").select{|z| !z.classes.include?("tn15more")}.collect{|b| b.inner_html.to_s.strip}.uniq unless z.first.nil?
190 		[]
191 	end
192 	alias_method :directors,:director
193 
194 	#a string with what hollywood calls plot these days
195 	def plot
196 		#Comedic tale of company workers who hate their jobs and decide to rebel against their greedy boss.
197 		z = page_info_tags.select{|x| text_clean(x.search("h5").text())=="plot_outline"}
198 		return z.first.search("h5").first.next_node.to_s.strip unless z.first.nil?
199 		""
200 	end
201 	alias_method :plot_outline,:plot
202 
203 	#not sure about supporting this yet
204 	#["This plot synopsis is empty. Add a synopsis"]
205 	#@html_info_tags.select{|x| text_clean(x.search("h5").text())=="plot_synopsis"}.first.search("a").select{|z| !z.classes.include?("inline")}.collect{|b| b.inner_html}
206 
207 	#an array of key words?
208 	#["Hypnosis", "Cult&#160;Comedy", "Kung&#160;Fu", "Post&#160;It", "Arson"]
209 	def plot_keywords
210 		z = page_info_tags.select{|x| text_clean(x.search("h5").text())=="plot_keywords"}
211 		return z.first.search("a").select{|z| !z.classes.include?("inline")}.collect{|b| unescapeHTML(b.inner_html)} unless z.first.nil?
212 		[]
213 	end
214 	alias_method :keywords,:plot_keywords
215 
216 	#string i hope.
217 	#2 nominations
218 	def awards
219 		z = page_info_tags.select{|x| text_clean(x.search("h5").text())=="awards"}
220 		return z.first.search("h5").first.next_node.to_s.strip unless z.first.nil?
221 		""
222 	end
223 
224 	#some say its cheating... @html_info_tags.select{|x| text_clean(x.search("h5").text())=="[[%s]]"}.first.search("h5").first.next_node.to_s.strip#
225 	#string of some user comments
226 	def user_comments
227 		z = page_info_tags.select{|x| text_clean(x.search("h5").text())=="user_comments"}
228 		return z.first.search("h5").first.next_node.to_s.strip unless z.first.nil?
229 		""
230 	end
231 
232 	#title else where in the world
233 	def also_known_as
234 		z = page_info_tags.select{|x| text_clean(x.search("h5").text())=="also_known_as"}
235 		return z.first.search("h5").first.next_node.to_s.strip unless z.first.nil?
236 		""
237 	end
238 
239 	alias_method :aka,:also_known_as
240 
241 	#string of the rating and why
242 	def mpaa
243 		z = page_info_tags.select{|x| text_clean(x.search("h5").text())=="mpaa"}
244 		return z.first.search("h5").first.next_node.to_s.strip unless z.first.nil?
245 		""
246 	end
247 	alias_method :ratings,:mpaa
248 	alias_method :rating,:mpaa
249 
250 	#string # min (hopefully is the format)
251 	def runtime
252 		z = page_info_tags.select{|x| text_clean(x.search("h5").text())=="runtime"}
253 		return  z.first.search("h5").first.next_node.to_s.strip unless z.first.nil?
254 		""
255 	end
256 
257 	#country as a string. most likely abbrv
258 	def country
259 		z = page_info_tags.select{|x| text_clean(x.search("h5").text())=="country"}
260 		return z.first.search("h5").first.next_node(2).inner_html unless z.first.nil?
261 		""
262 	end
263 
264 	#orgrinal lang
265 	def language
266 		z = page_info_tags.select{|x| text_clean(x.search("h5").text())=="language"}
267 		return z.first.search("h5").first.next_node(2).inner_html unless z.first.nil?
268 		""
269 	end
270 	#was it shot in color?
271 	def color
272 		z = page_info_tags.select{|x| text_clean(x.search("h5").text())=="color"}
273 		return z.first.search("h5").first.next_node(2).inner_html unless z.first.nil?
274 		""
275 	end
276 
277 	#string
278 	def aspect_ratio
279 		z = page_info_tags.select{|x| text_clean(x.search("h5").text())=="aspect_ratio"}
280 		return z.first.search("h5").first.next_node.to_s.strip unless z.first.nil?
281 		""
282 	end
283 	alias_method :aspect,:aspect_ratio
284 
285 	#string
286 	def company
287 		z = page_info_tags.select{|x| text_clean(x.search("h5").text())=="company"}
288 		return z.first.search("h5").first.next_node(2).inner_html unless z.first.nil?
289 		""
290 	end
291 
292 	#ratings around the world. its a hash!
293 	def certification
294 		cert_hash = {}
295 		begin
296 			#{"UK"=>"15", "Ireland"=>"15", "Chile"=>"TE", "Australia"=>"M", "Argentina"=>"Atp", "Iceland"=>"L", "Sweden"=>"Btl", "Portugal"=>"M/12", "Spain"=>"T", "USA"=>"R", "Finland"=>"S", "France"=>"U", "Peru"=>"PT", "Canada"=>"AA", "Norway"=>"7", "Germany"=>"12", "Netherlands"=>"AL"}
297 			page_info_tags.select{|x| text_clean(x.search("h5").text())=="certification"}.first.search("a").select{|z| !z.classes.include?("inline")}.each{|b| xx = b.inner_html.split(':')
298 				cert_hash.merge!({xx[0]=>xx[1]})
299 			}
300 		rescue Exception => e
301 		end
302 		cert_hash
303 	end
304 	alias_method :certs,:certification
305 	alias_method :certifications,:certification
306 
307 	#just the title, in case it is different from the one you used
308 	def title
309 		doc = load_page
310 		title  = doc.search("h1").first.inner_html
311 		clean_html_tags(title)
312 		clean_9_0(title)
313 		title
314 	end
315 
316 	def actors
317         doc = load_page
318         p = {}
319         begin
320         doc.search("table[@class='cast']").first.search("tr").each{|x| p.merge!({x.search("td[@class='nm']/a").inner_html => x.search("td[@class='char']").inner_html})}
321         rescue Exception =>e
322         end
323         p
324 	end
325 	alias_method :cast,:actors
326 
327 	private
328 
329 
330 	#needs better xpath
331 	#gets the upper level to find the poster html. should be broken out in xpaths
332 	def details_based_on_poster_attribute
333 		doc = load_page
334 		data = []
335 		doc.search("table").each{|rate|  (rate/"a").each{|link| data.push(rate) if link.attributes['name']=="poster"  }}
336 		return data
337 	end
338 	#so i dont have to run the info tag search alot
339 	def page_info_tags
340 		return @html_info_tags if @html_info_tags
341 		doc = load_page
342 		@html_info_tags = doc.search("div[@class='info']")
343 	end
344 
345 	#loads the page data once and stores it in @page_html
346 	def load_page
347 		#check if we have page html if so return
348 		if @page_html
349 
350 			doc = Hpricot(@page_html)
351 
352 		else
353 			#alter the passed in name to fit the imdb search
354 			movie_name = @movie_name
355 			movie_name.downcase!
356 			#_ is used in the folder names
357 			movie_name.gsub!("_","+")
358 			movie_name.gsub!(" ","+")
359 			#first search
360 			#create a hpricot object
361 			#for testing...
362 			#File.open("/home/sbecker/moo.html","r") { |f| @page_html=f.read }
363 			#@imdb_link="adff"
364 			#doc=Hpricot(@page_html)
365 
366 			doc = Hpricot(open(URI.encode("http://www.imdb.com/find?s=all&q=#{movie_name}")))
367 			#find all links
368 			elements = doc.search("a")
369 			arr = []
370 			#if the inner html of the link is the same as the movie name add it to the
371 			#list.
372 			elements.each{|link| arr.push(link.attributes['href']) if link.attributes['href'] && link.attributes['href'].include?("title")}
373 			if doc.search("h1").first.nil?
374 				#use the first link with the same name as what we search for create a
375 				#menu system if more or create more then one entry?
376 				if !@interactive_load
377 					@imdb_link = "http://www.imdb.com#{arr.first}"
378 				else
379 					@imdb_link = "http://www.imdb.com"+movie_menu(elements)
380 				end
381 				doc=Hpricot(open(@imdb_link))
382 				@page_html = doc.to_html
383 				@html_info_tags = doc.search("div[@class='info']")
384 				#some movies do not take you to a search page example is robin hood men
385 				#in tights i guess sometimes there is no need for a search page
386 			elsif (doc/"/html/head/title").inner_html!="IMDb Search"
387 				@imdb_link = "http://www.imdb.com/find?s=all&q=#{movie_name}"
388 				@page_html = doc.to_html
389 				@html_info_tags = doc.search("div[@class='info']")
390 			else
391 				#all searches have failed
392 				raise "Error: No Inner HTML links found!"
393 			end
394 		end
395 		return doc
396 	end
397 
398 
399 	#@html_info_tags = doc.search("div[@class='info']")
400 	def movie_menu(elements)
401 		count = 0
402 		array = []
403 		puts "Pick a number to load"
404 		elements.each{|link|
405 			if link.attributes['href'] && link.attributes['href'].include?('/title/') && count<@MAXCOUNT
406 				puts count.to_s+")"+unescapeHTML(link.inner_html)+" "+link.next_node.to_s
407 				array.push(link.attributes['href'])
408 				count = count+1
409 			end
410 		}
411 		number = gets
412 		return array[number.to_i%array.size]
413 	end
414 
415 	#needed to clean a few things
416 	def text_clean(text)
417 		cleaned = text
418 		cleaned.downcase!
419 		clean_html_tags(cleaned)
420 		clean_9_0(cleaned)
421 		cleaned.gsub!(/[^a-z\s]*/,'')
422 		cleaned.strip!
423 		cleaned.gsub!(/ /,'_')
424 		cleaned
425 	end
426 	def clean_html_tags(cleaned)
427 		cleaned.gsub!(/<[^<]*>/,'')
428 	end
429 	def clean_9_0(cleaned)
430 		cleaned.gsub!(/\([^\(]*\)/,'')
431 	end
432 	#Jacked from http://www.rubycentral.com/book/tut_stdtypes.html
433 	def unescapeHTML(string)
434 		str = string.dup
435 		str.gsub!(/&(.*?);/n) {
436 			match = $1.dup
437 			case match
438 			when /\Aamp\z/ni           then '&'
439 			when /\Aquot\z/ni          then '"'
440 			when /\Agt\z/ni            then '>'
441 			when /\Alt\z/ni            then '<'
442 			when /\A#(\d+)\z/n         then Integer($1).chr
443 			when /\A#x([0-9a-f]+)\z/ni then $1.hex.chr
444 			end
445 		}
446 		str
447 	end
448 
449 end
450 #begin
451 #	["scotland pa","MaLlRaTs","Doctor+Zhivago","blue_velvet","die hard","die hard 2","ghost in the shell","pi","office_space","11:14","high plains drifter"].each{|movie|
452 #		puts movie
453 #		movie=IMDB.new(movie)# or office_space or OFFICE_SPACE
454 #		#pp movie.get_links
455 #		pp movie.actors
456 #		pp movie.title
457 #		pp movie.poster_link
458 #		pp movie.rating
459 #		pp movie.aka
460 #		pp movie.also_known_as
461 #		pp movie.aspect
462 #		pp movie.aspect_ratio
463 #		pp movie.awards
464 #		pp movie.certification
465 #		pp movie.certifications
466 #		pp movie.certs
467 #		pp movie.color
468 #		pp movie.company
469 #		pp movie.country
470 #		pp movie.date
471 #		pp movie.director
472 #		pp movie.directors
473 #		pp movie.genre
474 #		pp movie.genres
475 #		pp movie.imdb_link
476 #		pp movie.keywords
477 #		pp movie.language
478 #		pp movie.mpaa
479 #		pp movie.page_html
480 #		pp movie.plot
481 #		pp movie.plot_keywords
482 #		pp movie.plot_outline
483 #		pp movie.poster_link
484 #		pp movie.rating
485 #		pp movie.ratings
486 #		pp movie.release_date
487 #		pp movie.runtime
488 #		pp movie.tagline
489 #		pp movie.user_comments
490 #		pp movie.writer
491 #		pp movie.writers
492 #		pp movie.title_search
493 #		pp movie.to_xml
494 #		sleep 30
495 #	}
496 #rescue Exception=>e
497 #	puts "------------"+e
498 #	puts e.backtrace*"\n"
499 #end
500 #
501 #begin
502 #	pp IMDB.title_search("white strips")
503 #rescue Exception => e
504 #	pp e
505 #end

Generated using the rcov code coverage analysis tool for Ruby version 0.8.0.

Valid XHTML 1.0! Valid CSS!