require 'rubygems'
require 'hpricot'
require 'active_support'
require 'yaml'
require 'open-uri'

# Pages on ets.org that the default run at the bottom of this file scrapes.
ISSUES_URL = "http://www.ets.org/portal/site/ets/menuitem.1488512ecfd5b8849a77b13bc3921509/?vgnextoid=b63ce7b9edfb5010VgnVCM10000022f95190RCRD&vgnextchannel=06a7e3b5f64f4010VgnVCM10000022f95190RCRD".freeze
ARGUMENTS_URL = "http://www.ets.org/portal/site/ets/menuitem.1488512ecfd5b8849a77b13bc3921509/?vgnextoid=ef752d3631df4010VgnVCM10000022f95190RCRD&vgnextchannel=06a7e3b5f64f4010VgnVCM10000022f95190RCRD".freeze

# Downloads a web page, pulls the quoted passages out of its paragraphs and
# writes them to a YAML file.
class QuestionsParser
  attr_accessor :url

  # @param url [String] address of the page to scrape
  def initialize(url)
    self.url = url
  end

  # Dumps the extracted questions to +filename+ as YAML, stored as a list
  # under the 'new' key. An existing file is overwritten.
  #
  # @param filename [String] path of the YAML file to write
  def save_file(filename="questions.yml")
    data = { 'new' => questions }
    File.open(filename, "w") { |f| YAML.dump(data, f) }
  end

  # Text of every paragraph that contains a line starting with a
  # double-quoted passage.
  #
  # @return [Array<String>]
  def questions
    # NOTE: `^` matches at the start of any line inside the paragraph text,
    # not only at the very beginning of the string.
    paragraphs.map(&:inner_text).select { |text| text =~ /^"[^"]+"/ }
  end

  # The <p> elements found inside <div class="secondary_background">.
  def paragraphs
    Hpricot(content) / "div.secondary_background p"
  end

  # Body of the page at +url+, downloaded once and cached for later calls.
  #
  # The page is read into a String instead of keeping the IO handle that
  # `open(url)` returned: the String can be parsed any number of times and no
  # handle is left open. Going through URI#read also keeps this working on
  # Ruby 3.0+, where open-uri no longer makes Kernel#open accept URLs.
  #
  # @return [String]
  def content
    @content ||= URI.parse(url).read
  end
end

# Default GRE stuff
QuestionsParser.new(ISSUES_URL).save_file("issues.yml")
QuestionsParser.new(ARGUMENTS_URL).save_file("arguments.yml")