Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
source 'https://rubygems.org'

gem 'pry-byebug'
2 changes: 1 addition & 1 deletion data/stopwords.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
a,able,about,across,after,all,almost,also,am,among,an,and,any,are,as,at,be,because,been,but,by,can,cannot,could,dear,did,do,does,either,else,ever,every,for,from,get,got,had,has,have,he,her,hers,him,his,how,however,i,if,in,into,is,it,its,just,least,let,like,likely,may,me,might,most,must,my,neither,no,nor,not,of,off,often,on,only,or,other,our,own,rather,said,say,says,she,should,since,so,some,than,that,the,their,them,then,there,these,they,this,tis,to,too,twas,us,wants,was,we,were,what,when,where,which,while,who,whom,why,will,with,would,yet,you,your,one,out,more,now,first,two,very,such,same,shall,upon,before,therefore,great,made,even,same,work,make,being,through,here,way,true,see,time,those,place,much,without,body,whole,another,thus,set,new,given,both,above,well,part,between,end,order,each,form,gutenberg
a,able,about,across,after,all,almost,also,am,among,an,and,any,are,as,at,be,because,been,but,by,can,cannot,could,dear,did,do,does,either,else,ever,every,for,from,get,got,had,has,have,he,her,hers,him,his,how,however,i,if,in,into,is,it,its,just,least,let,like,likely,may,me,might,most,must,my,neither,no,nor,not,of,off,often,on,only,or,other,our,own,rather,said,say,says,she,should,since,so,some,than,that,the,their,them,then,there,these,they,this,tis,to,too,twas,us,wants,was,we,were,what,when,where,which,while,who,whom,why,will,with,would,yet,you,your,one,out,more,now,first,two,very,such,same,shall,upon,before,therefore,great,made,even,same,work,make,being,through,here,way,true,see,time,those,place,much,without,body,whole,another,thus,set,new,given,both,above,well,part,between,end,order,each,form,gutenberg,project,
3 changes: 1 addition & 2 deletions gutenberg.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,12 @@ def run!(predictor_klass, opts={})
predictor.train!
puts "Training took #{Time.now - start_time} seconds."

puts "Predicting..."
start_time = Time.now
accuracy = predictor.predict_test_set(opts)
puts "Predictions took #{Time.now - start_time} seconds."
puts "Accuracy: #{accuracy}"
end

run!(SimplePredictor)
# run!(SimplePredictor)
run!(ComplexPredictor, debug: true)

3 changes: 3 additions & 0 deletions lib/Gemfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
source 'https://rubygems.org'

gem 'pry-byebug'
62 changes: 51 additions & 11 deletions lib/complex_predictor.rb
Original file line number Diff line number Diff line change
@@ -1,22 +1,62 @@
require_relative 'predictor'
require 'pry-byebug'

# Public: Predicts a book's category by comparing its tokens against the most
# frequent "good" tokens seen in each category during training.
class ComplexPredictor < Predictor
  # Number of most-frequent tokens remembered for each category.
  TOP_TOKEN_COUNT = 16

  # Public: Trains the predictor on books in our dataset. This method is called
  # before the predict() method is called.
  #
  # Reads @all_books, iterated here as category => { filename => tokens }
  # (presumably populated by Predictor; confirm there), and fills:
  #
  #   @data            - category => { token: { token => occurrence count } }
  #   @top_token_words - category => Array of up to TOP_TOKEN_COUNT tokens,
  #                      most frequent first.
  #
  # Only tokens accepted by good_token? are counted. That predicate is not
  # defined in this class (presumably inherited from Predictor).
  #
  # Returns nothing.
  def train!
    @data = {}
    @top_token_words = {}

    @all_books.each do |category, books|
      counts = {}
      @data[category] = { token: counts }

      books.each do |_filename, tokens|
        tokens.each do |token|
          next unless good_token?(token)

          counts[token] = counts.fetch(token, 0) + 1
        end
      end
    end

    @data.each do |category, stats|
      # sort_by is ascending, so the most frequent tokens sit at the tail;
      # take that tail and flip it so the most frequent token comes first.
      ranked = stats[:token].sort_by { |_token, count| count }
      @top_token_words[category] = ranked.last(TOP_TOKEN_COUNT).reverse.map(&:first)
    end
  end

  # Public: Predicts category.
  #
  # The prediction is the category whose top words (built by train!) are hit
  # most often by the given tokens; every occurrence of a token counts. On a
  # tie the category that was trained first wins.
  #
  # tokens - A list of tokens (words).
  #
  # Returns a category as a Symbol. When no token matches any category the
  # first trained category is returned instead of failing; nil is returned
  # only when no category was trained at all.
  def predict(tokens)
    # Seed with the first trained category so a book that shares no token
    # with any top-word list still yields a prediction rather than nil.
    predicted_category = @top_token_words.keys.first
    max_match = 0

    @top_token_words.each do |category, top_words|
      count_match = tokens.count { |token| top_words.include?(token) }
      next unless count_match > max_match

      predicted_category = category
      max_match = count_match
    end

    predicted_category && predicted_category.to_sym
  end
end