![]()

require 'ruby-tf-idf'
# Sample corpus: six short English sentences, one string per document.
corpus = [
'A big enough hammer can usually fix anything',
'A bird in the hand is a big mistake .',
'A bird in the hand is better than one overhead!',
'A career is a job that takes about 20 more hours a week.',
'A clean desk is a sign of a cluttered desk drawer.',
'A cynic smells flowers and looks for the casket.'
]
limit = 3 # report at most the top three words for each document
exclude_stop_words = false # presumably keeps stop words in the scoring -- confirm against the gem's README
# Build the TF-IDF calculator from the corpus and the two options above.
@t = RubyTfIdf::TfIdf.new(corpus,limit,exclude_stop_words)
# Compute the per-document word scores.
output = @t.tf_idf
# Presumably the value of `output` for the corpus above: one hash per document,
# in corpus order, mapping each reported word to its score (three words each,
# matching `limit = 3`).
[
{"anything"=>0.7781512503836436,"fix"=>0.7781512503836436, "enough"=>0.7781512503836436},
{"mistake"=>0.7781512503836436, "bird"=>0.47712125471966244, "in"=>0.47712125471966244},
{"overhead!"=>0.7781512503836436, "better"=>0.7781512503836436, "one"=>0.7781512503836436},
{"week"=>0.7781512503836436, "career"=>0.7781512503836436, "hours"=>0.7781512503836436},
{"desk"=>1.5563025007672873, "drawer"=>0.7781512503836436, "clean"=>0.7781512503836436},
{"casket"=>0.7781512503836436, "cynic"=>0.7781512503836436, "smells"=>0.7781512503836436}
]
Ruby gem: measurable
require 'measurable'
# Calculate the Euclidean distance between two points in space.
Measurable.euclidean([1, 1], [0, 0]) # => 1.41421
# Get the cosine distance between two vectors
# (the value shown is consistent with 1 - cosine similarity).
Measurable.cosine_distance([1, 2], [2, 3]) # => 0.007722123286332261