diff options
Diffstat (limited to 'lib/did_you_mean/spell_checker.rb')
-rw-r--r-- | lib/did_you_mean/spell_checker.rb | 46 |
1 files changed, 46 insertions, 0 deletions
module DidYouMean
  # Suggests dictionary entries that resemble a mistyped or misspelled input.
  #
  # Candidates are pre-filtered and ranked with JaroWinkler.distance and then
  # narrowed down with Levenshtein.distance. Both helpers are referenced by
  # constant name and must be loaded before #correct is called.
  class SpellChecker
    # dictionary:: collection of candidate words. It must respond to +select+,
    #              and every entry is converted with +to_s+ before comparison.
    def initialize(dictionary:)
      @dictionary = dictionary
    end

    # Returns the dictionary entries considered plausible corrections of
    # +input+ (a String or Symbol), best-ranked first. The result is empty
    # when nothing is similar enough.
    #
    # Similarity is computed on normalized (downcased, "@"-stripped) text,
    # but the "this is exactly what was typed" filter compares the raw
    # input, so:
    #
    # * a case-only typo ("Foo" vs. "foo") still yields the dictionary word;
    # * an exact match that contains "@" is never suggested back.
    def correct(input)
      normalized_input = normalize(input)

      # Inputs of three characters or fewer use the lower similarity cutoff.
      threshold = normalized_input.length > 3 ? 0.834 : 0.77

      words = @dictionary.select do |word|
        JaroWinkler.distance(normalize(word), normalized_input) >= threshold
      end

      # Drop the entry identical to what the caller passed in. This must use
      # the raw input; comparing the normalized form against raw entries
      # rejects the wrong words.
      words.reject! { |word| input.to_s == word.to_s }

      # Highest similarity first.
      words.sort_by! { |word| JaroWinkler.distance(word.to_s, normalized_input) }
      words.reverse!

      # Correct mistypes: accept every candidate whose edit distance is at
      # most a quarter of the input length (rounded up).
      threshold   = (normalized_input.length * 0.25).ceil
      corrections = words.select do |candidate|
        Levenshtein.distance(normalize(candidate), normalized_input) <= threshold
      end

      # Correct misspells: if nothing passed the strict check, fall back to
      # the single best-ranked candidate whose edit distance is smaller than
      # the shorter of the two strings.
      if corrections.empty?
        corrections = words.select do |word|
          normalized_word = normalize(word)
          shortest        = [normalized_input.length, normalized_word.length].min

          Levenshtein.distance(normalized_word, normalized_input) < shortest
        end.first(1)
      end

      corrections
    end

    private

    # Returns a new downcased String with every "@" removed.
    def normalize(str_or_symbol) #:nodoc:
      str_or_symbol.to_s.downcase.delete("@")
    end
  end
end