aboutsummaryrefslogtreecommitdiffstats
path: root/lib/unicode_normalize.rb
blob: 3f359a0ff32a05eb6a4994428ca239a899798af5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# coding: utf-8
# frozen_string_literal: false

# Copyright Ayumu Nojima (野島 歩) and Martin J. Dürst (duerst@it.aoyama.ac.jp)

#--
# additions to class String for Unicode normalization
#++
class String

  # :call-seq:
  #    str.unicode_normalize!(form=:nfc)
  #
  # Destructive version of String#unicode_normalize, doing Unicode
  # normalization in place.
  #
  # +form+ defaults to +:nfc+.
  #
  # This definition is only a loader stub: it requires
  # 'unicode_normalize/normalize.rb' on first use and then calls
  # unicode_normalize! on the receiver again with the same +form+.
  #
  def unicode_normalize!(form = :nfc)
    # Deliberate lazy loading: the implementation file is only loaded the
    # first time this method is actually called.
    require 'unicode_normalize/normalize.rb'
    # NOTE(review): presumably the file required above replaces this stub
    # with the real String#unicode_normalize!, so this call dispatches to
    # the real implementation. If it does not, this line recurses without
    # end -- confirm against unicode_normalize/normalize.rb.
    unicode_normalize! form
  end

  # :call-seq:
  #    str.unicode_normalized?(form=:nfc)
  #
  # Checks whether +str+ is in Unicode normalization form +form+,
  # which can be any of the four values +:nfc+, +:nfd+, +:nfkc+, or +:nfkd+.
  # The default is +:nfc+.
  #
  # If the string is not in a Unicode Encoding, then an Exception is raised.
  # For details, see String#unicode_normalize.
  #
  #   "a\u0300".unicode_normalized?        #=> false
  #   "a\u0300".unicode_normalized?(:nfd)  #=> true
  #   "\u00E0".unicode_normalized?         #=> true
  #   "\u00E0".unicode_normalized?(:nfd)   #=> false
  #   "\xE0".force_encoding('ISO-8859-1').unicode_normalized?
  #                                        #=> Encoding::CompatibilityError raised
  #
  # This definition is only a loader stub: it requires
  # 'unicode_normalize/normalize.rb' on first use and then calls
  # unicode_normalized? on the receiver again with the same +form+.
  #
  def unicode_normalized?(form = :nfc)
    # Deliberate lazy loading: the implementation file is only loaded the
    # first time this method is actually called.
    require 'unicode_normalize/normalize.rb'
    # NOTE(review): presumably the file required above replaces this stub
    # with the real String#unicode_normalized?, so this call dispatches to
    # the real implementation. If it does not, this line recurses without
    # end -- confirm against unicode_normalize/normalize.rb.
    unicode_normalized? form
  end
end