about | summary | refs | log | tree | commit | diff | stats
path: root/lib/reline/unicode.rb
diff options
context:
space:
mode:
author Yusuke Endoh <mame@ruby-lang.org> 2020-10-21 13:29:19 +0900
committer aycabta <aycabta@gmail.com> 2020-12-05 02:58:58 +0900
commit 76cac4c05a7be61a94a709b8b850118ad0bfa684 (patch)
tree 3131f03a4c77ab2bccc1a66d4e626a823d096379 /lib/reline/unicode.rb
parent b3e0db80606614f11412604f1657a135002326e9 (diff)
download ruby-76cac4c05a7be61a94a709b8b850118ad0bfa684.tar.gz
[ruby/reline] Improve the performance of `get_mbchar_width`
It is about three times faster to use one big regexp instead of sequential matching. https://github.com/ruby/reline/commit/e36f6c0707
Diffstat (limited to 'lib/reline/unicode.rb')
-rw-r--r-- lib/reline/unicode.rb 40
1 files changed, 25 insertions, 15 deletions
diff --git a/lib/reline/unicode.rb b/lib/reline/unicode.rb
index cd8c27e85b..df2f6719a4 100644
--- a/lib/reline/unicode.rb
+++ b/lib/reline/unicode.rb
@@ -72,20 +72,32 @@ class Reline::Unicode
}.join
end
+ require 'reline/unicode/east_asian_width'
+
+ MBCharWidthRE = /
+ (?<width_2_1>
+ [#{ EscapedChars.map {|c| "\\x%02x" % c.ord }.join }] (?# ^ + char, such as ^M, ^H, ^[, ...)
+ )
+ | (?<width_3>^\u{2E3B}) (?# THREE-EM DASH)
+ | (?<width_0>^\p{M})
+ | (?<width_2_2>
+ #{ EastAsianWidth::TYPE_F }
+ | #{ EastAsianWidth::TYPE_W }
+ )
+ | (?<width_1>
+ #{ EastAsianWidth::TYPE_H }
+ | #{ EastAsianWidth::TYPE_NA }
+ | #{ EastAsianWidth::TYPE_N }
+ )
+ /x
+
def self.get_mbchar_width(mbchar)
- case mbchar.encode(Encoding::UTF_8)
- when *EscapedChars # ^ + char, such as ^M, ^H, ^[, ...
- 2
- when /^\u{2E3B}/ # THREE-EM DASH
- 3
- when /^\p{M}/
- 0
- when EastAsianWidth::TYPE_A
- Reline.ambiguous_width
- when EastAsianWidth::TYPE_F, EastAsianWidth::TYPE_W
- 2
- when EastAsianWidth::TYPE_H, EastAsianWidth::TYPE_NA, EastAsianWidth::TYPE_N
- 1
+ m = mbchar.encode(Encoding::UTF_8).match(MBCharWidthRE)
+ case
+ when m[:width_2_1], m[:width_2_2] then 2
+ when m[:width_3] then 3
+ when m[:width_0] then 0
+ when m[:width_1] then 1
else
nil
end
@@ -591,5 +603,3 @@ class Reline::Unicode
[byte_size, width]
end
end
-
-require 'reline/unicode/east_asian_width'