[ruby/rdoc] Place a space between certain character class letters only

https://github.com/ruby/rdoc/commit/1f568e049d
author: Nobuyoshi Nakada <nobu@ruby-lang.org> 2023-11-27 20:15:54 +0900
committer: git <svn-admin@ruby-lang.org> 2023-11-27 15:58:31 +0000
commit: 196c4aeb766a66b3557ddab61086db58c7a08226 (patch)
tree: 46545600783f75d9d928af9ff3c21029315a58f0
parent: 7835ebce97a6e6132d2bc7bdbef115f3f47cc6c2 (diff)
download: ruby-196c4aeb766a66b3557ddab61086db58c7a08226.tar.gz
4 files changed, 34 insertions, 6 deletions
diff --git a/lib/rdoc/markup/parser.rb b/lib/rdoc/markup/parser.rb
index 0029df7e65..2ad4a65808 100644
--- a/lib/rdoc/markup/parser.rb
+++ b/lib/rdoc/markup/parser.rb
@@ -218,7 +218,7 @@ class RDoc::Markup::Parser
 
         break if peek_token.first == :BREAK
 
-        data << ' ' if skip :NEWLINE
+        data << ' ' if skip :NEWLINE and /#{SPACE_SEPARATED_LETTER_CLASS}\z/o.match?(data)
       else
         unget
         break
diff --git a/lib/rdoc/markup/to_html.rb b/lib/rdoc/markup/to_html.rb
index 6c9f5733a2..fb38924a04 100644
--- a/lib/rdoc/markup/to_html.rb
+++ b/lib/rdoc/markup/to_html.rb
@@ -202,7 +202,9 @@ class RDoc::Markup::ToHtml < RDoc::Markup::Formatter
   def accept_paragraph paragraph
     @res << "\n<p>"
     text = paragraph.text @hard_break
-    text = text.gsub(/\r?\n/, ' ')
+    text = text.gsub(/(#{SPACE_SEPARATED_LETTER_CLASS})?\K\r?\n(?=(?(1)(#{SPACE_SEPARATED_LETTER_CLASS})?))/o) {
+      defined?($2) && ' '
+    }
     @res << to_html(text)
     @res << "</p>\n"
   end
diff --git a/lib/rdoc/text.rb b/lib/rdoc/text.rb
index 0bc4aba428..6f1a2b8d15 100644
--- a/lib/rdoc/text.rb
+++ b/lib/rdoc/text.rb
@@ -309,4 +309,10 @@ module RDoc::Text
     res.join.strip
   end
 
+  ##
+  # Character class to be separated by a space when concatenating
+  # lines.
+
+  SPACE_SEPARATED_LETTER_CLASS = /[\p{Nd}\p{Lc}\p{Pc}]/
+
 end
diff --git a/test/rdoc/test_rdoc_markup_to_html.rb b/test/rdoc/test_rdoc_markup_to_html.rb
index 6897c8132e..2dd8cf922d 100644
--- a/test/rdoc/test_rdoc_markup_to_html.rb
+++ b/test/rdoc/test_rdoc_markup_to_html.rb
@@ -257,7 +257,7 @@ class TestRDocMarkupToHtml < RDoc::Markup::FormatterTestCase
   end
 
   def accept_paragraph_break
-    assert_equal "\n<p>hello<br> world</p>\n", @to.res.join
+    assert_equal "\n<p>hello<br>world</p>\n", @to.res.join
   end
 
   def accept_paragraph_i
@@ -391,11 +391,31 @@ class TestRDocMarkupToHtml < RDoc::Markup::FormatterTestCase
   end
 
   def test_accept_paragraph_newline
-    @to.start_accepting
+    hellos = ["hello", "\u{393 3b5 3b9 3ac} \u{3c3 3bf 3c5}"]
+    worlds = ["world", "\u{3ba 3cc 3c3 3bc 3bf 3c2}"]
+    ohayo, sekai = %W"\u{304a 306f 3088 3046} \u{4e16 754c}"
+
+    hellos.product(worlds) do |hello, world|
+      @to.start_accepting
+      @to.accept_paragraph para("#{hello}\n", "#{world}\n")
+      assert_equal "\n<p>#{hello} #{world}</p>\n", @to.res.join
+    end
+
+    hellos.each do |hello|
+      @to.start_accepting
+      @to.accept_paragraph para("#{hello}\n", "#{sekai}\n")
+      assert_equal "\n<p>#{hello}#{sekai}</p>\n", @to.res.join
+    end
 
-    @to.accept_paragraph para("hello\n", "world\n")
+    worlds.each do |world|
+      @to.start_accepting
+      @to.accept_paragraph para("#{ohayo}\n", "#{world}\n")
+      assert_equal "\n<p>#{ohayo}#{world}</p>\n", @to.res.join
+    end
 
-    assert_equal "\n<p>hello world </p>\n", @to.res.join
+    @to.start_accepting
+    @to.accept_paragraph para("#{ohayo}\n", "#{sekai}\n")
+    assert_equal "\n<p>#{ohayo}#{sekai}</p>\n", @to.res.join
   end
 
   def test_accept_heading_output_decoration
author	Nobuyoshi Nakada <nobu@ruby-lang.org>	2023-11-27 20:15:54 +0900
committer	git <svn-admin@ruby-lang.org>	2023-11-27 15:58:31 +0000
commit	196c4aeb766a66b3557ddab61086db58c7a08226 (patch)
tree	46545600783f75d9d928af9ff3c21029315a58f0
parent	7835ebce97a6e6132d2bc7bdbef115f3f47cc6c2 (diff)
download	ruby-196c4aeb766a66b3557ddab61086db58c7a08226.tar.gz