From f71bd7477e84eb1cd10fa27e79b1e081ee51793a Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Wed, 7 Aug 2019 02:14:39 +0900 Subject: RDoc::Parser::C: Integrate do_classes and do_modules by one regexp match The full scan of the C source code (`@content.scan`) is very slow. The old code invokes the scan six times in `do_classes` and `do_modules`. This change integrates the six scans into one by merging the regexps. The integrated regexp is a bit hard to maintain, but the speed up is significant: approx. 30 sec -> 20 sec in Ruby's `make rdoc`. In addition, this change omits `do_boot_defclass` unless the file name is `class.c`. `boot_defclass` is too specific to Ruby's source code, so RDoc should handle it as a special case. Before this change: TOTAL (pct) SAMPLES (pct) FRAME 858 (13.6%) 858 (13.6%) (garbage collection) 292 (4.6%) 264 (4.2%) RDoc::Parser::C#do_define_class 263 (4.2%) 250 (3.9%) RDoc::Parser::C#do_define_module 275 (4.3%) 241 (3.8%) RDoc::Parser::C#do_define_class_under 248 (3.9%) 237 (3.7%) RDoc::Parser::C#do_define_module_under 234 (3.7%) 234 (3.7%) RDoc::Parser::C#gen_body_table 219 (3.5%) 219 (3.5%) Ripper::Lexer#state_obj 217 (3.4%) 216 (3.4%) RDoc::Parser::C#do_struct_define_without_accessor 205 (3.2%) 205 (3.2%) RDoc::Parser::C#do_boot_defclass 205 (3.2%) 205 (3.2%) RDoc::Parser::C#do_singleton_class The six methods take approx. 22.2%. `do_define_class` (4.2%) + `do_define_class_under` (3.8%) + `do_define_module` (3.9%) + `do_define_module_under` (3.7%) + `do_struct_define_without_accessor` (3.4%) + `do_singleton_class` (3.2%) After this change, the methods are integrated into `do_classes_and_modules` which takes only 5.8%. 
TOTAL (pct) SAMPLES (pct) FRAME 812 (16.7%) 812 (16.7%) (garbage collection) 355 (7.3%) 284 (5.8%) RDoc::Parser::C#do_classes_and_modules 225 (4.6%) 225 (4.6%) RDoc::Parser::C#gen_body_table 429 (8.8%) 210 (4.3%) RDoc::Parser::RubyTools#get_tk 208 (4.3%) 208 (4.3%) RDoc::TokenStream#add_tokens --- lib/rdoc/parser/c.rb | 198 +++++++++++++++++++--------------------- test/rdoc/test_rdoc_parser_c.rb | 34 +------ 2 files changed, 98 insertions(+), 134 deletions(-) diff --git a/lib/rdoc/parser/c.rb b/lib/rdoc/parser/c.rb index 5cc009e499..8265712370 100644 --- a/lib/rdoc/parser/c.rb +++ b/lib/rdoc/parser/c.rb @@ -324,12 +324,100 @@ class RDoc::Parser::C < RDoc::Parser # Scans #content for rb_define_class, boot_defclass, rb_define_class_under # and rb_singleton_class - def do_classes - do_boot_defclass - do_define_class - do_define_class_under - do_singleton_class - do_struct_define_without_accessor + def do_classes_and_modules + do_boot_defclass if @file_name == "class.c" + + @content.scan( + %r( + (?[\w\.]+)\s* = + \s*rb_(?: + define_(?: + class(?: # rb_define_class(class_name_1, parent_name_1) + \s*\( + \s*"(?\w+)", + \s*(?\w+)\s* + \) + | + _under\s*\( # rb_define_class_under(class_under, class_name2, parent_name2...) + \s* (?\w+), + \s* "(?\w+)", + \s* + (?: + (?[\w\*\s\(\)\.\->]+) | + rb_path2class\("(?[\w:]+)"\) + ) + \s*\) + ) + | + module(?: # rb_define_module(module_name_1) + \s*\( + \s*"(?\w+)"\s* + \) + | + _under\s*\( # rb_define_module_under(module_under, module_name_1) + \s*(?\w+), + \s*"(?\w+)" + \s*\) + ) + ) + | + struct_define_without_accessor\s*\( # rb_struct_define_without_accessor(class_name_3, parent_name_3, ...) 
+ \s*"(?\w+)", + \s*(?\w+), + \s*\w+, # Allocation function + (?:\s*"\w+",)* # Attributes + \s*NULL + \) + | + singleton_class\s*\( # rb_singleton_class(target_class_name) + \s*(?\w+) + \) + ) + )mx + ) do + class_name = $~[:class_name_1] + type = :class + if class_name + # rb_define_class(class_name_1, parent_name_1) + parent_name = $~[:parent_name_1] + #under = nil + else + class_name = $~[:class_name_2] + if class_name + # rb_define_class_under(class_under, class_name2, parent_name2...) + parent_name = $~[:parent_name_2] || $~[:path] + under = $~[:class_under] + else + class_name = $~[:class_name_3] + if class_name + # rb_struct_define_without_accessor(class_name_3, parent_name_3, ...) + parent_name = $~[:parent_name_3] + #under = nil + else + type = :module + class_name = $~[:module_name_1] + #parent_name = nil + if class_name + # rb_define_module(module_name_1) + #under = nil + else + class_name = $~[:module_name_2] + if class_name + # rb_define_module_under(module_under, module_name_1) + under = $~[:module_under] + else + # rb_singleton_class(target_class_name) + target_class_name = $~[:target_class_name] + handle_singleton $~[:var_name], target_class_name + next + end + end + end + end + end + + handle_class_module($~[:var_name], type, class_name, parent_name, under) + end end ## @@ -378,65 +466,6 @@ class RDoc::Parser::C < RDoc::Parser end end - ## - # Scans #content for rb_define_class - - def do_define_class - # The '.' 
lets us handle SWIG-generated files - @content.scan(/([\w\.]+)\s* = \s*rb_define_class\s* - \( - \s*"(\w+)", - \s*(\w+)\s* - \)/mx) do |var_name, class_name, parent| - handle_class_module(var_name, :class, class_name, parent, nil) - end - end - - ## - # Scans #content for rb_define_class_under - - def do_define_class_under - @content.scan(/([\w\.]+)\s* = # var_name - \s*rb_define_class_under\s* - \( - \s* (\w+), # under - \s* "(\w+)", # class_name - \s* - (?: - ([\w\*\s\(\)\.\->]+) | # parent_name - rb_path2class\("([\w:]+)"\) # path - ) - \s* - \) - /mx) do |var_name, under, class_name, parent_name, path| - parent = path || parent_name - - handle_class_module var_name, :class, class_name, parent, under - end - end - - ## - # Scans #content for rb_define_module - - def do_define_module - @content.scan(/(\w+)\s* = \s*rb_define_module\s*\(\s*"(\w+)"\s*\)/mx) do - |var_name, class_name| - handle_class_module(var_name, :module, class_name, nil, nil) - end - end - - ## - # Scans #content for rb_define_module_under - - def do_define_module_under - @content.scan(/(\w+)\s* = \s*rb_define_module_under\s* - \( - \s*(\w+), - \s*"(\w+)" - \s*\)/mx) do |var_name, in_module, class_name| - handle_class_module(var_name, :module, class_name, nil, in_module) - end - end ## # Scans #content for rb_include_module @@ -518,42 +547,6 @@ class RDoc::Parser::C < RDoc::Parser end end - ## - # Scans #content for rb_define_module and rb_define_module_under - - def do_modules - do_define_module - do_define_module_under - end - - ## - # Scans #content for rb_singleton_class - - def do_singleton_class - @content.scan(/([\w\.]+)\s* = \s*rb_singleton_class\s* - \( - \s*(\w+) - \s*\)/mx) do |sclass_var, class_var| - handle_singleton sclass_var, class_var - end - end - - ## - # Scans #content for struct_define_without_accessor - - def do_struct_define_without_accessor - @content.scan(/([\w\.]+)\s* = \s*rb_struct_define_without_accessor\s* - \( - \s*"(\w+)", # Class name - \s*(\w+), # Parent class - 
\s*\w+, # Allocation function - (\s*"\w+",)* # Attributes - \s*NULL - \)/mx) do |var_name, class_name, parent| - handle_class_module(var_name, :class, class_name, parent, nil) - end - end - ## # Finds the comment for an alias on +class_name+ from +new_name+ to # +old_name+ @@ -1247,8 +1240,7 @@ class RDoc::Parser::C < RDoc::Parser def scan remove_commented_out_lines - do_modules - do_classes + do_classes_and_modules do_missing do_constants diff --git a/test/rdoc/test_rdoc_parser_c.rb b/test/rdoc/test_rdoc_parser_c.rb index 81727ad759..6601d28f60 100644 --- a/test/rdoc/test_rdoc_parser_c.rb +++ b/test/rdoc/test_rdoc_parser_c.rb @@ -304,32 +304,6 @@ void Init_Blah(void) { assert_equal 'This should show up as an alias', methods.last.comment.text end - def test_do_classes_boot_class - content = <<-EOF -/* Document-class: Foo - * this is the Foo boot class - */ -VALUE cFoo = boot_defclass("Foo", rb_cObject); - EOF - - klass = util_get_class content, 'cFoo' - assert_equal "this is the Foo boot class", klass.comment.text - assert_equal 'Object', klass.superclass - end - - def test_do_classes_boot_class_nil - content = <<-EOF -/* Document-class: Foo - * this is the Foo boot class - */ -VALUE cFoo = boot_defclass("Foo", 0); - EOF - - klass = util_get_class content, 'cFoo' - assert_equal "this is the Foo boot class", klass.comment.text - assert_nil klass.superclass - end - def test_do_aliases_missing_class content = <<-EOF void Init_Blah(void) { @@ -511,7 +485,7 @@ void Init_foo(){ @parser = util_parser content - @parser.do_classes + @parser.do_classes_and_modules @parser.do_constants klass = @parser.classes['cFoo'] @@ -581,8 +555,7 @@ void Init_curses(){ @parser = util_parser content - @parser.do_modules - @parser.do_classes + @parser.do_classes_and_modules @parser.do_constants klass = @parser.classes['mCurses'] @@ -608,8 +581,7 @@ void Init_File(void) { @parser = util_parser content - @parser.do_modules - @parser.do_classes + @parser.do_classes_and_modules 
@parser.do_constants klass = @parser.classes['rb_mFConst'] -- cgit v1.2.3