lib/rdoc/parser/perl.rb


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165

require 'rdoc/parser'

##
#
# This is an attempt to write a basic parser for Perl's
# POD (Plain Old Documentation) format.  Ruby code must
# co-exist with Perl, and some tasks are easier in Perl
# than Ruby because of existing libraries.
#
# One difficulty is that Perl POD has no means of identifying
# the classes (packages) and methods (subs) with which it
# is associated; it is more like literate programming in so
# far as it just happens to be in the same place as the code,
# but need not be.
#
# We would like to support all the markup the POD provides
# so that it will convert happily to HTML.  At the moment
# I don't think I can do that: time constraints. 
#

class RDoc::Parser::PerlPOD < RDoc::Parser

  # Claim files ending in ".pl" or ".pm".  The dot is escaped so that only
  # a real extension matches (an unescaped dot would also accept names such
  # as "simpl").
  parse_files_matching(/\.p[lm]$/)

  ##
  # Prepare to parse a perl file.
  #
  # All arguments are forwarded unchanged to RDoc::Parser#initialize.  The
  # content is then handed to an RDoc::Markup::PreProcess instance; every
  # directive that reaches the block is reported with a warning.

  def initialize(top_level, file_name, content, options, stats)
    super

    preprocess = RDoc::Markup::PreProcess.new @file_name, @options.rdoc_include

    preprocess.handle @content do |directive, _param|
      warn "Unrecognized directive '#{directive}' in #{@file_name}"
    end
  end

  ##
  # Extract the Pod(-like) comments from the code and append them, after
  # filtering, to the comment of <tt>@top_level</tt>.  Returns
  # <tt>@top_level</tt>.
  #
  # At its most basic there is no need to distinguish between the
  # different types of header, etc.
  #
  # This uses a simple finite state machine, in a very procedural pattern.
  # "Replace case with polymorphism" would obscure the intent and scatter
  # the code all over the place.  The machine is necessary because POD
  # requires that directives be preceded by blank lines, so the content is
  # read line by line while remembering what has been seen.
  #
  # States:
  #
  # :code_blank::     In code; a POD directive starting the line is
  #                   recognised.  This is the initial state, and ordinary
  #                   code lines do not leave it.
  # :code_no_blank::  Just left POD via <tt>=cut</tt> / <tt>=end</tt>;
  #                   everything is ignored until a blank line is seen.
  # :pod_no_blank::   Just entered POD; lines are emitted until the first
  #                   blank line.
  # :pod_blank::      Inside POD after a blank line; <tt>=cut</tt> returns
  #                   to code.
  # :begin_no_blank:: As :pod_no_blank, for <tt>=begin</tt> / <tt>=for</tt>.
  # :begin_blank::    As :pod_blank, but terminated by <tt>=end</tt>.
  #
  # NOTE(review): perlpod describes <tt>=for</tt> as applying to a single
  # paragraph, whereas here it is handled like <tt>=begin</tt> and runs
  # until <tt>=end</tt> -- confirm whether that is intended.

  def scan
    @top_level.comment ||= ""
    state       = :code_blank
    line_number = 0
    line        = nil

    # Lines come from String#each_line, so they are never empty; a line is
    # "blank" when it holds nothing but whitespace (including its newline).
    blank_line = /^\s*$/

    # The handlers below are lambdas, hence closures: they read +line+ and
    # +line_number+ and reassign +state+ in this method's scope.  A table of
    # handlers replaces what would otherwise be a long, repetitive nested
    # case statement.

    # Append the filtered current line to the collected documentation.
    emit = lambda { @top_level.comment += filter(line) }

    # First, the "nothing to see here" handlers: they only wait for the
    # blank line that must precede the next directive.
    code_noop = lambda do
      state = :code_blank if line =~ blank_line
    end

    pod_noop = lambda do
      state = :pod_blank if line =~ blank_line
      emit.call
    end

    begin_noop = lambda do
      state = :begin_blank if line =~ blank_line
      emit.call
    end

    # Now the handlers that recognise directives.

    transit_to_pod = lambda do
      case line
      when /^=(?:pod|head\d+|over)/
        # =over opens ordinary POD text.  It previously selected a state
        # that had no entry in the transition table, which made the next
        # line fail with NoMethodError on nil.
        state = :pod_no_blank
        emit.call
      when /^=(?:begin|for)/
        # The directive line itself is not emitted.
        state = :begin_no_blank
      end
    end

    process_pod = lambda do
      case line
      when blank_line
        state = :pod_blank
        emit.call
      when /^=cut/
        state = :code_no_blank
      when /^=end/
        $stderr.puts "'=end' unexpected at #{line_number} in #{@file_name}"
      else
        emit.call
      end
    end

    process_begin = lambda do
      case line
      when blank_line
        state = :begin_blank
        emit.call
      when /^=end/
        state = :code_no_blank
      when /^=cut/
        $stderr.puts "'=cut' unexpected at #{line_number} in #{@file_name}"
      else
        emit.call
      end
    end

    transitions = {
      :code_no_blank  => code_noop,
      :code_blank     => transit_to_pod,
      :pod_no_blank   => pod_noop,
      :pod_blank      => process_pod,
      :begin_no_blank => begin_noop,
      :begin_blank    => process_begin
    }

    @content.each_line do |current|
      line = current
      line_number += 1
      # fetch makes an unknown state fail loudly with a KeyError naming it.
      transitions.fetch(state).call
    end

    @top_level
  end

  ##
  # Translate the basic POD markup in +comment+ (one line of POD) into the
  # RDoc equivalent and return the result as a new String; +comment+ itself
  # is left untouched, so frozen strings are accepted.
  #
  # * a bare <tt>=pod</tt> line yields the empty string
  # * <tt>=pod</tt> followed by text becomes <tt>==</tt>
  # * <tt>=headN</tt> becomes N equals signs
  # * a leading <tt>=item</tt> is dropped
  # * <tt>C<..></tt>, <tt>I<..></tt>, <tt>B<..></tt> become
  #   <tt><tt></tt>, <tt><i></tt>, <tt><b></tt>
  #
  # Only basic for now.  It will probably need a proper parser to cope
  # with <tt>C<<...>></tt> etc.

  def filter(comment)
    return '' if comment =~ /^=pod\s*$/

    comment
      .gsub(/^=pod/, '==')
      .gsub(/^=head(\d+)/) { '=' * Regexp.last_match(1).to_i }
      .gsub(/^=item/, '')                 # POD commands only occur at line start
      .gsub(/C<(.*?)>/, '<tt>\1</tt>')
      .gsub(/I<(.*?)>/, '<i>\1</i>')
      .gsub(/B<(.*?)>/, '<b>\1</b>')
  end

end