1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
|
require 'rdoc/parser'
##
#
# This is an attempt to write a basic parser for Perl's
# POD (Plain Old Documentation) format. Ruby code must
# co-exist with Perl, and some tasks are easier in Perl
# than Ruby because of existing libraries.
#
# One difficulty is that Perl POD has no means of identifying
# the classes (packages) and methods (subs) with which it
# is associated. It is more like literate programming in so
# far as it just happens to be in the same place as the code,
# but need not be.
#
# We would like to support all the markup the POD provides
# so that it will convert happily to HTML. At the moment
# I don't think I can do that: time constraints.
#
class RDoc::Parser::PerlPOD < RDoc::Parser

  # The dot is escaped so that only a literal ".pl" / ".pm" suffix matches.
  # An unescaped dot matches any character, which would also claim file
  # names such as "icon.xpm" or "page.tpl".
  parse_files_matching(/\.p[lm]$/)

  ##
  # Prepare to parse a perl file.
  #
  # All five arguments are forwarded unchanged to the superclass
  # initializer. Afterwards the content is run through an
  # RDoc::Markup::PreProcess; any directive it hands to the block is
  # reported with a warning naming the directive and the file.
  #
  # NOTE(review): @file_name, @options and @content are presumably set by
  # the superclass initializer -- confirm against the RDoc version in use.

  def initialize(top_level, file_name, content, options, stats)
    super

    preprocess = RDoc::Markup::PreProcess.new @file_name, @options.rdoc_include

    preprocess.handle @content do |directive, _param|
      warn "Unrecognized directive '#{directive}' in #{@file_name}"
    end
  end

  ##
  # Extract the Pod(-like) comments from the code.
  # At its most basic there will be no need to distinguish
  # between the different types of header, etc.
  #
  # This uses a simple finite state machine, in a very
  # procedural pattern. I could "replace case with polymorphism"
  # but I think it would obscure the intent and scatter the
  # code all over the place. This machine is necessary
  # because POD requires that directives be preceded by
  # blank lines, so reading line by line is necessary,
  # and preserving state about what is seen is necessary.
  #
  # Every line of @content is dispatched to the handler registered for
  # the current state. Handlers append filtered text to
  # @top_level.comment and may switch the state.
  #
  # Returns @top_level.

  def scan
    @top_level.comment ||= ""
    state = :code_blank
    line_number = 0
    line = nil

    # The handlers below are lambdas, hence closures: they read +line+ and
    # +line_number+ and assign +state+ in this method's local scope. A
    # dispatch table replaces what would otherwise be a long nested case.

    # First, the "nothing to see here" handlers. Each one only watches for
    # a blank line, which re-arms directive recognition for its mode.
    code_noop = lambda do
      state = :code_blank if line =~ /^\s+$/
    end

    pod_noop = lambda do
      state = :pod_blank if line =~ /^\s+$/
      @top_level.comment += filter(line)
    end

    begin_noop = lambda do
      state = :begin_blank if line =~ /^\s+$/
      @top_level.comment += filter(line)
    end

    # Now the handlers that recognise directives.

    # In code: look for a directive that opens documentation. Any other
    # line leaves the state unchanged.
    transit_to_pod = lambda do
      case line
      when /^=(?:pod|head\d+)/, /^=over/
        # "=over" used to select a state with no entry in the dispatch
        # table, so the following line raised NoMethodError on nil. It is
        # ordinary POD and is handled like the other opening directives.
        state = :pod_no_blank
        @top_level.comment += filter(line)
      when /^=(?:begin|for)/
        # The directive line itself is not copied into the comment.
        # "=for" is handled exactly like "=begin" here.
        state = :begin_no_blank
      end
    end

    # In POD, after a blank line: "=cut" returns to code, "=end" is
    # reported as unexpected, everything else is documentation text.
    process_pod = lambda do
      case line
      when /^\s*$/
        state = :pod_blank
        @top_level.comment += filter(line)
      when /^=cut/
        state = :code_no_blank
      when /^=end/
        $stderr.puts "'=end' unexpected at #{line_number} in #{@file_name}"
      else
        @top_level.comment += filter(line)
      end
    end

    # In a =begin/=for region, after a blank line: the mirror image of
    # process_pod, with the roles of "=end" and "=cut" swapped.
    process_begin = lambda do
      case line
      when /^\s*$/
        state = :begin_blank
        @top_level.comment += filter(line)
      when /^=end/
        state = :code_no_blank
      when /^=cut/
        $stderr.puts "'=cut' unexpected at #{line_number} in #{@file_name}"
      else
        @top_level.comment += filter(line)
      end
    end

    transitions = {
      :code_no_blank  => code_noop,
      :code_blank     => transit_to_pod,
      :pod_no_blank   => pod_noop,
      :pod_blank      => process_pod,
      :begin_no_blank => begin_noop,
      :begin_blank    => process_begin,
    }

    @content.each_line do |l|
      line = l
      line_number += 1
      transitions[state].call
    end

    @top_level
  end

  ##
  # Filter the perl markup that does the same as the rdoc
  # filtering. Only basic for now. Will probably need a
  # proper parser to cope with C<<...>> etc.
  #
  # +comment+ is a String of POD text. It is not modified, so frozen
  # strings are accepted.
  #
  # Returns a new String in which:
  # * a leading "=pod" becomes "==" (a line holding only "=pod" yields '')
  # * "=headN" becomes N "=" characters
  # * "=item" is removed
  # * C<...>, I<...> and B<...> become <tt>, <i> and <b> elements

  def filter(comment)
    return '' if comment =~ /^=pod\s*$/

    # Non-mutating gsub throughout: the in-place gsub! variants altered the
    # caller's string and raised FrozenError on frozen input.
    text = comment.gsub(/^=pod/, '==')
    text = text.gsub(/^=head(\d+)/) { "=" * $1.to_i }
    text = text.gsub(/=item/, '')
    text = text.gsub(/C<(.*?)>/, '<tt>\1</tt>')
    text = text.gsub(/I<(.*?)>/, '<i>\1</i>')
    text.gsub(/B<(.*?)>/, '<b>\1</b>')
  end

end
|