From d73c44404d470424aa58e85fe38b97351f112bc1 Mon Sep 17 00:00:00 2001 From: Richard Levitte Date: Wed, 3 Oct 2018 17:43:48 +0200 Subject: A perl module to parse through C headers OpenSSL::ParseC is a module that parses through a C header file and returns a list with information on what it found. Currently, the information it returns covers function and variable declarations, macro definitions, struct declarations/definitions and typedef definitions. Reviewed-by: Tim Hudson (Merged from https://github.com/openssl/openssl/pull/7191) --- util/perl/OpenSSL/ParseC.pm | 1129 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1129 insertions(+) create mode 100644 util/perl/OpenSSL/ParseC.pm (limited to 'util/perl') diff --git a/util/perl/OpenSSL/ParseC.pm b/util/perl/OpenSSL/ParseC.pm new file mode 100644 index 0000000000..ba2427ccfa --- /dev/null +++ b/util/perl/OpenSSL/ParseC.pm @@ -0,0 +1,1129 @@ +#! /usr/bin/env perl +# Copyright 2018 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + +package OpenSSL::ParseC; + +use strict; +use warnings; + +use Exporter; +use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); +$VERSION = "0.9"; +@ISA = qw(Exporter); +@EXPORT = qw(parse); + +# Global handler data +my @preprocessor_conds; # A list of simple preprocessor conditions, + # each item being a list of macros defined + # or not defined. + +# Handler helpers +sub all_conds { + return map { ( @$_ ) } @preprocessor_conds; +} + +# A list of handlers that will look at a "complete" string and try to +# figure out what to make of it. +# Each handler is a hash with the following keys: +# +# regexp a regexp to compare the "complete" string with. +# checker a function that does a more complex comparison. +# Use this instead of regexp if that isn't enough. +# massager massages the "complete" string into an array with +# the following elements: +# +# [0] String that needs further processing (this +# applies to typedefs of structs), or empty. +# [1] The name of what was found. +# [2] A character that denotes what type of thing +# this is: 'F' for function, 'S' for struct, +# 'T' for typedef, 'M' for macro, 'V' for +# variable. +# [3] Return type (only for type 'F' and 'V') +# [4] Value (for type 'M') or signature (for type 'F', +# 'V', 'T' or 'S') +# [5...] The list of preprocessor conditions this is +# found in, as in checks for macro definitions +# (stored as the macro's name) or the absence +# of definition (stored as the macro's name +# prefixed with a '!' +# +# If the massager returns an empty list, it means the +# "complete" string has side effects but should otherwise +# be ignored. +# If the massager is undefined, the "complete" string +# should be ignored. +my @opensslcpphandlers = ( + ################################################################## + # OpenSSL CPP specials + # + # These are used to convert certain pre-precessor expressions into + # others that @cpphandlers have a better chance to understand. + + { regexp => qr/#if OPENSSL_API_COMPAT(\S+)(0x[0-9a-fA-F]{8})L$/, + massager => sub { + my $op = $1; + my $v = hex($2); + if ($op ne '<' && $op ne '>=') { + die "Error: unacceptable operator $op: $_[0]\n"; + } + my ($one, $major, $minor) = + ( ($v >> 28) & 0xf, + ($v >> 20) & 0xff, + ($v >> 12) & 0xff ); + my $t = "DEPRECATEDIN_${one}_${major}_${minor}"; + my $cond = $op eq '<' ? 'ifndef' : 'ifdef'; + return (<<"EOF"); +#$cond $t +EOF + } + } +); +my @cpphandlers = ( + ################################################################## + # CPP stuff + + { regexp => qr/#ifdef ?(.*)/, + massager => sub { + my %opts; + if (ref($_[$#_]) eq "HASH") { + %opts = %{$_[$#_]}; + pop @_; + } + push @preprocessor_conds, [ $1 ]; + print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n" + if $opts{debug}; + return (); + }, + }, + { regexp => qr/#ifndef ?(.*)/, + massager => sub { + my %opts; + if (ref($_[$#_]) eq "HASH") { + %opts = %{$_[$#_]}; + pop @_; + } + push @preprocessor_conds, [ '!'.$1 ]; + print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n" + if $opts{debug}; + return (); + }, + }, + { regexp => qr/#if (0|1)/, + massager => sub { + my %opts; + if (ref($_[$#_]) eq "HASH") { + %opts = %{$_[$#_]}; + pop @_; + } + if ($1 eq "1") { + push @preprocessor_conds, [ "TRUE" ]; + } else { + push @preprocessor_conds, [ "!TRUE" ]; + } + print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n" + if $opts{debug}; + return (); + }, + }, + { regexp => qr/#if ?(.*)/, + massager => sub { + my %opts; + if (ref($_[$#_]) eq "HASH") { + %opts = %{$_[$#_]}; + pop @_; + } + my @results = (); + my $conds = $1; + if ($conds =~ m|^defined<<<\(([^\)]*)\)>>>(.*)$|) { + push @results, $1; # Handle the simple case + my $rest = $2; + my $re = qr/^(?:\|\|defined<<<\([^\)]*\)>>>)*$/; + print STDERR "DEBUG[",$opts{debug_type},"]: Matching '$rest' with '$re'\n" + if $opts{debug}; + if ($rest =~ m/$re/) { + my @rest = split /\|\|/, $rest; + shift @rest; + foreach (@rest) { + m|^defined<<<\(([^\)]*)\)>>>$|; + die "Something wrong...$opts{PLACE}" if $1 eq ""; + push @results, $1; + } + } else { + $conds =~ s/<<<|>>>//g; + warn "Warning: complicated #if expression(1): $conds$opts{PLACE}" + if $opts{warnings}; + } + } elsif ($conds =~ m|^!defined<<<\(([^\)]*)\)>>>(.*)$|) { + push @results, '!'.$1; # Handle the simple case + my $rest = $2; + my $re = qr/^(?:\&\&!defined<<<\([^\)]*\)>>>)*$/; + print STDERR "DEBUG[",$opts{debug_type},"]: Matching '$rest' with '$re'\n" + if $opts{debug}; + if ($rest =~ m/$re/) { + my @rest = split /\&\&/, $rest; + shift @rest; + foreach (@rest) { + m|^!defined<<<\(([^\)]*)\)>>>$|; + die "Something wrong...$opts{PLACE}" if $1 eq ""; + push @results, '!'.$1; + } + } else { + $conds =~ s/<<<|>>>//g; + warn "Warning: complicated #if expression(2): $conds$opts{PLACE}" + if $opts{warnings}; + } + } else { + $conds =~ s/<<<|>>>//g; + warn "Warning: complicated #if expression(3): $conds$opts{PLACE}" + if $opts{warnings}; + } + print STDERR "DEBUG[",$opts{debug_type},"]: Added preprocessor conds: '", join("', '", @results), "'\n" + if $opts{debug}; + push @preprocessor_conds, [ @results ]; + print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n" + if $opts{debug}; + return (); + }, + }, + { regexp => qr/#elif (.*)/, + massager => sub { + my %opts; + if (ref($_[$#_]) eq "HASH") { + %opts = %{$_[$#_]}; + pop @_; + } + die "An #elif without corresponding condition$opts{PLACE}" + if !@preprocessor_conds; + pop @preprocessor_conds; + print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n" + if $opts{debug}; + return (<<"EOF"); +#if $1 +EOF + }, + }, + { regexp => qr/#else/, + massager => sub { + my %opts; + if (ref($_[$#_]) eq "HASH") { + %opts = %{$_[$#_]}; + pop @_; + } + die "An #else without corresponding condition$opts{PLACE}" + if !@preprocessor_conds; + # Invert all conditions on the last level + my $stuff = pop @preprocessor_conds; + push @preprocessor_conds, [ + map { m|^!(.*)$| ? $1 : '!'.$_ } @$stuff + ]; + print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n" + if $opts{debug}; + return (); + }, + }, + { regexp => qr/#endif ?/, + massager => sub { + my %opts; + if (ref($_[$#_]) eq "HASH") { + %opts = %{$_[$#_]}; + pop @_; + } + die "An #endif without corresponding condition$opts{PLACE}" + if !@preprocessor_conds; + pop @preprocessor_conds; + print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n" + if $opts{debug}; + return (); + }, + }, + { regexp => qr/#define ([[:alpha:]_]\w*)(<<<\(.*?\)>>>)?( (.*))?/, + massager => sub { + my $name = $1; + my $params = $2; + my $spaceval = $3||""; + my $val = $4||""; + return ("", + $1, 'M', "", $params ? "$name$params$spaceval" : $val, + all_conds()); } + }, + { regexp => qr/#.*/, + massager => sub { return (); } + }, + ); + +my @opensslchandlers = ( + ################################################################## + # OpenSSL C specials + # + # They are really preprocessor stuff, but they look like C stuff + # to this parser. All of these do replacements, anything else is + # an error. + + ##### + # Global variable stuff + { regexp => qr/OPENSSL_DECLARE_GLOBAL<<<\((.*),(.*)\)>>>;/, + massager => sub { return (<<"EOF"); +#ifndef OPENSSL_EXPORT_VAR_AS_FUNCTION +OPENSSL_EXPORT $1 _shadow_$2; +#else +$1 *_shadow_$2(void); +#endif +EOF + }, + }, + + ##### + # Deprecated stuff, by OpenSSL release. + + # We trick the parser by pretending that the declaration is wrapped in a + # check if the DEPRECATEDIN macro is defined or not. Callers of parse() + # will have to decide what to do with it. + { regexp => qr/(DEPRECATEDIN_\d+_\d+_\d+)<<<\((.*)\)>>>/, + massager => sub { return (<<"EOF"); +#ifndef $1 +$2; +#endif +EOF + }, + }, + + ##### + # LHASH stuff + + # LHASH_OF(foo) is used as a type, but the chandlers won't take it + # gracefully, so we expand it here. + { regexp => qr/(.*)\bLHASH_OF<<<\((.*?)\)>>>(.*)/, + massager => sub { return ("$1struct lhash_st_$2$3"); } + }, + { regexp => qr/DEFINE_LHASH_OF<<<\((.*)\)>>>/, + massager => sub { + return (<<"EOF"); +static ossl_inline LHASH_OF($1) * lh_$1_new(unsigned long (*hfn)(const $1 *), + int (*cfn)(const $1 *, const $1 *)); +static ossl_inline void lh_$1_free(LHASH_OF($1) *lh); +static ossl_inline $1 *lh_$1_insert(LHASH_OF($1) *lh, $1 *d); +static ossl_inline $1 *lh_$1_delete(LHASH_OF($1) *lh, const $1 *d); +static ossl_inline $1 *lh_$1_retrieve(LHASH_OF($1) *lh, const $1 *d); +static ossl_inline int lh_$1_error(LHASH_OF($1) *lh); +static ossl_inline unsigned long lh_$1_num_items(LHASH_OF($1) *lh); +static ossl_inline void lh_$1_node_stats_bio(const LHASH_OF($1) *lh, BIO *out); +static ossl_inline void lh_$1_node_usage_stats_bio(const LHASH_OF($1) *lh, + BIO *out); +static ossl_inline void lh_$1_stats_bio(const LHASH_OF($1) *lh, BIO *out); +static ossl_inline unsigned long lh_$1_get_down_load(LHASH_OF($1) *lh); +static ossl_inline void lh_$1_set_down_load(LHASH_OF($1) *lh, unsigned long dl); +static ossl_inline void lh_$1_doall(LHASH_OF($1) *lh, void (*doall)($1 *)); +LHASH_OF($1) +EOF + } + }, + + ##### + # STACK stuff + + # STACK_OF(foo) is used as a type, but the chandlers won't take it + # gracefully, so we expand it here. + { regexp => qr/(.*)\bSTACK_OF<<<\((.*?)\)>>>(.*)/, + massager => sub { return ("$1struct stack_st_$2$3"); } + }, +# { regexp => qr/(.*)\bSTACK_OF\((.*?)\)(.*)/, +# massager => sub { +# my $before = $1; +# my $stack_of = "struct stack_st_$2"; +# my $after = $3; +# if ($after =~ m|^\w|) { $after = " ".$after; } +# return ("$before$stack_of$after"); +# } +# }, + { regexp => qr/SKM_DEFINE_STACK_OF<<<\((.*),(.*),(.*)\)>>>/, + massager => sub { + return (<<"EOF"); +STACK_OF($1); +typedef int (*sk_$1_compfunc)(const $3 * const *a, const $3 *const *b); +typedef void (*sk_$1_freefunc)($3 *a); +typedef $3 * (*sk_$1_copyfunc)(const $3 *a); +static ossl_inline int sk_$1_num(const STACK_OF($1) *sk); +static ossl_inline $2 *sk_$1_value(const STACK_OF($1) *sk, int idx); +static ossl_inline STACK_OF($1) *sk_$1_new(sk_$1_compfunc compare); +static ossl_inline STACK_OF($1) *sk_$1_new_null(void); +static ossl_inline STACK_OF($1) *sk_$1_new_reserve(sk_$1_compfunc compare, + int n); +static ossl_inline int sk_$1_reserve(STACK_OF($1) *sk, int n); +static ossl_inline void sk_$1_free(STACK_OF($1) *sk); +static ossl_inline void sk_$1_zero(STACK_OF($1) *sk); +static ossl_inline $2 *sk_$1_delete(STACK_OF($1) *sk, int i); +static ossl_inline $2 *sk_$1_delete_ptr(STACK_OF($1) *sk, $2 *ptr); +static ossl_inline int sk_$1_push(STACK_OF($1) *sk, $2 *ptr); +static ossl_inline int sk_$1_unshift(STACK_OF($1) *sk, $2 *ptr); +static ossl_inline $2 *sk_$1_pop(STACK_OF($1) *sk); +static ossl_inline $2 *sk_$1_shift(STACK_OF($1) *sk); +static ossl_inline void sk_$1_pop_free(STACK_OF($1) *sk, + sk_$1_freefunc freefunc); +static ossl_inline int sk_$1_insert(STACK_OF($1) *sk, $2 *ptr, int idx); +static ossl_inline $2 *sk_$1_set(STACK_OF($1) *sk, int idx, $2 *ptr); +static ossl_inline int sk_$1_find(STACK_OF($1) *sk, $2 *ptr); +static ossl_inline int sk_$1_find_ex(STACK_OF($1) *sk, $2 *ptr); +static ossl_inline void sk_$1_sort(STACK_OF($1) *sk); +static ossl_inline int sk_$1_is_sorted(const STACK_OF($1) *sk); +static ossl_inline STACK_OF($1) * sk_$1_dup(const STACK_OF($1) *sk); +static ossl_inline STACK_OF($1) *sk_$1_deep_copy(const STACK_OF($1) *sk, + sk_$1_copyfunc copyfunc, + sk_$1_freefunc freefunc); +static ossl_inline sk_$1_compfunc sk_$1_set_cmp_func(STACK_OF($1) *sk, + sk_$1_compfunc compare); +EOF + } + }, + { regexp => qr/DEFINE_SPECIAL_STACK_OF<<<\((.*),(.*)\)>>>/, + massager => sub { return ("SKM_DEFINE_STACK_OF($1,$2,$2)"); }, + }, + { regexp => qr/DEFINE_STACK_OF<<<\((.*)\)>>>/, + massager => sub { return ("SKM_DEFINE_STACK_OF($1,$1,$1)"); }, + }, + { regexp => qr/DEFINE_SPECIAL_STACK_OF_CONST<<<\((.*),(.*)\)>>>/, + massager => sub { return ("SKM_DEFINE_STACK_OF($1,const $2,$2)"); }, + }, + { regexp => qr/DEFINE_STACK_OF_CONST<<<\((.*)\)>>>/, + massager => sub { return ("SKM_DEFINE_STACK_OF($1,const $1,$1)"); }, + }, + { regexp => qr/PREDECLARE_STACK_OF<<<\((.*)\)>>>/, + massager => sub { return ("STACK_OF($1);"); } + }, + { regexp => qr/DECLARE_STACK_OF<<<\((.*)\)>>>/, + massager => sub { return ("STACK_OF($1);"); } + }, + { regexp => qr/DECLARE_SPECIAL_STACK_OF<<<\((.*?),(.*?)\)>>>/, + massager => sub { return ("STACK_OF($1);"); } + }, + + ##### + # ASN1 stuff + + { regexp => qr/TYPEDEF_D2I_OF<<<\((.*)\)>>>/, + massager => sub { + return ("typedef $1 *d2i_of_$1($1 **,const unsigned char **,long)"); + }, + }, + { regexp => qr/TYPEDEF_I2D_OF<<<\((.*)\)>>>/, + massager => sub { + return ("typedef $1 *i2d_of_$1($1 *,unsigned char **)"); + }, + }, + { regexp => qr/TYPEDEF_D2I2D_OF<<<\((.*)\)>>>/, + massager => sub { + return ("TYPEDEF_D2I_OF($1); TYPEDEF_I2D_OF($1)"); + }, + }, + { regexp => qr/DECLARE_ASN1_ITEM<<<\((.*)\)>>>/, + massager => sub { + return (<<"EOF"); +#ifndef OPENSSL_EXPORT_VAR_AS_FUNCTION +OPENSSL_EXTERN const ASN1_ITEM *$1_it; +#else +const ASN1_ITEM *$1_it(void); +#endif +EOF + }, + }, + { regexp => qr/DECLARE_ASN1_ENCODE_FUNCTIONS<<<\((.*),(.*),(.*)\)>>>/, + massager => sub { + return (<<"EOF"); +int d2i_$3(void); +int i2d_$3(void); +DECLARE_ASN1_ITEM($2) +EOF + }, + }, + { regexp => qr/DECLARE_ASN1_ENCODE_FUNCTIONS_const<<<\((.*),(.*)\)>>>/, + massager => sub { + return (<<"EOF"); +int d2i_$2(void); +int i2d_$2(void); +DECLARE_ASN1_ITEM($2) +EOF + }, + }, + { regexp => qr/DECLARE_ASN1_ALLOC_FUNCTIONS<<<\((.*)\)>>>/, + massager => sub { + return (<<"EOF"); +int $1_free(void); +int $1_new(void); +EOF + }, + }, + { regexp => qr/DECLARE_ASN1_FUNCTIONS_name<<<\((.*),(.*)\)>>>/, + massager => sub { + return (<<"EOF"); +int d2i_$2(void); +int i2d_$2(void); +int $2_free(void); +int $2_new(void); +DECLARE_ASN1_ITEM($2) +EOF + }, + }, + { regexp => qr/DECLARE_ASN1_FUNCTIONS_fname<<<\((.*),(.*),(.*)\)>>>/, + massager => sub { return (<<"EOF"); +int d2i_$3(void); +int i2d_$3(void); +int $3_free(void); +int $3_new(void); +DECLARE_ASN1_ITEM($2) +EOF + } + }, + { regexp => qr/DECLARE_ASN1_FUNCTIONS(?:_const)?<<<\((.*)\)>>>/, + massager => sub { return (<<"EOF"); +int d2i_$1(void); +int i2d_$1(void); +int $1_free(void); +int $1_new(void); +DECLARE_ASN1_ITEM($1) +EOF + } + }, + { regexp => qr/DECLARE_ASN1_NDEF_FUNCTION<<<\((.*)\)>>>/, + massager => sub { + return (<<"EOF"); +int i2d_$1_NDEF(void); +EOF + } + }, + { regexp => qr/DECLARE_ASN1_PRINT_FUNCTION<<<\((.*)\)>>>/, + massager => sub { + return (<<"EOF"); +int $1_print_ctx(void); +EOF + } + }, + { regexp => qr/DECLARE_ASN1_PRINT_FUNCTION_name<<<\((.*),(.*)\)>>>/, + massager => sub { + return (<<"EOF"); +int $2_print_ctx(void); +EOF + } + }, + { regexp => qr/DECLARE_ASN1_SET_OF<<<\((.*)\)>>>/, + massager => sub { return (); } + }, + { regexp => qr/DECLARE_PKCS12_SET_OF<<<\((.*)\)>>>/, + massager => sub { return (); } + }, + { regexp => qr/DECLARE_PEM(?|_rw|_rw_cb|_rw_const)<<<\((.*?),.*\)>>>/, + massager => sub { return (<<"EOF"); +#ifndef OPENSSL_NO_STDIO +int PEM_read_$1(void); +int PEM_write_$1(void); +#endif +int PEM_read_bio_$1(void); +int PEM_write_bio_$1(void); +EOF + }, + }, + + ##### + # PEM stuff + { regexp => qr/DECLARE_PEM(?|_write|_write_cb|_write_const)<<<\((.*?),.*\)>>>/, + massager => sub { return (<<"EOF"); +#ifndef OPENSSL_NO_STDIO +int PEM_write_$1(void); +#endif +int PEM_write_bio_$1(void); +EOF + }, + }, + { regexp => qr/DECLARE_PEM(?|_read|_read_cb)<<<\((.*?),.*\)>>>/, + massager => sub { return (<<"EOF"); +#ifndef OPENSSL_NO_STDIO +int PEM_read_$1(void); +#endif +int PEM_read_bio_$1(void); +EOF + }, + }, + + # Spurious stuff found in the OpenSSL headers + # Usually, these are just macros that expand to, well, something + { regexp => qr/__NDK_FPABI__/, + massager => sub { return (); } + }, + ); + +my $anoncnt = 0; + +my @chandlers = ( + ################################################################## + # C stuff + + # extern "C" of individual items + # Note that the main parse function has a special hack for 'extern "C" {' + # which can't be done in handlers + # We simply ignore it. + { regexp => qr/extern "C" (.*;)/, + massager => sub { return ($1); }, + }, + # union, struct and enum definitions + # Because this one might appear a little everywhere within type + # definitions, we take it out and replace it with just + # 'union|struct|enum name' while registering it. + # This makes use of the parser trick to surround the outer braces + # with <<< and >>> + { regexp => qr/(.*) # Anything before ($1) + \b # word to non-word boundary + (union|struct|enum) # The word used ($2) + (?:\s([[:alpha:]_]\w*))? # Struct or enum name ($3) + <<<(\{.*?\})>>> # Struct or enum definition ($4) + (.*) # Anything after ($5) + ; + /x, + massager => sub { + my $before = $1; + my $word = $2; + my $name = $3 + || sprintf("__anon%03d", ++$anoncnt); # Anonymous struct + my $definition = $4; + my $after = $5; + my $type = $word eq "struct" ? 'S' : 'E'; + if ($before ne "" || $after ne ";") { + if ($after =~ m|^\w|) { $after = " ".$after; } + return ("$before$word $name$after;", + "$word $name", $type, "", "$word$definition", all_conds()); + } + # If there was no before nor after, make the return much simple + return ("", "$word $name", $type, "", "$word$definition", all_conds()); + } + }, + # Named struct and enum forward declarations + # We really just ignore them, but we need to parse them or the variable + # declaration handler further down will think it's a variable declaration. + { regexp => qr/^(union|struct|enum) ([[:alpha:]_]\w*);/, + massager => sub { return (); } + }, + # Function returning function pointer declaration + { regexp => qr/(?:(typedef)\s?)? # Possible typedef ($1) + ((?:\w|\*|\s)*?) # Return type ($2) + \s? # Possible space + <<<\(\* + ([[:alpha:]_]\w*) # Function name ($3) + (\(.*\)) # Parameters ($4) + \)>>> + <<<(\(.*\))>>> # F.p. parameters ($5) + ; + /x, + massager => sub { + return ("", $3, 'F', "", "$2(*$4)$5", all_conds()) + if defined $1; + return ("", $3, 'F', "$2(*)$5", "$2(*$4)$5", all_conds()); } + }, + # Function pointer declaration, or typedef thereof + { regexp => qr/(?:(typedef)\s?)? # Possible typedef ($1) + ((?:\w|\*|\s)*?) # Return type ($2) + <<<\(\*([[:alpha:]_]\w*)\)>>> # T.d. or var name ($3) + <<<(\(.*\))>>> # F.p. parameters ($4) + ; + /x, + massager => sub { + return ("", $3, 'T', "", "$2(*)$4", all_conds()) + if defined $1; + return ("", $3, 'V', "$2(*)$4", "$2(*)$4", all_conds()); + }, + }, + # Function declaration, or typedef thereof + { regexp => qr/(?:(typedef)\s?)? # Possible typedef ($1) + ((?:\w|\*|\s)*?) # Return type ($2) + \s? # Possible space + ([[:alpha:]_]\w*) # Function name ($3) + <<<(\(.*\))>>> # Parameters ($4) + ; + /x, + massager => sub { + return ("", $3, 'T', "", "$2$4", all_conds()) + if defined $1; + return ("", $3, 'F', $2, "$2$4", all_conds()); + }, + }, + # Variable declaration, including arrays, or typedef thereof + { regexp => qr/(?:(typedef)\s?)? # Possible typedef ($1) + ((?:\w|\*|\s)*?) # Type ($2) + \s? # Possible space + ([[:alpha:]_]\w*) # Variable name ($3) + ((?:<<<\[[^\]]*\]>>>)*) # Possible array declaration ($4) + ; + /x, + massager => sub { + return ("", $3, 'T', "", $2.($4||""), all_conds()) + if defined $1; + return ("", $3, 'V', $2.($4||""), $2.($4||""), all_conds()); + }, + }, +); + +# End handlers are almost the same as handlers, except they are run through +# ONCE when the input has been parsed through. These are used to check for +# remaining stuff, such as an unfinished #ifdef and stuff like that that the +# main parser can't check on its own. +my @endhandlers = ( + { massager => sub { + my %opts = %{$_[0]}; + + die "Unfinished preprocessor conditions levels: ",scalar(@preprocessor_conds),($opts{filename} ? " in file ".$opts{filename}: ""),$opts{PLACE} + if @preprocessor_conds; + } + } + ); + +# takes a list of strings that can each contain one or several lines of code +# also takes a hash of options as last argument. +# +# returns a list of hashes with information: +# +# name name of the thing +# type type, see the massage handler function +# returntype return type of functions and variables +# value value for macros, signature for functions, variables +# and structs +# conds preprocessor conditions (array ref) + +sub parse { + my %opts; + if (ref($_[$#_]) eq "HASH") { + %opts = %{$_[$#_]}; + pop @_; + } + my %state = ( + in_extern_C => 0, # An exception to parenthesis processing. + cpp_parens => [], # A list of ending parens and braces found in + # preprocessor directives + c_parens => [], # A list of ending parens and braces found in + # C statements + in_string => "", # empty string when outside a string, otherwise + # "'" or '"' depending on the starting quote. + in_comment => "", # empty string when outside a comment, otherwise + # "/*" or "//" depending on the type of comment + # found. The latter will never be multiline + # NOTE: in_string and in_comment will never be + # true (in perl semantics) at the same time. + current_line => 0, + ); + my @result = (); + my $normalized_line = ""; # $input_line, but normalized. In essence, this + # means that ALL whitespace is removed unless + # it absolutely has to be present, and in that + # case, there's only one space. + # The cases where a space needs to stay present + # are: + # 1. between words + # 2. between words and number + # 3. after the first word of a preprocessor + # directive. + # 4. for the #define directive, between the macro + # name/args and its value, so we end up with: + # #define FOO val + # #define BAR(x) something(x) + my $collected_stmt = ""; # Where we're building up a C line until it's a + # complete definition/declaration, as determined + # by any handler being capable of matching it. + + # We use $_ shamelessly when looking through @lines. + # In case we find a \ at the end, we keep filling it up with more lines. + $_ = undef; + + foreach my $line (@_) { + # split tries to be smart when a string ends with the thing we split on + $line .= "\n" unless $line =~ m|\R$|; + $line .= "#"; + + # We use ¦undef¦ as a marker for a new line from the file. + # Since we convert one line to several and unshift that into @lines, + # that's the only safe way we have to track the original lines + my @lines = map { ( undef, $_ ) } split $/, $line; + + # Remember that extra # we added above? Now we remove it + pop @lines; + pop @lines; # Don't forget the undef + + while (@lines) { + if (!defined($lines[0])) { + shift @lines; + $state{current_line}++; + if (!defined($_)) { + $opts{PLACE} = " at ".$opts{filename}." line ".$state{current_line}."\n"; + $opts{PLACE2} = $opts{filename}.":".$state{current_line}; + } + next; + } + + $_ = "" unless defined $_; + $_ .= shift @lines; + + if (m|\\$|) { + $_ = $`; + next; + } + + if ($opts{debug}) { + print STDERR "DEBUG:----------------------------\n"; + print STDERR "DEBUG: \$_ = '$_'\n"; + } + + ########################################################## + # Now that we have a full line, let's process through it + while(1) { + unless ($state{in_comment}) { + # Begin with checking if the current $normalized_line + # contains a preprocessor directive + # This is only done if we're not inside a comment and + # if it's a preprocessor directive and it's finished. + if ($normalized_line =~ m|^#| && $_ eq "") { + print STDERR "DEBUG[OPENSSL CPP]: \$normalized_line = '$normalized_line'\n" + if $opts{debug}; + $opts{debug_type} = "OPENSSL CPP"; + my @r = ( _run_handlers($normalized_line, + @opensslcpphandlers, + \%opts) ); + if (shift @r) { + # Checking if there are lines to inject. + if (@r) { + @r = split $/, (pop @r).$_; + print STDERR "DEBUG[OPENSSL CPP]: injecting '", join("', '", @r),"'\n" + if $opts{debug} && @r; + @lines = ( @r, @lines ); + + $_ = ""; + } + } else { + print STDERR "DEBUG[CPP]: \$normalized_line = '$normalized_line'\n" + if $opts{debug}; + $opts{debug_type} = "CPP"; + my @r = ( _run_handlers($normalized_line, + @cpphandlers, + \%opts) ); + if (shift @r) { + if (ref($r[0]) eq "HASH") { + push @result, shift @r; + } + + # Now, check if there are lines to inject. + # Really, this should never happen, it IS a + # preprocessor directive after all... + if (@r) { + @r = split $/, pop @r; + print STDERR "DEBUG[CPP]: injecting '", join("', '", @r),"'\n" + if $opts{debug} && @r; + @lines = ( @r, @lines ); + $_ = ""; + } + } + } + + # Note: we simply ignore all directives that no + # handler matches + $normalized_line = ""; + } + + # If the two strings end and start with a character that + # shouldn't get concatenated, add a space + my $space = + ($collected_stmt =~ m/(?:"|')$/ + || ($collected_stmt =~ m/(?:\w|\d)$/ + && $normalized_line =~ m/^(?:\w|\d)/)) ? " " : ""; + + # Now, unless we're building up a preprocessor directive or + # are in the middle of a string, or the parens et al aren't + # balanced up yet, let's try and see if there's a OpenSSL + # or C handler that can make sense of what we have so far. + if ( $normalized_line !~ m|^#| + && ($collected_stmt ne "" || $normalized_line ne "") + && ! @{$state{c_parens}} + && ! $state{in_string} ) { + if ($opts{debug}) { + print STDERR "DEBUG[OPENSSL C]: \$collected_stmt = '$collected_stmt'\n"; + print STDERR "DEBUG[OPENSSL C]: \$normalized_line = '$normalized_line'\n"; + } + $opts{debug_type} = "OPENSSL C"; + my @r = ( _run_handlers($collected_stmt + .$space + .$normalized_line, + @opensslchandlers, + \%opts) ); + if (shift @r) { + # Checking if there are lines to inject. + if (@r) { + @r = split $/, (pop @r).$_; + print STDERR "DEBUG[OPENSSL]: injecting '", join("', '", @r),"'\n" + if $opts{debug} && @r; + @lines = ( @r, @lines ); + + $_ = ""; + } + $normalized_line = ""; + $collected_stmt = ""; + } else { + if ($opts{debug}) { + print STDERR "DEBUG[C]: \$collected_stmt = '$collected_stmt'\n"; + print STDERR "DEBUG[C]: \$normalized_line = '$normalized_line'\n"; + } + $opts{debug_type} = "C"; + my @r = ( _run_handlers($collected_stmt + .$space + .$normalized_line, + @chandlers, + \%opts) ); + if (shift @r) { + if (ref($r[0]) eq "HASH") { + push @result, shift @r; + } + + # Checking if there are lines to inject. + if (@r) { + @r = split $/, (pop @r).$_; + print STDERR "DEBUG[C]: injecting '", join("', '", @r),"'\n" + if $opts{debug} && @r; + @lines = ( @r, @lines ); + + $_ = ""; + } + $normalized_line = ""; + $collected_stmt = ""; + } + } + } + if ($_ eq "") { + $collected_stmt .= $space.$normalized_line; + $normalized_line = ""; + } + } + + if ($_ eq "") { + $_ = undef; + last; + } + + # Take care of inside string first. + if ($state{in_string}) { + if (m/ (?:^|(?>>)? + | \#[a-z]+ + $/x); + print STDERR "DEBUG: Processing leading spaces: \$normalized_line = '$normalized_line', \$space = '$space', \$rest = '$rest'\n" + if $opts{debug}; + $_ = $space.$rest; + } + + my $parens = + $normalized_line =~ m|^#| ? 'cpp_parens' : 'c_parens'; + (my $paren_singular = $parens) =~ s|s$||; + + # Now check for specific tokens, and if they are parens, + # check them against $state{$parens}. Note that we surround + # the outermost parens with extra "<<<" and ">>>". Those + # are for the benefit of handlers who to need to detect + # them, and they will be removed from the final output. + if (m|^[\{\[\(]|) { + my $body = $&; + $_ = $'; + if (!@{$state{$parens}}) { + if ("$normalized_line$body" =~ m|^extern "C"\{$|) { + $state{in_extern_C} = 1; + print STDERR "DEBUG: found start of 'extern \"C\"' ($normalized_line$body)\n" + if $opts{debug}; + $normalized_line = ""; + } else { + $normalized_line .= "<<<".$body; + } + } else { + $normalized_line .= $body; + } + + if ($normalized_line ne "") { + print STDERR "DEBUG: found $paren_singular start '$body'\n" + if $opts{debug}; + $body =~ tr|\{\[\(|\}\]\)|; + print STDERR "DEBUG: pushing $paren_singular end '$body'\n" + if $opts{debug}; + push @{$state{$parens}}, $body; + } + } elsif (m|^[\}\]\)]|) { + $_ = $'; + + if (!@{$state{$parens}} + && $& eq '}' && $state{in_extern_C}) { + print STDERR "DEBUG: found end of 'extern \"C\"'\n" + if $opts{debug}; + $state{in_extern_C} = 0; + } else { + print STDERR "DEBUG: Trying to match '$&' against '" + ,join("', '", @{$state{$parens}}) + ,"'\n" + if $opts{debug}; + die "Unmatched parentheses$opts{PLACE}\n" + unless (@{$state{$parens}} + && pop @{$state{$parens}} eq $&); + if (!@{$state{$parens}}) { + $normalized_line .= $&.">>>"; + } else { + $normalized_line .= $&; + } + } + } elsif (m|^["']|) { # string start + my $body = $&; + $_ = $'; + + # We want to separate strings from \w and \d with one space. + $normalized_line .= " " if $normalized_line =~ m/(\w|\d)$/; + $normalized_line .= $body; + $state{in_string} = $body; + } elsif (m|^\/\*|) { # C style comment + print STDERR "DEBUG: found start of C style comment\n" + if $opts{debug}; + $state{in_comment} = $&; + $_ = $'; + } elsif (m|^\/\/|) { # C++ style comment + print STDERR "DEBUG: found C++ style comment\n" + if $opts{debug}; + $_ = ""; # (just discard it entirely) + } elsif (m/^ (?| (?: 0[xX][[:xdigit:]]+ | 0[bB][01]+ | [0-9]+ ) + (?i: U | L | UL | LL | ULL )? + | [0-9]+\.[0-9]+(?:[eE][\-\+]\d+)? (?i: F | L)? + ) /x) { + print STDERR "DEBUG: Processing numbers: \$normalized_line = '$normalized_line', \$& = '$&', \$' = '$''\n" + if $opts{debug}; + $normalized_line .= $&; + $_ = $'; + } elsif (m/^[[:alpha:]_]\w*/) { + my $body = $&; + my $rest = $'; + my $space = ""; + + # Now, only add a space if it's needed to separate + # two \w characters, and we also surround strings with + # a space. In this case, that's if $normalized_line ends + # with a \w, \d, " or '. + $space = " " + if ($normalized_line =~ m/("|')$/ + || ($normalized_line =~ m/(\w|\d)$/ + && $body =~ m/^(\w|\d)/)); + + print STDERR "DEBUG: Processing words: \$normalized_line = '$normalized_line', \$space = '$space', \$body = '$body', \$rest = '$rest'\n" + if $opts{debug}; + $normalized_line .= $space.$body; + $_ = $rest; + } elsif (m|^(?:\\)?.|) { # Catch-all + $normalized_line .= $&; + $_ = $'; + } + } + } + } + foreach my $handler (@endhandlers) { + if ($handler->{massager}) { + $handler->{massager}->(\%opts); + } + } + return @result; +} + +# arg1: line to check +# arg2...: handlers to check +# return undef when no handler matched +sub _run_handlers { + my %opts; + if (ref($_[$#_]) eq "HASH") { + %opts = %{$_[$#_]}; + pop @_; + } + my $line = shift; + my @handlers = @_; + + foreach my $handler (@handlers) { + if ($handler->{regexp} + && $line =~ m|^$handler->{regexp}$|) { + if ($handler->{massager}) { + if ($opts{debug}) { + print STDERR "DEBUG[",$opts{debug_type},"]: Trying to handle '$line'\n"; + print STDERR "DEBUG[",$opts{debug_type},"]: (matches /\^",$handler->{regexp},"\$/)\n"; + } + my $saved_line = $line; + my @massaged = + map { s/(<<<|>>>)//g; $_ } + $handler->{massager}->($saved_line, \%opts); + print STDERR "DEBUG[",$opts{debug_type},"]: Got back '" + , join("', '", @massaged), "'\n" + if $opts{debug}; + + # Because we may get back new lines to be + # injected before whatever else that follows, + # and the injected stuff might include + # preprocessor lines, we need to inject them + # in @lines and set $_ to the empty string to + # break out from the inner loops + my $injected_lines = shift @massaged || ""; + + if (@massaged) { + return (1, + { + name => shift @massaged, + type => shift @massaged, + returntype => shift @massaged, + value => shift @massaged, + conds => [ @massaged ] + }, + $injected_lines + ); + } else { + print STDERR "DEBUG[",$opts{debug_type},"]: (ignore, possible side effects)\n" + if $opts{debug} && $injected_lines eq ""; + return (1, $injected_lines); + } + } + return (1); + } + } + return (0); +} -- cgit v1.2.3