From 7c8d93968009d1f3123ebc78906d4abb896e6905 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 30 Oct 2023 21:58:48 -0400 Subject: [ruby/prism] Start better documenting C API https://github.com/ruby/prism/commit/2b6e661bbc --- prism/diagnostic.h | 6 +- prism/parser.h | 14 ++- prism/prism.c | 103 +++++++++++++++++- prism/templates/include/prism/ast.h.erb | 19 ++-- prism/util/pm_string.c | 184 +++++++++++++++++++------------- prism/util/pm_string.h | 78 ++++++++++---- 6 files changed, 299 insertions(+), 105 deletions(-) (limited to 'prism') diff --git a/prism/diagnostic.h b/prism/diagnostic.h index 0c9e4a3fd1..273cab22e5 100644 --- a/prism/diagnostic.h +++ b/prism/diagnostic.h @@ -8,7 +8,11 @@ #include #include -// This struct represents a diagnostic found during parsing. +/** + * This struct represents a diagnostic found during parsing. + * + * @extends pm_list_node_t + */ typedef struct { pm_list_node_t node; const uint8_t *start; diff --git a/prism/parser.h b/prism/parser.h index c701e595a9..01b047ccdf 100644 --- a/prism/parser.h +++ b/prism/parser.h @@ -243,7 +243,11 @@ typedef enum { PM_COMMENT___END__ } pm_comment_type_t; -// This is a node in the linked list of comments that we've found while parsing. +/** + * This is a node in the linked list of comments that we've found while parsing. + * + * @extends pm_list_node_t + */ typedef struct pm_comment { pm_list_node_t node; const uint8_t *start; @@ -251,8 +255,12 @@ typedef struct pm_comment { pm_comment_type_t type; } pm_comment_t; -// This is a node in the linked list of magic comments that we've found while -// parsing. +/** + * This is a node in the linked list of magic comments that we've found while + * parsing. 
+ * + * @extends pm_list_node_t + */ typedef struct { pm_list_node_t node; const uint8_t *key_start; diff --git a/prism/prism.c b/prism/prism.c index 369cbe7f85..f90086031d 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -1,5 +1,104 @@ #include "prism.h" +/** + * @mainpage + * + * Prism is a parser for the Ruby programming language. It is designed to be + * portable, error tolerant, and maintainable. It is written in C99 and has no + * dependencies. It is currently being integrated into + * [CRuby](https://github.com/ruby/ruby), + * [JRuby](https://github.com/jruby/jruby), + * [TruffleRuby](https://github.com/oracle/truffleruby), + * [Sorbet](https://github.com/sorbet/sorbet), and + * [Syntax Tree](https://github.com/ruby-syntax-tree/syntax_tree). + * + * @section getting-started Getting started + * + * If you're vendoring this project and compiling it statically then as long as + * you have a C99 compiler you will be fine. If you're linking against it as a + * shared library, then you should compile with `-fvisibility=hidden` and + * `-DPRISM_EXPORT_SYMBOLS` to tell prism to make only its public interface + * visible. 
+ * + * @section parsing Parsing + * + * In order to parse Ruby code, the structures and functions that you're going + * to want to use and be aware of are: + * + * * @ref pm_parser_t - the main parser structure + * * @ref pm_parser_init - initialize a parser + * * @ref pm_parse - parse and return the root node + * * @ref pm_node_destroy - deallocate the root node returned by `pm_parse` + * * @ref pm_parser_free - free the internal memory of the parser + * + * Putting all of this together would look something like: + * + * ```c + * void parse(const uint8_t *source, size_t length) { + * pm_parser_t parser; + * pm_parser_init(&parser, source, length, NULL); + * + * pm_node_t *root = pm_parse(&parser); + * printf("PARSED!\n"); + * + * pm_node_destroy(root); + * pm_parser_free(&parser); + * } + * ``` + * + * All of the nodes "inherit" from `pm_node_t` by embedding those structures as + * their first member. This means you can downcast and upcast any node in the + * tree to a `pm_node_t`. + * + * @section serializing Serializing + * + * Prism provides the ability to serialize the AST and its related metadata into + * a binary format. This format is designed to be portable to different + * languages and runtimes so that you only need to make one FFI call in order to + * parse Ruby code. 
The structures and functions that you're going to want to + * use and be aware of are: + * + * * @ref pm_buffer_t - a small buffer object that will hold the serialized AST + * * @ref pm_buffer_free - free the memory associated with the buffer + * * @ref pm_serialize - serialize the AST into a buffer + * * @ref pm_parse_serialize - parse and serialize the AST into a buffer + * + * Putting all of this together would look something like: + * + * ```c + * void serialize(const uint8_t *source, size_t length) { + * pm_buffer_t buffer = { 0 }; + * + * pm_parse_serialize(source, length, &buffer, NULL); + * printf("SERIALIZED!\n"); + * + * pm_buffer_free(&buffer); + * } + * ``` + * + * @section inspecting Inspecting + * + * Prism provides the ability to inspect the AST by pretty-printing nodes. You + * can do this with the `pm_prettyprint` function, which you would use like: + * + * ```c + * void prettyprint(const uint8_t *source, size_t length) { + * pm_parser_t parser; + * pm_parser_init(&parser, source, length, NULL); + * + * pm_node_t *root = pm_parse(&parser); + * pm_buffer_t buffer = { 0 }; + * + * pm_prettyprint(&buffer, &parser, root); + * printf("%.*s\n", (int) buffer.length, buffer.value); + * + * pm_buffer_free(&buffer); + * pm_node_destroy(root); + * pm_parser_free(&parser); + * } + * ``` + */ + // The prism version and the serialization format. const char * pm_version(void) { @@ -15726,7 +15825,9 @@ pm_parser_free(pm_parser_t *parser) { } } -// Parse the Ruby source associated with the given parser and return the tree. +/** + * Parse the Ruby source associated with the given parser and return the tree. 
+ */ PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser) { return parse_program(parser); diff --git a/prism/templates/include/prism/ast.h.erb b/prism/templates/include/prism/ast.h.erb index 38c01d1f05..75b10c9807 100644 --- a/prism/templates/include/prism/ast.h.erb +++ b/prism/templates/include/prism/ast.h.erb @@ -61,7 +61,10 @@ static const pm_node_flags_t PM_NODE_FLAG_COMMON_MASK = (1 << (PM_NODE_FLAG_BITS #define PM_NODE_TYPE(node) ((enum pm_node_type)node->type) #define PM_NODE_TYPE_P(node, type) (PM_NODE_TYPE(node) == (type)) -// This is the overall tagged union representing a node in the syntax tree. +/** + * This is the base structure that represents a node in the syntax tree. It is + * embedded into every node type. + */ typedef struct pm_node { // This represents the type of the node. It somewhat maps to the nodes that // existed in the original grammar and ripper, but it's not a 1:1 mapping. @@ -76,16 +79,20 @@ typedef struct pm_node { } pm_node_t; <%- nodes.each do |node| -%> -// <%= node.name %> -// -// Type: <%= node.type %> +/** + * <%= node.name %> + * + * Type: <%= node.type %> <%- if (node_flags = node.fields.find { |field| field.is_a? Prism::FlagsField }) -%> -// Flags: + * Flags: <%- found = flags.find { |flag| flag.name == node_flags.kind }.tap { |found| raise "Expected to find #{field.kind}" unless found } -%> <%- found.values.each do |value| -%> -// PM_<%= found.human.upcase %>_<%= value.name %> + * PM_<%= found.human.upcase %>_<%= value.name %> <%- end -%> <%- end -%> + * + * @extends pm_node_t + */ typedef struct pm_<%= node.human %> { pm_node_t base; <%- node.fields.grep_v(Prism::FlagsField).each do |field| -%> diff --git a/prism/util/pm_string.c b/prism/util/pm_string.c index bd1c93baca..6ffedc86ba 100644 --- a/prism/util/pm_string.c +++ b/prism/util/pm_string.c @@ -10,7 +10,11 @@ #include #endif -// Initialize a shared string that is based on initial input. +/** + * Initialize a shared string that is based on initial input. 
+ * + * @memberof pm_string_t + */ void pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end) { assert(start <= end); @@ -22,7 +26,11 @@ pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t * }; } -// Initialize an owned string that is responsible for freeing allocated memory. +/** + * Initialize an owned string that is responsible for freeing allocated memory. + * + * @memberof pm_string_t + */ void pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length) { *string = (pm_string_t) { @@ -32,7 +40,11 @@ pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length) { }; } -// Initialize a constant string that doesn't own its memory source. +/** + * Initialize a constant string that doesn't own its memory source. + * + * @memberof pm_string_t + */ void pm_string_constant_init(pm_string_t *string, const char *source, size_t length) { *string = (pm_string_t) { @@ -42,69 +54,19 @@ pm_string_constant_init(pm_string_t *string, const char *source, size_t length) }; } -static void -pm_string_mapped_init_internal(pm_string_t *string, uint8_t *source, size_t length) { - *string = (pm_string_t) { - .type = PM_STRING_MAPPED, - .source = source, - .length = length - }; -} - -// Returns the memory size associated with the string. -size_t -pm_string_memsize(const pm_string_t *string) { - size_t size = sizeof(pm_string_t); - if (string->type == PM_STRING_OWNED) { - size += string->length; - } - return size; -} - -// Ensure the string is owned. If it is not, then reinitialize it as owned and -// copy over the previous source. 
-void -pm_string_ensure_owned(pm_string_t *string) { - if (string->type == PM_STRING_OWNED) return; - - size_t length = pm_string_length(string); - const uint8_t *source = pm_string_source(string); - - uint8_t *memory = malloc(length); - if (!memory) return; - - pm_string_owned_init(string, memory, length); - memcpy((void *) string->source, source, length); -} - -// Returns the length associated with the string. -PRISM_EXPORTED_FUNCTION size_t -pm_string_length(const pm_string_t *string) { - return string->length; -} - -// Returns the start pointer associated with the string. -PRISM_EXPORTED_FUNCTION const uint8_t * -pm_string_source(const pm_string_t *string) { - return string->source; -} - -// Free the associated memory of the given string. -PRISM_EXPORTED_FUNCTION void -pm_string_free(pm_string_t *string) { - void *memory = (void *) string->source; - - if (string->type == PM_STRING_OWNED) { - free(memory); - } else if (string->type == PM_STRING_MAPPED && string->length) { -#if defined(_WIN32) - UnmapViewOfFile(memory); -#else - munmap(memory, string->length); -#endif - } -} - +/** + * Read the file indicated by the filepath parameter into source and load its + * contents and size into the given `pm_string_t`. The given `pm_string_t` + * should be freed using `pm_string_free` when it is no longer used. + * + * We want to use demand paging as much as possible in order to avoid having to + * read the entire file into memory (which could be detrimental to performance + * for large files). This means that if we're on windows we'll use + * `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use + * `mmap`, and on other POSIX systems we'll use `read`. + * + * @memberof pm_string_t + */ bool pm_string_mapped_init(pm_string_t *string, const char *filepath) { #ifdef _WIN32 @@ -128,8 +90,7 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) { // the source to a constant empty string and return. 
if (file_size == 0) { CloseHandle(file); - uint8_t empty[] = ""; - pm_string_mapped_init_internal(string, empty, 0); + *string = PM_EMPTY_STRING; return true; } @@ -151,7 +112,7 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) { return false; } - pm_string_mapped_init_internal(string, source, (size_t) file_size); + *string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = (size_t) file_size }; return true; #else // Open the file for reading @@ -175,8 +136,7 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) { if (size == 0) { close(fd); - uint8_t empty[] = ""; - pm_string_mapped_init_internal(string, empty, 0); + *string = PM_EMPTY_STRING; return true; } @@ -187,13 +147,89 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) { } close(fd); - pm_string_mapped_init_internal(string, source, size); + *string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = size }; return true; #endif } -// Returns the size of the pm_string_t struct. This is necessary to allocate the -// correct amount of memory in the FFI backend. +/** + * Returns the memory size associated with the string. + * + * @memberof pm_string_t + */ +size_t +pm_string_memsize(const pm_string_t *string) { + size_t size = sizeof(pm_string_t); + if (string->type == PM_STRING_OWNED) { + size += string->length; + } + return size; +} + +/** + * Ensure the string is owned. If it is not, then reinitialize it as owned and + * copy over the previous source. + * + * @memberof pm_string_t + */ +void +pm_string_ensure_owned(pm_string_t *string) { + if (string->type == PM_STRING_OWNED) return; + + size_t length = pm_string_length(string); + const uint8_t *source = pm_string_source(string); + + uint8_t *memory = malloc(length); + if (!memory) return; + + pm_string_owned_init(string, memory, length); + memcpy((void *) string->source, source, length); +} + +/** + * Returns the length associated with the string. 
+ * + * @memberof pm_string_t + */ +PRISM_EXPORTED_FUNCTION size_t +pm_string_length(const pm_string_t *string) { + return string->length; +} + +/** + * Returns the start pointer associated with the string. + * + * @memberof pm_string_t + */ +PRISM_EXPORTED_FUNCTION const uint8_t * +pm_string_source(const pm_string_t *string) { + return string->source; +} + +/** + * Free the associated memory of the given string. + * + * @memberof pm_string_t + */ +PRISM_EXPORTED_FUNCTION void +pm_string_free(pm_string_t *string) { + void *memory = (void *) string->source; + + if (string->type == PM_STRING_OWNED) { + free(memory); + } else if (string->type == PM_STRING_MAPPED && string->length) { +#if defined(_WIN32) + UnmapViewOfFile(memory); +#else + munmap(memory, string->length); +#endif + } +} + +/** + * Returns the size of the pm_string_t struct. This is necessary to allocate the + * correct amount of memory in the FFI backend. + */ PRISM_EXPORTED_FUNCTION size_t pm_string_sizeof(void) { return sizeof(pm_string_t); diff --git a/prism/util/pm_string.h b/prism/util/pm_string.h index ec6985282b..5f0fc7b046 100644 --- a/prism/util/pm_string.h +++ b/prism/util/pm_string.h @@ -19,44 +19,82 @@ typedef struct { #define PM_EMPTY_STRING ((pm_string_t) { .type = PM_STRING_CONSTANT, .source = NULL, .length = 0 }) -// Initialize a shared string that is based on initial input. +/** + * Initialize a shared string that is based on initial input. + * + * @memberof pm_string_t + */ void pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end); -// Initialize an owned string that is responsible for freeing allocated memory. +/** + * Initialize an owned string that is responsible for freeing allocated memory. + * + * @memberof pm_string_t + */ void pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length); -// Initialize a constant string that doesn't own its memory source. +/** + * Initialize a constant string that doesn't own its memory source. 
+ * + * @memberof pm_string_t + */ void pm_string_constant_init(pm_string_t *string, const char *source, size_t length); -// Read the file indicated by the filepath parameter into source and load its -// contents and size into the given pm_string_t. -// The given pm_string_t should be freed using pm_string_free() when it is no longer used. -// -// We want to use demand paging as much as possible in order to avoid having to -// read the entire file into memory (which could be detrimental to performance -// for large files). This means that if we're on windows we'll use -// `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use -// `mmap`, and on other POSIX systems we'll use `read`. +/** + * Read the file indicated by the filepath parameter into source and load its + * contents and size into the given `pm_string_t`. The given `pm_string_t` + * should be freed using `pm_string_free` when it is no longer used. + * + * We want to use demand paging as much as possible in order to avoid having to + * read the entire file into memory (which could be detrimental to performance + * for large files). This means that if we're on windows we'll use + * `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use + * `mmap`, and on other POSIX systems we'll use `read`. + * + * @memberof pm_string_t + */ PRISM_EXPORTED_FUNCTION bool pm_string_mapped_init(pm_string_t *string, const char *filepath); -// Returns the memory size associated with the string. +/** + * Returns the memory size associated with the string. + * + * @memberof pm_string_t + */ size_t pm_string_memsize(const pm_string_t *string); -// Ensure the string is owned. If it is not, then reinitialize it as owned and -// copy over the previous source. +/** + * Ensure the string is owned. If it is not, then reinitialize it as owned and + * copy over the previous source. 
+ * + * @memberof pm_string_t + */ void pm_string_ensure_owned(pm_string_t *string); -// Returns the length associated with the string. +/** + * Returns the length associated with the string. + * + * @memberof pm_string_t + */ PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string); -// Returns the start pointer associated with the string. +/** + * Returns the start pointer associated with the string. + * + * @memberof pm_string_t + */ PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *string); -// Free the associated memory of the given string. +/** + * Free the associated memory of the given string. + * + * @memberof pm_string_t + */ PRISM_EXPORTED_FUNCTION void pm_string_free(pm_string_t *string); -// Returns the size of the pm_string_t struct. This is necessary to allocate the -// correct amount of memory in the FFI backend. +/** + * Returns the size of the pm_string_t struct. This is necessary to allocate the + * correct amount of memory in the FFI backend. + */ PRISM_EXPORTED_FUNCTION size_t pm_string_sizeof(void); #endif // PRISM_STRING_H -- cgit v1.2.3