From 7c8d93968009d1f3123ebc78906d4abb896e6905 Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Mon, 30 Oct 2023 21:58:48 -0400
Subject: [ruby/prism] Start better documenting C API

https://github.com/ruby/prism/commit/2b6e661bbc
---
 prism/diagnostic.h                      |   6 +-
 prism/parser.h                          |  14 ++-
 prism/prism.c                           | 103 +++++++++++++++++-
 prism/templates/include/prism/ast.h.erb |  19 ++--
 prism/util/pm_string.c                  | 184 +++++++++++++++++++-------------
 prism/util/pm_string.h                  |  78 ++++++++++----
 6 files changed, 299 insertions(+), 105 deletions(-)

(limited to 'prism')

diff --git a/prism/diagnostic.h b/prism/diagnostic.h
index 0c9e4a3fd1..273cab22e5 100644
--- a/prism/diagnostic.h
+++ b/prism/diagnostic.h
@@ -8,7 +8,11 @@
 #include <stdlib.h>
 #include <assert.h>
 
-// This struct represents a diagnostic found during parsing.
+/**
+ * This struct represents a diagnostic found during parsing.
+ *
+ * @extends pm_list_node_t
+ */
 typedef struct {
     pm_list_node_t node;
     const uint8_t *start;
diff --git a/prism/parser.h b/prism/parser.h
index c701e595a9..01b047ccdf 100644
--- a/prism/parser.h
+++ b/prism/parser.h
@@ -243,7 +243,11 @@ typedef enum {
     PM_COMMENT___END__
 } pm_comment_type_t;
 
-// This is a node in the linked list of comments that we've found while parsing.
+/**
+ * This is a node in the linked list of comments that we've found while parsing.
+ *
+ * @extends pm_list_node_t
+ */
 typedef struct pm_comment {
     pm_list_node_t node;
     const uint8_t *start;
@@ -251,8 +255,12 @@ typedef struct pm_comment {
     pm_comment_type_t type;
 } pm_comment_t;
 
-// This is a node in the linked list of magic comments that we've found while
-// parsing.
+/**
+ * This is a node in the linked list of magic comments that we've found while
+ * parsing.
+ *
+ * @extends pm_list_node_t
+ */
 typedef struct {
     pm_list_node_t node;
     const uint8_t *key_start;
diff --git a/prism/prism.c b/prism/prism.c
index 369cbe7f85..f90086031d 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -1,5 +1,104 @@
 #include "prism.h"
 
+/**
+ * @mainpage
+ *
+ * Prism is a parser for the Ruby programming language. It is designed to be
+ * portable, error tolerant, and maintainable. It is written in C99 and has no
+ * dependencies. It is currently being integrated into
+ * [CRuby](https://github.com/ruby/ruby),
+ * [JRuby](https://github.com/jruby/jruby),
+ * [TruffleRuby](https://github.com/oracle/truffleruby),
+ * [Sorbet](https://github.com/sorbet/sorbet), and
+ * [Syntax Tree](https://github.com/ruby-syntax-tree/syntax_tree).
+ *
+ * @section getting-started Getting started
+ *
+ * If you're vendoring this project and compiling it statically then as long as
+ * you have a C99 compiler you will be fine. If you're linking against it as
+ * a shared library, then you should compile with `-fvisibility=hidden` and
+ * `-DPRISM_EXPORT_SYMBOLS` to tell prism to make only its public interface
+ * visible.
+ *
+ * @section parsing Parsing
+ *
+ * In order to parse Ruby code, the structures and functions that you're going
+ * to want to use and be aware of are:
+ *
+ * * @ref pm_parser_t - the main parser structure
+ * * @ref pm_parser_init - initialize a parser
+ * * @ref pm_parse - parse and return the root node
+ * * @ref pm_node_destroy - deallocate the root node returned by `pm_parse`
+ * * @ref pm_parser_free - free the internal memory of the parser
+ *
+ * Putting all of this together would look something like:
+ *
+ * ```c
+ * void parse(const uint8_t *source, size_t length) {
+ *     pm_parser_t parser;
+ *     pm_parser_init(&parser, source, length, NULL);
+ *
+ *     pm_node_t *root = pm_parse(&parser);
+ *     printf("PARSED!\n");
+ *
+ *     pm_node_destroy(root);
+ *     pm_parser_free(&parser);
+ * }
+ * ```
+ *
+ * All of the nodes "inherit" from `pm_node_t` by embedding that structure as
+ * their first member. This means you can upcast any node in the tree to a
+ * `pm_node_t` and downcast a `pm_node_t` back to its specific node type.
+ *
+ * @section serializing Serializing
+ *
+ * Prism provides the ability to serialize the AST and its related metadata into
+ * a binary format. This format is designed to be portable to different
+ * languages and runtimes so that you only need to make one FFI call in order to
+ * parse Ruby code. The structures and functions that you're going to want to
+ * use and be aware of are:
+ *
+ * * @ref pm_buffer_t - a small buffer object that will hold the serialized AST
+ * * @ref pm_buffer_free - free the memory associated with the buffer
+ * * @ref pm_serialize - serialize the AST into a buffer
+ * * @ref pm_parse_serialize - parse and serialize the AST into a buffer
+ *
+ * Putting all of this together would look something like:
+ *
+ * ```c
+ * void serialize(const uint8_t *source, size_t length) {
+ *     pm_buffer_t buffer = { 0 };
+ *
+ *     pm_parse_serialize(source, length, &buffer, NULL);
+ *     printf("SERIALIZED!\n");
+ *
+ *     pm_buffer_free(&buffer);
+ * }
+ * ```
+ *
+ * @section inspecting Inspecting
+ *
+ * Prism provides the ability to inspect the AST by pretty-printing nodes. You
+ * can do this with the `pm_prettyprint` function, which you would use like:
+ *
+ * ```c
+ * void prettyprint(const uint8_t *source, size_t length) {
+ *     pm_parser_t parser;
+ *     pm_parser_init(&parser, source, length, NULL);
+ *
+ *     pm_node_t *root = pm_parse(&parser);
+ *     pm_buffer_t buffer = { 0 };
+ *
+ *     pm_prettyprint(&buffer, &parser, root);
+ *     printf("%.*s\n", (int) buffer.length, buffer.value);
+ *
+ *     pm_buffer_free(&buffer);
+ *     pm_node_destroy(root);
+ *     pm_parser_free(&parser);
+ * }
+ * ```
+ */
+
 // The prism version and the serialization format.
 const char *
 pm_version(void) {
@@ -15726,7 +15825,9 @@ pm_parser_free(pm_parser_t *parser) {
     }
 }
 
-// Parse the Ruby source associated with the given parser and return the tree.
+/**
+ * Parse the Ruby source associated with the given parser and return the tree.
+ */
 PRISM_EXPORTED_FUNCTION pm_node_t *
 pm_parse(pm_parser_t *parser) {
     return parse_program(parser);
diff --git a/prism/templates/include/prism/ast.h.erb b/prism/templates/include/prism/ast.h.erb
index 38c01d1f05..75b10c9807 100644
--- a/prism/templates/include/prism/ast.h.erb
+++ b/prism/templates/include/prism/ast.h.erb
@@ -61,7 +61,10 @@ static const pm_node_flags_t PM_NODE_FLAG_COMMON_MASK = (1 << (PM_NODE_FLAG_BITS
 #define PM_NODE_TYPE(node) ((enum pm_node_type)node->type)
 #define PM_NODE_TYPE_P(node, type) (PM_NODE_TYPE(node) == (type))
 
-// This is the overall tagged union representing a node in the syntax tree.
+/**
+ * This is the base structure that represents a node in the syntax tree. It is
+ * embedded into every node type.
+ */
 typedef struct pm_node {
     // This represents the type of the node. It somewhat maps to the nodes that
     // existed in the original grammar and ripper, but it's not a 1:1 mapping.
@@ -76,16 +79,20 @@ typedef struct pm_node {
 } pm_node_t;
 <%- nodes.each do |node| -%>
 
-// <%= node.name %>
-//
-// Type: <%= node.type %>
+/**
+ * <%= node.name %>
+ *
+ * Type: <%= node.type %>
 <%- if (node_flags = node.fields.find { |field| field.is_a? Prism::FlagsField }) -%>
-// Flags:
+ * Flags:
 <%- found = flags.find { |flag| flag.name == node_flags.kind }.tap { |found| raise "Expected to find #{field.kind}" unless found } -%>
 <%- found.values.each do |value| -%>
-//    PM_<%= found.human.upcase %>_<%= value.name %>
+ *    PM_<%= found.human.upcase %>_<%= value.name %>
 <%- end -%>
 <%- end -%>
+ *
+ * @extends pm_node_t
+ */
 typedef struct pm_<%= node.human %> {
     pm_node_t base;
 <%- node.fields.grep_v(Prism::FlagsField).each do |field| -%>
diff --git a/prism/util/pm_string.c b/prism/util/pm_string.c
index bd1c93baca..6ffedc86ba 100644
--- a/prism/util/pm_string.c
+++ b/prism/util/pm_string.c
@@ -10,7 +10,11 @@
 #include <unistd.h>
 #endif
 
-// Initialize a shared string that is based on initial input.
+/**
+ * Initialize a shared string that is based on initial input.
+ *
+ * @memberof pm_string_t
+ */
 void
 pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end) {
     assert(start <= end);
@@ -22,7 +26,11 @@ pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *
     };
 }
 
-// Initialize an owned string that is responsible for freeing allocated memory.
+/**
+ * Initialize an owned string that is responsible for freeing allocated memory.
+ *
+ * @memberof pm_string_t
+ */
 void
 pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length) {
     *string = (pm_string_t) {
@@ -32,7 +40,11 @@ pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length) {
     };
 }
 
-// Initialize a constant string that doesn't own its memory source.
+/**
+ * Initialize a constant string that doesn't own its memory source.
+ *
+ * @memberof pm_string_t
+ */
 void
 pm_string_constant_init(pm_string_t *string, const char *source, size_t length) {
     *string = (pm_string_t) {
@@ -42,69 +54,19 @@ pm_string_constant_init(pm_string_t *string, const char *source, size_t length)
     };
 }
 
-static void
-pm_string_mapped_init_internal(pm_string_t *string, uint8_t *source, size_t length) {
-    *string = (pm_string_t) {
-        .type = PM_STRING_MAPPED,
-        .source = source,
-        .length = length
-    };
-}
-
-// Returns the memory size associated with the string.
-size_t
-pm_string_memsize(const pm_string_t *string) {
-    size_t size = sizeof(pm_string_t);
-    if (string->type == PM_STRING_OWNED) {
-        size += string->length;
-    }
-    return size;
-}
-
-// Ensure the string is owned. If it is not, then reinitialize it as owned and
-// copy over the previous source.
-void
-pm_string_ensure_owned(pm_string_t *string) {
-    if (string->type == PM_STRING_OWNED) return;
-
-    size_t length = pm_string_length(string);
-    const uint8_t *source = pm_string_source(string);
-
-    uint8_t *memory = malloc(length);
-    if (!memory) return;
-
-    pm_string_owned_init(string, memory, length);
-    memcpy((void *) string->source, source, length);
-}
-
-// Returns the length associated with the string.
-PRISM_EXPORTED_FUNCTION size_t
-pm_string_length(const pm_string_t *string) {
-    return string->length;
-}
-
-// Returns the start pointer associated with the string.
-PRISM_EXPORTED_FUNCTION const uint8_t *
-pm_string_source(const pm_string_t *string) {
-    return string->source;
-}
-
-// Free the associated memory of the given string.
-PRISM_EXPORTED_FUNCTION void
-pm_string_free(pm_string_t *string) {
-    void *memory = (void *) string->source;
-
-    if (string->type == PM_STRING_OWNED) {
-        free(memory);
-    } else if (string->type == PM_STRING_MAPPED && string->length) {
-#if defined(_WIN32)
-        UnmapViewOfFile(memory);
-#else
-        munmap(memory, string->length);
-#endif
-    }
-}
-
+/**
+ * Read the file indicated by the filepath parameter into source and load its
+ * contents and size into the given `pm_string_t`. The given `pm_string_t`
+ * should be freed using `pm_string_free` when it is no longer used.
+ *
+ * We want to use demand paging as much as possible in order to avoid having to
+ * read the entire file into memory (which could be detrimental to performance
+ * for large files). This means that if we're on Windows we'll use
+ * `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
+ * `mmap`, and on other POSIX systems we'll use `read`.
+ *
+ * @memberof pm_string_t
+ */
 bool
 pm_string_mapped_init(pm_string_t *string, const char *filepath) {
 #ifdef _WIN32
@@ -128,8 +90,7 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) {
     // the source to a constant empty string and return.
     if (file_size == 0) {
         CloseHandle(file);
-        uint8_t empty[] = "";
-        pm_string_mapped_init_internal(string, empty, 0);
+        *string = PM_EMPTY_STRING;
         return true;
     }
 
@@ -151,7 +112,7 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) {
         return false;
     }
 
-    pm_string_mapped_init_internal(string, source, (size_t) file_size);
+    *string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = (size_t) file_size };
     return true;
 #else
     // Open the file for reading
@@ -175,8 +136,7 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) {
 
     if (size == 0) {
         close(fd);
-        uint8_t empty[] = "";
-        pm_string_mapped_init_internal(string, empty, 0);
+        *string = PM_EMPTY_STRING;
         return true;
     }
 
@@ -187,13 +147,89 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) {
     }
 
     close(fd);
-    pm_string_mapped_init_internal(string, source, size);
+    *string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = size };
     return true;
 #endif
 }
 
-// Returns the size of the pm_string_t struct. This is necessary to allocate the
-// correct amount of memory in the FFI backend.
+/**
+ * Returns the memory size associated with the string.
+ *
+ * @memberof pm_string_t
+ */
+size_t
+pm_string_memsize(const pm_string_t *string) {
+    size_t size = sizeof(pm_string_t);
+    if (string->type == PM_STRING_OWNED) {
+        size += string->length;
+    }
+    return size;
+}
+
+/**
+ * Ensure the string is owned. If it is not, then reinitialize it as owned and
+ * copy over the previous source.
+ *
+ * @memberof pm_string_t
+ */
+void
+pm_string_ensure_owned(pm_string_t *string) {
+    if (string->type == PM_STRING_OWNED) return;
+
+    size_t length = pm_string_length(string);
+    const uint8_t *source = pm_string_source(string);
+
+    uint8_t *memory = malloc(length);
+    if (!memory) return;
+
+    pm_string_owned_init(string, memory, length);
+    memcpy((void *) string->source, source, length);
+}
+
+/**
+ * Returns the length associated with the string.
+ *
+ * @memberof pm_string_t
+ */
+PRISM_EXPORTED_FUNCTION size_t
+pm_string_length(const pm_string_t *string) {
+    return string->length;
+}
+
+/**
+ * Returns the start pointer associated with the string.
+ *
+ * @memberof pm_string_t
+ */
+PRISM_EXPORTED_FUNCTION const uint8_t *
+pm_string_source(const pm_string_t *string) {
+    return string->source;
+}
+
+/**
+ * Free the associated memory of the given string.
+ *
+ * @memberof pm_string_t
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_string_free(pm_string_t *string) {
+    void *memory = (void *) string->source;
+
+    if (string->type == PM_STRING_OWNED) {
+        free(memory);
+    } else if (string->type == PM_STRING_MAPPED && string->length) {
+#if defined(_WIN32)
+        UnmapViewOfFile(memory);
+#else
+        munmap(memory, string->length);
+#endif
+    }
+}
+
+/**
+ * Returns the size of the pm_string_t struct. This is necessary to allocate the
+ * correct amount of memory in the FFI backend.
+ */
 PRISM_EXPORTED_FUNCTION size_t
 pm_string_sizeof(void) {
     return sizeof(pm_string_t);
diff --git a/prism/util/pm_string.h b/prism/util/pm_string.h
index ec6985282b..5f0fc7b046 100644
--- a/prism/util/pm_string.h
+++ b/prism/util/pm_string.h
@@ -19,44 +19,82 @@ typedef struct {
 
 #define PM_EMPTY_STRING ((pm_string_t) { .type = PM_STRING_CONSTANT, .source = NULL, .length = 0 })
 
-// Initialize a shared string that is based on initial input.
+/**
+ * Initialize a shared string that is based on initial input.
+ *
+ * @memberof pm_string_t
+ */
 void pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end);
 
-// Initialize an owned string that is responsible for freeing allocated memory.
+/**
+ * Initialize an owned string that is responsible for freeing allocated memory.
+ *
+ * @memberof pm_string_t
+ */
 void pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length);
 
-// Initialize a constant string that doesn't own its memory source.
+/**
+ * Initialize a constant string that doesn't own its memory source.
+ *
+ * @memberof pm_string_t
+ */
 void pm_string_constant_init(pm_string_t *string, const char *source, size_t length);
 
-// Read the file indicated by the filepath parameter into source and load its
-// contents and size into the given pm_string_t.
-// The given pm_string_t should be freed using pm_string_free() when it is no longer used.
-//
-// We want to use demand paging as much as possible in order to avoid having to
-// read the entire file into memory (which could be detrimental to performance
-// for large files). This means that if we're on windows we'll use
-// `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
-// `mmap`, and on other POSIX systems we'll use `read`.
+/**
+ * Read the file indicated by the filepath parameter into source and load its
+ * contents and size into the given `pm_string_t`. The given `pm_string_t`
+ * should be freed using `pm_string_free` when it is no longer used.
+ *
+ * We want to use demand paging as much as possible in order to avoid having to
+ * read the entire file into memory (which could be detrimental to performance
+ * for large files). This means that if we're on Windows we'll use
+ * `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
+ * `mmap`, and on other POSIX systems we'll use `read`.
+ *
+ * @memberof pm_string_t
+ */
 PRISM_EXPORTED_FUNCTION bool pm_string_mapped_init(pm_string_t *string, const char *filepath);
 
-// Returns the memory size associated with the string.
+/**
+ * Returns the memory size associated with the string.
+ *
+ * @memberof pm_string_t
+ */
 size_t pm_string_memsize(const pm_string_t *string);
 
-// Ensure the string is owned. If it is not, then reinitialize it as owned and
-// copy over the previous source.
+/**
+ * Ensure the string is owned. If it is not, then reinitialize it as owned and
+ * copy over the previous source.
+ *
+ * @memberof pm_string_t
+ */
 void pm_string_ensure_owned(pm_string_t *string);
 
-// Returns the length associated with the string.
+/**
+ * Returns the length associated with the string.
+ *
+ * @memberof pm_string_t
+ */
 PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string);
 
-// Returns the start pointer associated with the string.
+/**
+ * Returns the start pointer associated with the string.
+ *
+ * @memberof pm_string_t
+ */
 PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *string);
 
-// Free the associated memory of the given string.
+/**
+ * Free the associated memory of the given string.
+ *
+ * @memberof pm_string_t
+ */
 PRISM_EXPORTED_FUNCTION void pm_string_free(pm_string_t *string);
 
-// Returns the size of the pm_string_t struct. This is necessary to allocate the
-// correct amount of memory in the FFI backend.
+/**
+ * Returns the size of the pm_string_t struct. This is necessary to allocate the
+ * correct amount of memory in the FFI backend.
+ */
 PRISM_EXPORTED_FUNCTION size_t pm_string_sizeof(void);
 
 #endif // PRISM_STRING_H
-- 
cgit v1.2.3