aboutsummaryrefslogtreecommitdiffstats
path: root/prism
diff options
context:
space:
mode:
authorKevin Newton <kddnewton@gmail.com>2023-10-30 21:58:48 -0400
committerKevin Newton <kddnewton@gmail.com>2023-11-01 13:10:29 -0400
commit7c8d93968009d1f3123ebc78906d4abb896e6905 (patch)
tree436b1a221a87619d7e0e815edbd07ef6d9dec98f /prism
parent4490979615338ea21619c2f1287759172c9a2437 (diff)
downloadruby-7c8d93968009d1f3123ebc78906d4abb896e6905.tar.gz
[ruby/prism] Start better documenting C API
https://github.com/ruby/prism/commit/2b6e661bbc
Diffstat (limited to 'prism')
-rw-r--r--prism/diagnostic.h6
-rw-r--r--prism/parser.h14
-rw-r--r--prism/prism.c103
-rw-r--r--prism/templates/include/prism/ast.h.erb19
-rw-r--r--prism/util/pm_string.c184
-rw-r--r--prism/util/pm_string.h78
6 files changed, 299 insertions, 105 deletions
diff --git a/prism/diagnostic.h b/prism/diagnostic.h
index 0c9e4a3fd1..273cab22e5 100644
--- a/prism/diagnostic.h
+++ b/prism/diagnostic.h
@@ -8,7 +8,11 @@
#include <stdlib.h>
#include <assert.h>
-// This struct represents a diagnostic found during parsing.
+/**
+ * This struct represents a diagnostic found during parsing.
+ *
+ * @extends pm_list_node_t
+ */
typedef struct {
pm_list_node_t node;
const uint8_t *start;
diff --git a/prism/parser.h b/prism/parser.h
index c701e595a9..01b047ccdf 100644
--- a/prism/parser.h
+++ b/prism/parser.h
@@ -243,7 +243,11 @@ typedef enum {
PM_COMMENT___END__
} pm_comment_type_t;
-// This is a node in the linked list of comments that we've found while parsing.
+/**
+ * This is a node in the linked list of comments that we've found while parsing.
+ *
+ * @extends pm_list_node_t
+ */
typedef struct pm_comment {
pm_list_node_t node;
const uint8_t *start;
@@ -251,8 +255,12 @@ typedef struct pm_comment {
pm_comment_type_t type;
} pm_comment_t;
-// This is a node in the linked list of magic comments that we've found while
-// parsing.
+/**
+ * This is a node in the linked list of magic comments that we've found while
+ * parsing.
+ *
+ * @extends pm_list_node_t
+ */
typedef struct {
pm_list_node_t node;
const uint8_t *key_start;
diff --git a/prism/prism.c b/prism/prism.c
index 369cbe7f85..f90086031d 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -1,5 +1,104 @@
#include "prism.h"
+/**
+ * @mainpage
+ *
+ * Prism is a parser for the Ruby programming language. It is designed to be
+ * portable, error tolerant, and maintainable. It is written in C99 and has no
+ * dependencies. It is currently being integrated into
+ * [CRuby](https://github.com/ruby/ruby),
+ * [JRuby](https://github.com/jruby/jruby),
+ * [TruffleRuby](https://github.com/oracle/truffleruby),
+ * [Sorbet](https://github.com/sorbet/sorbet), and
+ * [Syntax Tree](https://github.com/ruby-syntax-tree/syntax_tree).
+ *
+ * @section getting-started Getting started
+ *
+ * If you're vendoring this project and compiling it statically then as long as
+ * you have a C99 compiler you will be fine. If you're linking against it as a
+ * shared library, then you should compile with `-fvisibility=hidden` and
+ * `-DPRISM_EXPORT_SYMBOLS` to tell prism to make only its public interface
+ * visible.
+ *
+ * @section parsing Parsing
+ *
+ * In order to parse Ruby code, the structures and functions that you're going
+ * to want to use and be aware of are:
+ *
+ * * @ref pm_parser_t - the main parser structure
+ * * @ref pm_parser_init - initialize a parser
+ * * @ref pm_parse - parse and return the root node
+ * * @ref pm_node_destroy - deallocate the root node returned by `pm_parse`
+ * * @ref pm_parser_free - free the internal memory of the parser
+ *
+ * Putting all of this together would look something like:
+ *
+ * ```c
+ * void parse(const uint8_t *source, size_t length) {
+ * pm_parser_t parser;
+ * pm_parser_init(&parser, source, length, NULL);
+ *
+ * pm_node_t *root = pm_parse(&parser);
+ * printf("PARSED!\n");
+ *
+ * pm_node_destroy(root);
+ * pm_parser_free(&parser);
+ * }
+ * ```
+ *
+ * All of the nodes "inherit" from `pm_node_t` by embedding that structure as
+ * their first member. This means you can upcast any node in the tree to a
+ * `pm_node_t`, and downcast a `pm_node_t` back to its specific node type.
+ *
+ * @section serializing Serializing
+ *
+ * Prism provides the ability to serialize the AST and its related metadata into
+ * a binary format. This format is designed to be portable to different
+ * languages and runtimes so that you only need to make one FFI call in order to
+ * parse Ruby code. The structures and functions that you're going to want to
+ * use and be aware of are:
+ *
+ * * @ref pm_buffer_t - a small buffer object that will hold the serialized AST
+ * * @ref pm_buffer_free - free the memory associated with the buffer
+ * * @ref pm_serialize - serialize the AST into a buffer
+ * * @ref pm_parse_serialize - parse and serialize the AST into a buffer
+ *
+ * Putting all of this together would look something like:
+ *
+ * ```c
+ * void serialize(const uint8_t *source, size_t length) {
+ * pm_buffer_t buffer = { 0 };
+ *
+ * pm_parse_serialize(source, length, &buffer, NULL);
+ * printf("SERIALIZED!\n");
+ *
+ * pm_buffer_free(&buffer);
+ * }
+ * ```
+ *
+ * @section inspecting Inspecting
+ *
+ * Prism provides the ability to inspect the AST by pretty-printing nodes. You
+ * can do this with the `pm_prettyprint` function, which you would use like:
+ *
+ * ```c
+ * void prettyprint(const uint8_t *source, size_t length) {
+ * pm_parser_t parser;
+ * pm_parser_init(&parser, source, length, NULL);
+ *
+ * pm_node_t *root = pm_parse(&parser);
+ * pm_buffer_t buffer = { 0 };
+ *
+ * pm_prettyprint(&buffer, &parser, root);
+ * printf("%.*s\n", (int) buffer.length, buffer.value);
+ *
+ * pm_buffer_free(&buffer);
+ * pm_node_destroy(root);
+ * pm_parser_free(&parser);
+ * }
+ * ```
+ */
+
// The prism version and the serialization format.
const char *
pm_version(void) {
@@ -15726,7 +15825,9 @@ pm_parser_free(pm_parser_t *parser) {
}
}
-// Parse the Ruby source associated with the given parser and return the tree.
+/**
+ * Parse the Ruby source associated with the given parser and return the tree.
+ */
PRISM_EXPORTED_FUNCTION pm_node_t *
pm_parse(pm_parser_t *parser) {
return parse_program(parser);
diff --git a/prism/templates/include/prism/ast.h.erb b/prism/templates/include/prism/ast.h.erb
index 38c01d1f05..75b10c9807 100644
--- a/prism/templates/include/prism/ast.h.erb
+++ b/prism/templates/include/prism/ast.h.erb
@@ -61,7 +61,10 @@ static const pm_node_flags_t PM_NODE_FLAG_COMMON_MASK = (1 << (PM_NODE_FLAG_BITS
#define PM_NODE_TYPE(node) ((enum pm_node_type)node->type)
#define PM_NODE_TYPE_P(node, type) (PM_NODE_TYPE(node) == (type))
-// This is the overall tagged union representing a node in the syntax tree.
+/**
+ * This is the base structure that represents a node in the syntax tree. It is
+ * embedded into every node type.
+ */
typedef struct pm_node {
// This represents the type of the node. It somewhat maps to the nodes that
// existed in the original grammar and ripper, but it's not a 1:1 mapping.
@@ -76,16 +79,20 @@ typedef struct pm_node {
} pm_node_t;
<%- nodes.each do |node| -%>
-// <%= node.name %>
-//
-// Type: <%= node.type %>
+/**
+ * <%= node.name %>
+ *
+ * Type: <%= node.type %>
<%- if (node_flags = node.fields.find { |field| field.is_a? Prism::FlagsField }) -%>
-// Flags:
+ * Flags:
<%- found = flags.find { |flag| flag.name == node_flags.kind }.tap { |found| raise "Expected to find #{field.kind}" unless found } -%>
<%- found.values.each do |value| -%>
-// PM_<%= found.human.upcase %>_<%= value.name %>
+ * PM_<%= found.human.upcase %>_<%= value.name %>
<%- end -%>
<%- end -%>
+ *
+ * @extends pm_node_t
+ */
typedef struct pm_<%= node.human %> {
pm_node_t base;
<%- node.fields.grep_v(Prism::FlagsField).each do |field| -%>
diff --git a/prism/util/pm_string.c b/prism/util/pm_string.c
index bd1c93baca..6ffedc86ba 100644
--- a/prism/util/pm_string.c
+++ b/prism/util/pm_string.c
@@ -10,7 +10,11 @@
#include <unistd.h>
#endif
-// Initialize a shared string that is based on initial input.
+/**
+ * Initialize a shared string that is based on initial input.
+ *
+ * @memberof pm_string_t
+ */
void
pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end) {
assert(start <= end);
@@ -22,7 +26,11 @@ pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *
};
}
-// Initialize an owned string that is responsible for freeing allocated memory.
+/**
+ * Initialize an owned string that is responsible for freeing allocated memory.
+ *
+ * @memberof pm_string_t
+ */
void
pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length) {
*string = (pm_string_t) {
@@ -32,7 +40,11 @@ pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length) {
};
}
-// Initialize a constant string that doesn't own its memory source.
+/**
+ * Initialize a constant string that doesn't own its memory source.
+ *
+ * @memberof pm_string_t
+ */
void
pm_string_constant_init(pm_string_t *string, const char *source, size_t length) {
*string = (pm_string_t) {
@@ -42,69 +54,19 @@ pm_string_constant_init(pm_string_t *string, const char *source, size_t length)
};
}
-static void
-pm_string_mapped_init_internal(pm_string_t *string, uint8_t *source, size_t length) {
- *string = (pm_string_t) {
- .type = PM_STRING_MAPPED,
- .source = source,
- .length = length
- };
-}
-
-// Returns the memory size associated with the string.
-size_t
-pm_string_memsize(const pm_string_t *string) {
- size_t size = sizeof(pm_string_t);
- if (string->type == PM_STRING_OWNED) {
- size += string->length;
- }
- return size;
-}
-
-// Ensure the string is owned. If it is not, then reinitialize it as owned and
-// copy over the previous source.
-void
-pm_string_ensure_owned(pm_string_t *string) {
- if (string->type == PM_STRING_OWNED) return;
-
- size_t length = pm_string_length(string);
- const uint8_t *source = pm_string_source(string);
-
- uint8_t *memory = malloc(length);
- if (!memory) return;
-
- pm_string_owned_init(string, memory, length);
- memcpy((void *) string->source, source, length);
-}
-
-// Returns the length associated with the string.
-PRISM_EXPORTED_FUNCTION size_t
-pm_string_length(const pm_string_t *string) {
- return string->length;
-}
-
-// Returns the start pointer associated with the string.
-PRISM_EXPORTED_FUNCTION const uint8_t *
-pm_string_source(const pm_string_t *string) {
- return string->source;
-}
-
-// Free the associated memory of the given string.
-PRISM_EXPORTED_FUNCTION void
-pm_string_free(pm_string_t *string) {
- void *memory = (void *) string->source;
-
- if (string->type == PM_STRING_OWNED) {
- free(memory);
- } else if (string->type == PM_STRING_MAPPED && string->length) {
-#if defined(_WIN32)
- UnmapViewOfFile(memory);
-#else
- munmap(memory, string->length);
-#endif
- }
-}
-
+/**
+ * Read the file indicated by the filepath parameter into source and load its
+ * contents and size into the given `pm_string_t`. The given `pm_string_t`
+ * should be freed using `pm_string_free` when it is no longer used.
+ *
+ * We want to use demand paging as much as possible in order to avoid having to
+ * read the entire file into memory (which could be detrimental to performance
+ * for large files). This means that if we're on Windows we'll use
+ * `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
+ * `mmap`, and on other POSIX systems we'll use `read`.
+ *
+ * @memberof pm_string_t
+ */
bool
pm_string_mapped_init(pm_string_t *string, const char *filepath) {
#ifdef _WIN32
@@ -128,8 +90,7 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) {
// the source to a constant empty string and return.
if (file_size == 0) {
CloseHandle(file);
- uint8_t empty[] = "";
- pm_string_mapped_init_internal(string, empty, 0);
+ *string = PM_EMPTY_STRING;
return true;
}
@@ -151,7 +112,7 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) {
return false;
}
- pm_string_mapped_init_internal(string, source, (size_t) file_size);
+ *string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = (size_t) file_size };
return true;
#else
// Open the file for reading
@@ -175,8 +136,7 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) {
if (size == 0) {
close(fd);
- uint8_t empty[] = "";
- pm_string_mapped_init_internal(string, empty, 0);
+ *string = PM_EMPTY_STRING;
return true;
}
@@ -187,13 +147,89 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) {
}
close(fd);
- pm_string_mapped_init_internal(string, source, size);
+ *string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = size };
return true;
#endif
}
-// Returns the size of the pm_string_t struct. This is necessary to allocate the
-// correct amount of memory in the FFI backend.
+/**
+ * Returns the memory size associated with the string.
+ *
+ * @memberof pm_string_t
+ */
+size_t
+pm_string_memsize(const pm_string_t *string) {
+ size_t size = sizeof(pm_string_t);
+ if (string->type == PM_STRING_OWNED) {
+ size += string->length;
+ }
+ return size;
+}
+
+/**
+ * Ensure the string is owned. If it is not, then reinitialize it as owned and
+ * copy over the previous source.
+ *
+ * @memberof pm_string_t
+ */
+void
+pm_string_ensure_owned(pm_string_t *string) {
+ if (string->type == PM_STRING_OWNED) return;
+
+ size_t length = pm_string_length(string);
+ const uint8_t *source = pm_string_source(string);
+
+ uint8_t *memory = malloc(length);
+ if (!memory) return;
+
+ pm_string_owned_init(string, memory, length);
+ memcpy((void *) string->source, source, length);
+}
+
+/**
+ * Returns the length associated with the string.
+ *
+ * @memberof pm_string_t
+ */
+PRISM_EXPORTED_FUNCTION size_t
+pm_string_length(const pm_string_t *string) {
+ return string->length;
+}
+
+/**
+ * Returns the start pointer associated with the string.
+ *
+ * @memberof pm_string_t
+ */
+PRISM_EXPORTED_FUNCTION const uint8_t *
+pm_string_source(const pm_string_t *string) {
+ return string->source;
+}
+
+/**
+ * Free the associated memory of the given string.
+ *
+ * @memberof pm_string_t
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_string_free(pm_string_t *string) {
+ void *memory = (void *) string->source;
+
+ if (string->type == PM_STRING_OWNED) {
+ free(memory);
+ } else if (string->type == PM_STRING_MAPPED && string->length) {
+#if defined(_WIN32)
+ UnmapViewOfFile(memory);
+#else
+ munmap(memory, string->length);
+#endif
+ }
+}
+
+/**
+ * Returns the size of the pm_string_t struct. This is necessary to allocate the
+ * correct amount of memory in the FFI backend.
+ */
PRISM_EXPORTED_FUNCTION size_t
pm_string_sizeof(void) {
return sizeof(pm_string_t);
diff --git a/prism/util/pm_string.h b/prism/util/pm_string.h
index ec6985282b..5f0fc7b046 100644
--- a/prism/util/pm_string.h
+++ b/prism/util/pm_string.h
@@ -19,44 +19,82 @@ typedef struct {
#define PM_EMPTY_STRING ((pm_string_t) { .type = PM_STRING_CONSTANT, .source = NULL, .length = 0 })
-// Initialize a shared string that is based on initial input.
+/**
+ * Initialize a shared string that is based on initial input.
+ *
+ * @memberof pm_string_t
+ */
void pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end);
-// Initialize an owned string that is responsible for freeing allocated memory.
+/**
+ * Initialize an owned string that is responsible for freeing allocated memory.
+ *
+ * @memberof pm_string_t
+ */
void pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length);
-// Initialize a constant string that doesn't own its memory source.
+/**
+ * Initialize a constant string that doesn't own its memory source.
+ *
+ * @memberof pm_string_t
+ */
void pm_string_constant_init(pm_string_t *string, const char *source, size_t length);
-// Read the file indicated by the filepath parameter into source and load its
-// contents and size into the given pm_string_t.
-// The given pm_string_t should be freed using pm_string_free() when it is no longer used.
-//
-// We want to use demand paging as much as possible in order to avoid having to
-// read the entire file into memory (which could be detrimental to performance
-// for large files). This means that if we're on windows we'll use
-// `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
-// `mmap`, and on other POSIX systems we'll use `read`.
+/**
+ * Read the file indicated by the filepath parameter into source and load its
+ * contents and size into the given `pm_string_t`. The given `pm_string_t`
+ * should be freed using `pm_string_free` when it is no longer used.
+ *
+ * We want to use demand paging as much as possible in order to avoid having to
+ * read the entire file into memory (which could be detrimental to performance
+ * for large files). This means that if we're on Windows we'll use
+ * `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
+ * `mmap`, and on other POSIX systems we'll use `read`.
+ *
+ * @memberof pm_string_t
+ */
PRISM_EXPORTED_FUNCTION bool pm_string_mapped_init(pm_string_t *string, const char *filepath);
-// Returns the memory size associated with the string.
+/**
+ * Returns the memory size associated with the string.
+ *
+ * @memberof pm_string_t
+ */
size_t pm_string_memsize(const pm_string_t *string);
-// Ensure the string is owned. If it is not, then reinitialize it as owned and
-// copy over the previous source.
+/**
+ * Ensure the string is owned. If it is not, then reinitialize it as owned and
+ * copy over the previous source.
+ *
+ * @memberof pm_string_t
+ */
void pm_string_ensure_owned(pm_string_t *string);
-// Returns the length associated with the string.
+/**
+ * Returns the length associated with the string.
+ *
+ * @memberof pm_string_t
+ */
PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string);
-// Returns the start pointer associated with the string.
+/**
+ * Returns the start pointer associated with the string.
+ *
+ * @memberof pm_string_t
+ */
PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *string);
-// Free the associated memory of the given string.
+/**
+ * Free the associated memory of the given string.
+ *
+ * @memberof pm_string_t
+ */
PRISM_EXPORTED_FUNCTION void pm_string_free(pm_string_t *string);
-// Returns the size of the pm_string_t struct. This is necessary to allocate the
-// correct amount of memory in the FFI backend.
+/**
+ * Returns the size of the pm_string_t struct. This is necessary to allocate the
+ * correct amount of memory in the FFI backend.
+ */
PRISM_EXPORTED_FUNCTION size_t pm_string_sizeof(void);
#endif // PRISM_STRING_H