aboutsummaryrefslogtreecommitdiffstats
path: root/encoding.c
diff options
context:
space:
mode:
authorKoichi Sasada <ko1@atdot.net>2022-12-15 17:53:55 +0900
committerKoichi Sasada <ko1@atdot.net>2022-12-16 10:04:37 +0900
commitae19ac5b5ba1e613a788addeb6d4d67fa65f6518 (patch)
treea431cdff5e73e1b48edb1a2f0b30578162736e68 /encoding.c
parent15b60bb1a48b98e2ba267c88551803df5f88355e (diff)
downloadruby-ae19ac5b5ba1e613a788addeb6d4d67fa65f6518.tar.gz
fixed encoding table
This reduces global lock acquiring for reading. https://bugs.ruby-lang.org/issues/18949
Diffstat (limited to 'encoding.c')
-rw-r--r--encoding.c154
1 files changed, 42 insertions, 112 deletions
diff --git a/encoding.c b/encoding.c
index b8e7f790b8..7c09939ef4 100644
--- a/encoding.c
+++ b/encoding.c
@@ -56,9 +56,8 @@ int rb_encdb_alias(const char *alias, const char *orig);
static ID id_encoding;
VALUE rb_cEncoding;
-#define DEFAULT_ENCODING_LIST_CAPA 128
-static VALUE rb_default_encoding_list;
-static VALUE rb_additional_encoding_list;
+#define ENCODING_LIST_CAPA 256
+static VALUE rb_encoding_list;
struct rb_encoding_entry {
const char *name;
@@ -67,9 +66,8 @@ struct rb_encoding_entry {
};
static struct enc_table {
- struct rb_encoding_entry *list;
+ struct rb_encoding_entry list[ENCODING_LIST_CAPA];
int count;
- int size;
st_table *names;
} global_enc_table;
@@ -128,47 +126,25 @@ enc_new(rb_encoding *encoding)
static void
enc_list_update(int index, rb_raw_encoding *encoding)
{
- if (index < DEFAULT_ENCODING_LIST_CAPA) {
- VALUE list = rb_default_encoding_list;
- if (list && NIL_P(rb_ary_entry(list, index))) {
- /* initialize encoding data */
- rb_ary_store(list, index, enc_new(encoding));
- }
- }
- else {
- RB_VM_LOCK_ENTER();
- {
- VALUE list = rb_additional_encoding_list;
- if (list && NIL_P(rb_ary_entry(list, index))) {
- /* initialize encoding data */
- rb_ary_store(list, index - DEFAULT_ENCODING_LIST_CAPA, enc_new(encoding));
- }
- }
- RB_VM_LOCK_LEAVE();
+ RUBY_ASSERT(index < ENCODING_LIST_CAPA);
+
+ VALUE list = rb_encoding_list;
+ if (list && NIL_P(rb_ary_entry(list, index))) {
+ /* initialize encoding data */
+ rb_ary_store(list, index, enc_new(encoding));
}
}
static VALUE
enc_list_lookup(int idx)
{
- VALUE list, enc;
+ VALUE list, enc = Qnil;
- if (idx < DEFAULT_ENCODING_LIST_CAPA) {
- if (!(list = rb_default_encoding_list)) {
- rb_bug("rb_enc_from_encoding_index(%d): no rb_default_encoding_list", idx);
- }
+ if (idx < ENCODING_LIST_CAPA) {
+ list = rb_encoding_list;
+ RUBY_ASSERT(list);
enc = rb_ary_entry(list, idx);
}
- else {
- RB_VM_LOCK_ENTER();
- {
- if (!(list = rb_additional_encoding_list)) {
- rb_bug("rb_enc_from_encoding_index(%d): no rb_additional_encoding_list", idx);
- }
- enc = rb_ary_entry(list, idx - DEFAULT_ENCODING_LIST_CAPA);
- }
- RB_VM_LOCK_LEAVE();
- }
if (NIL_P(enc)) {
rb_bug("rb_enc_from_encoding_index(%d): not created yet", idx);
@@ -345,16 +321,10 @@ rb_find_encoding(VALUE enc)
static int
enc_table_expand(struct enc_table *enc_table, int newsize)
{
- struct rb_encoding_entry *ent;
- int count = newsize;
-
- if (enc_table->size >= newsize) return newsize;
- newsize = (newsize + 7) / 8 * 8;
- ent = REALLOC_N(enc_table->list, struct rb_encoding_entry, newsize);
- memset(ent + enc_table->size, 0, sizeof(*ent)*(newsize - enc_table->size));
- enc_table->list = ent;
- enc_table->size = newsize;
- return count;
+ if (newsize > ENCODING_LIST_CAPA) {
+ rb_raise(rb_eEncodingError, "too many encoding (> %d)", ENCODING_LIST_CAPA);
+ }
+ return newsize;
}
static int
@@ -413,17 +383,7 @@ enc_from_index(struct enc_table *enc_table, int index)
rb_encoding *
rb_enc_from_index(int index)
{
- rb_encoding *enc;
-
- switch (index) {
- case ENCINDEX_ASCII_8BIT: return global_enc_ascii;
- case ENCINDEX_UTF_8: return global_enc_utf_8;
- case ENCINDEX_US_ASCII: return global_enc_us_ascii;
- default:
- GLOBAL_ENC_TABLE_EVAL(enc_table,
- enc = enc_from_index(enc_table, index));
- return enc;
- }
+ return enc_from_index(&global_enc_table, index);
}
int
@@ -484,11 +444,14 @@ rb_encdb_declare(const char *name)
}
static void
-enc_check_duplication(struct enc_table *enc_table, const char *name)
+enc_check_addable(struct enc_table *enc_table, const char *name)
{
if (enc_registered(enc_table, name) >= 0) {
rb_raise(rb_eArgError, "encoding %s is already registered", name);
}
+ else if (!valid_encoding_name_p(name)) {
+ rb_raise(rb_eArgError, "invalid encoding name: %s", name);
+ }
}
static rb_encoding*
@@ -524,11 +487,7 @@ rb_enc_set_base(const char *name, const char *orig)
int
rb_enc_set_dummy(int index)
{
- rb_encoding *enc;
-
- GLOBAL_ENC_TABLE_EVAL(enc_table,
- enc = enc_table->list[index].enc);
-
+ rb_encoding *enc = global_enc_table.list[index].enc;
ENC_SET_DUMMY((rb_raw_encoding *)enc);
return index;
}
@@ -538,7 +497,7 @@ enc_replicate(struct enc_table *enc_table, const char *name, rb_encoding *encodi
{
int idx;
- enc_check_duplication(enc_table, name);
+ enc_check_addable(enc_table, name);
idx = enc_register(enc_table, name, encoding);
if (idx < 0) rb_raise(rb_eArgError, "invalid encoding name: %s", name);
set_base_encoding(enc_table, idx, encoding);
@@ -727,7 +686,7 @@ rb_enc_alias(const char *alias, const char *orig)
GLOBAL_ENC_TABLE_ENTER(enc_table);
{
- enc_check_duplication(enc_table, alias);
+ enc_check_addable(enc_table, alias);
if ((idx = rb_enc_find_index(orig)) < 0) {
r = -1;
}
@@ -764,7 +723,7 @@ rb_enc_init(struct enc_table *enc_table)
{
enc_table_expand(enc_table, ENCODING_COUNT + 1);
if (!enc_table->names) {
- enc_table->names = st_init_strcasetable();
+ enc_table->names = st_init_strcasetable_with_size(ENCODING_LIST_CAPA);
}
#define OnigEncodingASCII_8BIT OnigEncodingASCII
#define ENC_REGISTER(enc) enc_register_at(enc_table, ENCINDEX_##enc, rb_enc_name(&OnigEncoding##enc), &OnigEncoding##enc)
@@ -877,11 +836,9 @@ rb_enc_autoload(rb_encoding *enc)
int
rb_enc_find_index(const char *name)
{
- int i;
+ int i = enc_registered(&global_enc_table, name);
rb_encoding *enc;
- GLOBAL_ENC_TABLE_EVAL(enc_table, i = enc_registered(enc_table, name));
-
if (i < 0) {
i = load_encoding(name);
}
@@ -1368,10 +1325,7 @@ enc_names(VALUE self)
args[0] = (VALUE)rb_to_encoding_index(self);
args[1] = rb_ary_new2(0);
-
- GLOBAL_ENC_TABLE_EVAL(enc_table,
- st_foreach(enc_table->names, enc_names_i, (st_data_t)args));
-
+ st_foreach(global_enc_table.names, enc_names_i, (st_data_t)args);
return args[1];
}
@@ -1397,14 +1351,7 @@ static VALUE
enc_list(VALUE klass)
{
VALUE ary = rb_ary_new2(0);
-
- RB_VM_LOCK_ENTER();
- {
- rb_ary_replace(ary, rb_default_encoding_list);
- rb_ary_concat(ary, rb_additional_encoding_list);
- }
- RB_VM_LOCK_LEAVE();
-
+ rb_ary_replace(ary, rb_encoding_list);
return ary;
}
@@ -1553,15 +1500,17 @@ rb_locale_encindex(void)
if (idx < 0) idx = ENCINDEX_UTF_8;
- GLOBAL_ENC_TABLE_ENTER(enc_table);
- if (enc_registered(enc_table, "locale") < 0) {
+ if (enc_registered(&global_enc_table, "locale") < 0) {
# if defined _WIN32
void Init_w32_codepage(void);
Init_w32_codepage();
# endif
- enc_alias_internal(enc_table, "locale", idx);
+ GLOBAL_ENC_TABLE_ENTER(enc_table);
+ {
+ enc_alias_internal(enc_table, "locale", idx);
+ }
+ GLOBAL_ENC_TABLE_LEAVE();
}
- GLOBAL_ENC_TABLE_LEAVE();
return idx;
}
@@ -1575,13 +1524,8 @@ rb_locale_encoding(void)
int
rb_filesystem_encindex(void)
{
- int idx;
-
- GLOBAL_ENC_TABLE_EVAL(enc_table,
- idx = enc_registered(enc_table, "filesystem"));
-
- if (idx < 0)
- idx = ENCINDEX_ASCII_8BIT;
+ int idx = enc_registered(&global_enc_table, "filesystem");
+ if (idx < 0) idx = ENCINDEX_ASCII_8BIT;
return idx;
}
@@ -1872,15 +1816,8 @@ rb_enc_name_list_i(st_data_t name, st_data_t idx, st_data_t arg)
static VALUE
rb_enc_name_list(VALUE klass)
{
- VALUE ary;
-
- GLOBAL_ENC_TABLE_ENTER(enc_table);
- {
- ary = rb_ary_new2(enc_table->names->num_entries);
- st_foreach(enc_table->names, rb_enc_name_list_i, (st_data_t)ary);
- }
- GLOBAL_ENC_TABLE_LEAVE();
-
+ VALUE ary = rb_ary_new2(global_enc_table.names->num_entries);
+ st_foreach(global_enc_table.names, rb_enc_name_list_i, (st_data_t)ary);
return ary;
}
@@ -1926,8 +1863,7 @@ rb_enc_aliases(VALUE klass)
aliases[0] = rb_hash_new();
aliases[1] = rb_ary_new();
- GLOBAL_ENC_TABLE_EVAL(enc_table,
- st_foreach(enc_table->names, rb_enc_aliases_enc_i, (st_data_t)aliases));
+ st_foreach(global_enc_table.names, rb_enc_aliases_enc_i, (st_data_t)aliases);
return aliases[0];
}
@@ -1996,13 +1932,7 @@ Init_Encoding(void)
struct enc_table *enc_table = &global_enc_table;
- if (DEFAULT_ENCODING_LIST_CAPA < enc_table->count) rb_bug("DEFAULT_ENCODING_LIST_CAPA is too small");
-
- list = rb_additional_encoding_list = rb_ary_new();
- RBASIC_CLEAR_CLASS(list);
- rb_gc_register_mark_object(list);
-
- list = rb_default_encoding_list = rb_ary_new2(DEFAULT_ENCODING_LIST_CAPA);
+ list = rb_encoding_list = rb_ary_new2(ENCODING_LIST_CAPA);
RBASIC_CLEAR_CLASS(list);
rb_gc_register_mark_object(list);
@@ -2024,5 +1954,5 @@ Init_encodings(void)
void
rb_enc_foreach_name(int (*func)(st_data_t name, st_data_t idx, st_data_t arg), st_data_t arg)
{
- GLOBAL_ENC_TABLE_EVAL(enc_table, st_foreach(enc_table->names, func, arg));
+ st_foreach(global_enc_table.names, func, arg);
}