From c351afc37276bb3d82eabe142a20be10127fed27 Mon Sep 17 00:00:00 2001 From: nobu Date: Fri, 28 Sep 2007 19:27:10 +0000 Subject: * encoding.c (rb_enc_alias): allow encodings multiple aliases. * encoding.c (rb_enc_find_index): search the encoding which has the given name and return its index if found, or -1. * st.c (type_strcasehash): case-insensitive string hash type. * string.c (rb_str_force_encoding): force encoding of self. this name comes from [ruby-dev:31894] by Martin Duerst. [ruby-dev:31744] * include/ruby/encoding.h (rb_enc_find_index, rb_enc_associate_index): prototyped. * include/ruby/encoding.h (rb_enc_isctype): direct interface to ctype. * include/ruby/st.h (st_init_strcasetable): prototyped. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@13556 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 19 ++++++++++++++++++ encoding.c | 51 +++++++++++++++++++++++++++++++++++++++---------- include/ruby/encoding.h | 3 +++ include/ruby/st.h | 2 ++ st.c | 37 +++++++++++++++++++++++++++++++++++ string.c | 28 +++++++++++++++++++++++++-- 6 files changed, 128 insertions(+), 12 deletions(-) diff --git a/ChangeLog b/ChangeLog index f9f99cd966..e89ab8bc4e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,22 @@ +Sat Sep 29 04:27:08 2007 Nobuyoshi Nakada + + * encoding.c (rb_enc_alias): allow encodings multiple aliases. + + * encoding.c (rb_enc_find_index): search the encoding which has the + given name and return its index if found, or -1. + + * st.c (type_strcasehash): case-insensitive string hash type. + + * string.c (rb_str_force_encoding): force encoding of self. this name + comes from [ruby-dev:31894] by Martin Duerst. [ruby-dev:31744] + + * include/ruby/encoding.h (rb_enc_find_index, rb_enc_associate_index): + prototyped. + + * include/ruby/encoding.h (rb_enc_isctype): direct interface to ctype. + + * include/ruby/st.h (st_init_strcasetable): prototyped. + Sat Sep 29 03:53:26 2007 Koichi Sasada * cont.c: Thread local storage should be fiber local. diff --git a/encoding.c b/encoding.c index 4df874292f..6f7b406c88 100644 --- a/encoding.c +++ b/encoding.c @@ -23,6 +23,7 @@ struct rb_encoding_entry { static struct rb_encoding_entry *enc_table; static int enc_table_size; +static st_table *enc_table_alias; void rb_enc_register(const char *name, rb_encoding *encoding) @@ -42,13 +43,26 @@ rb_enc_register(const char *name, rb_encoding *encoding) ent->enc = encoding; } +void +rb_enc_alias(const char *alias, const char *orig) +{ + if (!enc_table_alias) { + enc_table_alias = st_init_strcasetable(); + } + st_insert(enc_table_alias, (st_data_t)alias, (st_data_t)orig); +} + void rb_enc_init(void) { - rb_enc_register("ascii", ONIG_ENCODING_ASCII); - rb_enc_register("sjis", ONIG_ENCODING_SJIS); - rb_enc_register("euc-jp", ONIG_ENCODING_EUC_JP); - rb_enc_register("utf-8", ONIG_ENCODING_UTF8); +#define ENC_REGISTER(enc) rb_enc_register(rb_enc_name(enc), enc) + ENC_REGISTER(ONIG_ENCODING_ASCII); + ENC_REGISTER(ONIG_ENCODING_SJIS); + ENC_REGISTER(ONIG_ENCODING_EUC_JP); + ENC_REGISTER(ONIG_ENCODING_UTF8); +#undef ENC_REGISTER + rb_enc_alias("binary", "ascii"); + rb_enc_alias("sjis", "shift_jis"); } rb_encoding * @@ -63,20 +77,37 @@ rb_enc_from_index(int index) return enc_table[index].enc; } -rb_encoding * -rb_enc_find(const char *name) +int +rb_enc_find_index(const char *name) { int i; + st_data_t alias = 0; + if (!name) return -1; if (!enc_table) { rb_enc_init(); } + find: for (i=0; i prev_char */ #define rb_enc_prev_char(s,p,enc) (char *)onigenc_get_prev_char_head(enc,(UChar*)s,(UChar*)p) +#define rb_enc_isctype(c,t,enc) ONIGENC_IS_CODE_CTYPE(enc,c,t) #define rb_enc_isascii(c,enc) ONIGENC_IS_CODE_ASCII(c) #define rb_enc_isalpha(c,enc) ONIGENC_IS_CODE_ALPHA(enc,c) #define rb_enc_islower(c,enc) ONIGENC_IS_CODE_LOWER(enc,c) diff --git a/include/ruby/st.h b/include/ruby/st.h index 93089b0d3b..96791c444d 100644 --- a/include/ruby/st.h +++ b/include/ruby/st.h @@ -71,6 +71,8 @@ st_table *st_init_numtable(void); st_table *st_init_numtable_with_size(int); st_table *st_init_strtable(void); st_table *st_init_strtable_with_size(int); +st_table *st_init_strcasetable(void); +st_table *st_init_strcasetable_with_size(int); int st_delete(st_table *, st_data_t *, st_data_t *); int st_delete_safe(st_table *, st_data_t *, st_data_t *, st_data_t); int st_insert(st_table *, st_data_t, st_data_t); diff --git a/st.c b/st.c index ffff261470..2418547ebb 100644 --- a/st.c +++ b/st.c @@ -52,6 +52,12 @@ static const struct st_hash_type type_strhash = { strhash, }; +static int strcasehash(const char *); +static const struct st_hash_type type_strcasehash = { + strcasecmp, + strcasehash, +}; + static void rehash(st_table *); #ifdef RUBY @@ -202,6 +208,18 @@ st_init_strtable_with_size(int size) return st_init_table_with_size(&type_strhash, size); } +st_table* +st_init_strcasetable(void) +{ + return st_init_table(&type_strcasehash); +} + +st_table* +st_init_strcasetable_with_size(int size) +{ + return st_init_table_with_size(&type_strcasehash, size); +} + void st_clear(st_table *table) { @@ -814,6 +832,25 @@ strhash(register const char *string) return hval; } +static int +strcasehash(register const char *string) +{ + register unsigned int hval = FNV1_32A_INIT; + + /* + * FNV-1a hash each octet in the buffer + */ + while (*string) { + unsigned int c = (unsigned char)*string++; + if ((unsigned int)(c - 'A') > ('Z' - 'A')) c += 'a' - 'A'; + hval ^= c; + + /* multiply by the 32 bit FNV magic prime mod 2^32 */ + hval *= FNV_32_PRIME; + } + return hval; +} + int st_numcmp(st_data_t x, st_data_t y) { diff --git a/string.c b/string.c index ab48a66bb6..690ce86856 100644 --- a/string.c +++ b/string.c @@ -228,7 +228,7 @@ rb_tainted_str_new2(const char *ptr) } static VALUE -str_new3(VALUE klass, VALUE str) +str_new_shared(VALUE klass, VALUE str) { VALUE str2 = str_alloc(klass); @@ -244,11 +244,19 @@ str_new3(VALUE klass, VALUE str) RSTRING(str2)->as.heap.aux.shared = str; FL_SET(str2, ELTS_SHARED); } - rb_enc_copy((VALUE)str2, str); return str2; } +static VALUE +str_new3(VALUE klass, VALUE str) +{ + VALUE str2 = str_new_shared(klass, str); + + rb_enc_copy(str2, str); + return str2; +} + VALUE rb_str_new3(VALUE str) { @@ -5108,6 +5116,21 @@ str_encoding(VALUE str) } +/* + * call-seq: + * str.force_encoding(encoding) => str + * + * Changes the encoding to +encoding+ and returns self. + */ + +static VALUE +rb_str_force_encoding(VALUE str, VALUE encname) +{ + str_modifiable(str); + rb_enc_associate(str, rb_enc_find(StringValueCStr(encname))); + return str; +} + /********************************************************************** * Document-class: Symbol * @@ -5519,6 +5542,7 @@ Init_String(void) rb_define_method(rb_cString, "rpartition", rb_str_rpartition, 1); rb_define_method(rb_cString, "encoding", str_encoding, 0); + rb_define_method(rb_cString, "force_encoding", rb_str_force_encoding, 1); id_to_s = rb_intern("to_s"); -- cgit v1.2.3