From 9c24fed339c77552223d5da339a1e58419bd0b71 Mon Sep 17 00:00:00 2001
From: nobu
Date: Sat, 13 Oct 2007 16:32:40 +0000
Subject: * encoding.c (rb_cEncoding): new Encoding class.
 * encoding.c (rb_to_encoding, rb_to_encoding_index): helper functions.
 * encoding.c (rb_obj_encoding): return Encoding object now.
 * gc.c (garbage_collect): mark Encoding objects.
 * string.c (rb_str_force_encoding): accept Encoding object as well as
 encoding name.
 * include/ruby/encoding.h (rb_to_encoding_index, rb_to_encoding):
 prototypes.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@13692 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
---
 ChangeLog               |  16 +++++
 encoding.c              | 159 ++++++++++++++++++++++++++++++++++++++++++++++--
 gc.c                    |   3 +
 include/ruby/encoding.h |   2 +
 string.c                |  13 +---
 version.h               |   6 +-
 6 files changed, 179 insertions(+), 20 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 896a2a6a02..84e270b814 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,19 @@
+Sun Oct 14 01:32:37 2007  Nobuyoshi Nakada
+
+	* encoding.c (rb_cEncoding): new Encoding class.
+
+	* encoding.c (rb_to_encoding, rb_to_encoding_index): helper functions.
+
+	* encoding.c (rb_obj_encoding): return Encoding object now.
+
+	* gc.c (garbage_collect): mark Encoding objects.
+
+	* string.c (rb_str_force_encoding): accept Encoding object as well as
+	  encoding name.
+
+	* include/ruby/encoding.h (rb_to_encoding_index, rb_to_encoding):
+	  prototypes.
+
 Sun Oct 14 01:03:30 2007  Tanaka Akira
 
 	* lib/open-uri.rb (OpenURI.open_http): fix :ssl_ca_cert option.
diff --git a/encoding.c b/encoding.c index bee7957e6e..eecb808083 100644 --- a/encoding.c +++ b/encoding.c @@ -15,6 +15,7 @@ #include "regenc.h" static ID id_encoding; +static VALUE rb_cEncoding; struct rb_encoding_entry { const char *name; @@ -25,6 +26,92 @@ static struct rb_encoding_entry *enc_table; static int enc_table_size; static st_table *enc_table_alias; +#define ENC_UNINITIALIZED (&rb_cEncoding) +#define enc_initialized_p(enc) ((enc)->auxiliary_data != &rb_cEncoding) +#define ENC_FROM_ENCODING(enc) ((VALUE)(enc)->auxiliary_data) + +static void +enc_mark(void *ptr) +{ +} + +static VALUE +enc_new(rb_encoding *encoding) +{ + VALUE enc = Data_Wrap_Struct(rb_cEncoding, enc_mark, -1, encoding); + encoding->auxiliary_data = (void *)enc; + return enc; +} + +static VALUE +enc_from_encoding(rb_encoding *enc) +{ + return enc_initialized_p(enc) ? ENC_FROM_ENCODING(enc) : enc_new(enc); +} + +static rb_encoding * +enc_check_encoding(VALUE obj) +{ + if (SPECIAL_CONST_P(obj) || BUILTIN_TYPE(obj) != T_DATA || + RDATA(obj)->dmark != enc_mark) { + return 0; + } + return RDATA(obj)->data; +} + +static rb_encoding * +enc_get_encoding(VALUE obj) +{ + rb_encoding *enc = enc_check_encoding(obj); + if (!enc) { + rb_raise(rb_eTypeError, "wrong argument type %s (expected Encoding)", + rb_obj_classname(obj)); + } + return enc; +} + +int +rb_to_encoding_index(VALUE enc) +{ + rb_encoding *encoding; + + if (NIL_P(enc)) return 0; + encoding = enc_check_encoding(enc); + if (encoding) { + return rb_enc_to_index(encoding); + } + else { + return rb_enc_find_index(StringValueCStr(enc)); + } +} + +rb_encoding * +rb_to_encoding(VALUE enc) +{ + rb_encoding *encoding; + int idx; + + if (NIL_P(enc)) return rb_enc_from_index(0); + encoding = enc_check_encoding(enc); + if (encoding) return encoding; + if ((idx = rb_enc_find_index(StringValueCStr(enc))) < 0) { + rb_raise(rb_eArgError, "unknown encoding name - %s", RSTRING_PTR(enc)); + } + return rb_enc_from_index(idx); +} + +void 
+rb_gc_mark_encodings(void) +{ + int i; + for (i = 0; i < enc_table_size; ++i) { + rb_encoding *enc = enc_table[i].enc; + if (enc && enc_initialized_p(enc)) { + rb_gc_mark(ENC_FROM_ENCODING(enc)); + } + } +} + int rb_enc_register(const char *name, rb_encoding *encoding) { @@ -44,7 +131,15 @@ rb_enc_register(const char *name, rb_encoding *encoding) enc_table_size = newsize; ent = &enc_table[--newsize]; ent->name = name; - ent->enc = encoding; + *(ent->enc = malloc(sizeof(rb_encoding))) = *encoding; + encoding = ent->enc; + encoding->name = name; + if (rb_cEncoding) { + enc_new(encoding); + } + else { + encoding->auxiliary_data = ENC_UNINITIALIZED; + } return newsize; } @@ -121,9 +216,9 @@ rb_enc_find_index(const char *name) rb_encoding * rb_enc_find(const char *name) { - rb_encoding *enc = rb_enc_from_index(rb_enc_find_index(name)); - if (!enc) enc = ONIG_ENCODING_ASCII; - return enc; + int idx = rb_enc_find_index(name); + if (idx < 0) idx = 0; + return rb_enc_from_index(idx); } static int @@ -259,7 +354,7 @@ rb_enc_check(VALUE str1, VALUE str2) if (cr1 == ENC_CODERANGE_SINGLE) return rb_enc_from_index(idx2); if (cr2 == ENC_CODERANGE_SINGLE) return rb_enc_from_index(idx1); } - if (cr1 == ENC_CODERANGE_SINGLE) return ONIG_ENCODING_ASCII; + if (cr1 == ENC_CODERANGE_SINGLE) return rb_enc_from_index(0); } if (cr1 == ENC_CODERANGE_SINGLE && rb_enc_asciicompat(enc = rb_enc_from_index(idx2))) @@ -285,7 +380,11 @@ rb_enc_copy(VALUE obj1, VALUE obj2) VALUE rb_obj_encoding(VALUE obj) { - return rb_str_new2(rb_enc_name(rb_enc_get(obj))); + rb_encoding *enc = rb_enc_get(obj); + if (!enc) { + rb_raise(rb_eTypeError, "unknown encoding"); + } + return enc_from_encoding(enc); } @@ -360,3 +459,51 @@ rb_enc_tolower(int c, rb_encoding *enc) { return (ONIGENC_IS_ASCII_CODE(c)?ONIGENC_ASCII_CODE_TO_LOWER_CASE(c):(c)); } + +static VALUE +enc_inspect(VALUE self) +{ + return rb_sprintf("<%s:%s>", rb_obj_classname(self), + rb_enc_name(enc_get_encoding(self))); +} + +static VALUE 
+enc_name(VALUE self) +{ + return rb_str_new2(rb_enc_name(enc_get_encoding(self))); +} + +static VALUE +enc_list(VALUE klass) +{ + VALUE ary = rb_ary_new2(enc_table_size); + int i; + rb_cEncoding = rb_define_class("Encoding", rb_cObject); + for (i = 0; i < enc_table_size; ++i) { + rb_encoding *enc = enc_table[i].enc; + if (enc) { + rb_ary_push(ary, enc_from_encoding(enc)); + } + } + return ary; +} + +static VALUE +enc_find(VALUE klass, VALUE enc) +{ + int idx = rb_enc_find_index(StringValueCStr(enc)); + if (idx < 0) { + rb_raise(rb_eArgError, "unknown encoding name - %s", RSTRING_PTR(enc)); + } + return enc_from_encoding(rb_enc_from_index(idx)); +} + +void +Init_Encoding(void) +{ + rb_cEncoding = rb_define_class("Encoding", rb_cObject); + rb_define_method(rb_cEncoding, "inspect", enc_inspect, 0); + rb_define_method(rb_cEncoding, "name", enc_name, 0); + rb_define_singleton_method(rb_cEncoding, "list", enc_list, 0); + rb_define_singleton_method(rb_cEncoding, "find", enc_find, 1); +} diff --git a/gc.c b/gc.c index 9bbca22588..0e5840e651 100644 --- a/gc.c +++ b/gc.c @@ -1456,6 +1456,8 @@ mark_current_machine_context(rb_thread_t *th) #endif } +void rb_gc_mark_encodings(void); + static int garbage_collect(void) { @@ -1490,6 +1492,7 @@ garbage_collect(void) rb_gc_mark_threads(); rb_gc_mark_symbols(); + rb_gc_mark_encodings(); /* mark protected global variables */ for (list = global_List; list; list = list->next) { diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h index 715c6f6d2c..6645a1e29d 100644 --- a/include/ruby/encoding.h +++ b/include/ruby/encoding.h @@ -41,6 +41,8 @@ typedef OnigEncodingType rb_encoding; int rb_enc_to_index(rb_encoding*); int rb_enc_get_index(VALUE obj); int rb_enc_find_index(const char *name); +int rb_to_encoding_index(VALUE); +rb_encoding* rb_to_encoding(VALUE); rb_encoding* rb_enc_get(VALUE); rb_encoding* rb_enc_check(VALUE,VALUE); void rb_enc_associate_index(VALUE, int); diff --git a/string.c b/string.c index 
4969b7513f..19edb8cae0 100644 --- a/string.c +++ b/string.c @@ -5112,19 +5112,10 @@ rb_str_setter(VALUE val, ID id, VALUE *var) */ static VALUE -rb_str_force_encoding(VALUE str, VALUE encname) +rb_str_force_encoding(VALUE str, VALUE enc) { - const char *name; - int idx; - - if (NIL_P(encname)) { - idx = 0; - } - else if ((idx = rb_enc_find_index(name = StringValueCStr(encname))) < 0) { - rb_raise(rb_eArgError, "invalid encoding name - %s", name); - } str_modifiable(str); - rb_enc_associate_index(str, idx); + rb_enc_associate(str, rb_to_encoding(enc)); return str; } diff --git a/version.h b/version.h index 80d482482b..e6a143fa4a 100644 --- a/version.h +++ b/version.h @@ -1,7 +1,7 @@ #define RUBY_VERSION "1.9.0" -#define RUBY_RELEASE_DATE "2007-10-13" +#define RUBY_RELEASE_DATE "2007-10-14" #define RUBY_VERSION_CODE 190 -#define RUBY_RELEASE_CODE 20071013 +#define RUBY_RELEASE_CODE 20071014 #define RUBY_PATCHLEVEL 0 #define RUBY_VERSION_MAJOR 1 @@ -9,7 +9,7 @@ #define RUBY_VERSION_TEENY 0 #define RUBY_RELEASE_YEAR 2007 #define RUBY_RELEASE_MONTH 10 -#define RUBY_RELEASE_DAY 13 +#define RUBY_RELEASE_DAY 14 #ifdef RUBY_EXTERN RUBY_EXTERN const char ruby_version[]; -- cgit v1.2.3