From 15335f8aaa8282fd951935135aa04b3d938a267c Mon Sep 17 00:00:00 2001 From: tenderlove Date: Wed, 19 May 2010 03:12:03 +0000 Subject: * ext/psych/emitter.c: using xmalloc and xfree for memory allocation * ext/psych/lib/psych/nodes/stream.rb: encoding should be read / write * ext/psych/parser.c: supporting UTF-16 and UTF-16 + BOM * test/psych/test_parser.rb: testing UTF-16 and UTF-16 + BOM git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27881 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ext/psych/emitter.c | 12 ++++-- ext/psych/lib/psych/nodes/stream.rb | 2 +- ext/psych/parser.c | 76 ++++++++++++++++++++++--------------- 3 files changed, 56 insertions(+), 34 deletions(-) (limited to 'ext/psych') diff --git a/ext/psych/emitter.c b/ext/psych/emitter.c index befa98e821..a06304aba6 100644 --- a/ext/psych/emitter.c +++ b/ext/psych/emitter.c @@ -17,15 +17,21 @@ static int writer(void *ctx, unsigned char *buffer, size_t size) return (int)NUM2INT(wrote); } -static void dealloc(yaml_emitter_t * emitter) +static void dealloc(void * ptr) { + yaml_emitter_t * emitter; + + emitter = (yaml_emitter_t *)ptr; yaml_emitter_delete(emitter); - free(emitter); + xfree(emitter); } static VALUE allocate(VALUE klass) { - yaml_emitter_t * emitter = malloc(sizeof(yaml_emitter_t)); + yaml_emitter_t * emitter; + + emitter = xmalloc(sizeof(yaml_emitter_t)); + yaml_emitter_initialize(emitter); yaml_emitter_set_unicode(emitter, 1); yaml_emitter_set_indent(emitter, 2); diff --git a/ext/psych/lib/psych/nodes/stream.rb b/ext/psych/lib/psych/nodes/stream.rb index f4aab5a7dc..7cf5e033ec 100644 --- a/ext/psych/lib/psych/nodes/stream.rb +++ b/ext/psych/lib/psych/nodes/stream.rb @@ -21,7 +21,7 @@ module Psych UTF16BE = Psych::Parser::UTF16BE # The encoding used for this stream - attr_reader :encoding + attr_accessor :encoding ### # Create a new Psych::Nodes::Stream node with an +encoding+ that diff --git a/ext/psych/parser.c b/ext/psych/parser.c index eefb8e67bc..41260ab6f5 100644 --- a/ext/psych/parser.c +++ b/ext/psych/parser.c @@ -39,6 +39,25 @@ static int io_reader(void * data, unsigned char *buf, size_t size, size_t *read) return 1; } +static void dealloc(void * ptr) +{ + yaml_parser_t * parser; + + parser = (yaml_parser_t *)ptr; + yaml_parser_delete(parser); + xfree(parser); +} + +static VALUE allocate(VALUE klass) +{ + yaml_parser_t * parser; + + parser = xmalloc(sizeof(yaml_parser_t)); + yaml_parser_initialize(parser); + + return Data_Wrap_Struct(klass, 0, dealloc, parser); +} + /* * call-seq: * parser.parse(yaml) @@ -50,35 +69,33 @@ static int io_reader(void * data, unsigned char *buf, size_t size, size_t *read) */ static VALUE parse(VALUE self, VALUE yaml) { - yaml_parser_t parser; + yaml_parser_t * parser; yaml_event_t event; int done = 0; #ifdef HAVE_RUBY_ENCODING_H - int encoding = rb_enc_find_index("ASCII-8BIT"); - rb_encoding * internal_enc = 0; + int encoding = rb_utf8_encindex(); + rb_encoding * internal_enc = rb_default_internal_encoding(); #endif VALUE handler = rb_iv_get(self, "@handler"); - - yaml_parser_initialize(&parser); + Data_Get_Struct(self, yaml_parser_t, parser); if(rb_respond_to(yaml, id_read)) { - yaml_parser_set_input(&parser, io_reader, (void *)yaml); + yaml_parser_set_input(parser, io_reader, (void *)yaml); } else { StringValue(yaml); yaml_parser_set_input_string( - &parser, + parser, (const unsigned char *)RSTRING_PTR(yaml), (size_t)RSTRING_LEN(yaml) ); } while(!done) { - if(!yaml_parser_parse(&parser, &event)) { - size_t line = parser.mark.line; - size_t column = parser.mark.column; + if(!yaml_parser_parse(parser, &event)) { + size_t line = parser->mark.line; + size_t column = parser->mark.column; - yaml_parser_delete(&parser); rb_raise(ePsychSyntaxError, "couldn't parse YAML at line %d column %d", (int)line, (int)column); } @@ -86,25 +103,6 @@ static VALUE parse(VALUE self, VALUE yaml) switch(event.type) { case YAML_STREAM_START_EVENT: -#ifdef HAVE_RUBY_ENCODING_H - switch(event.data.stream_start.encoding) { - case YAML_ANY_ENCODING: - break; - case YAML_UTF8_ENCODING: - encoding = rb_enc_find_index("UTF-8"); - break; - case YAML_UTF16LE_ENCODING: - encoding = rb_enc_find_index("UTF-16LE"); - break; - case YAML_UTF16BE_ENCODING: - encoding = rb_enc_find_index("UTF-16BE"); - break; - default: - break; - } - internal_enc = rb_default_internal_encoding(); -#endif - rb_funcall(handler, id_start_stream, 1, INT2NUM((long)event.data.stream_start.encoding) ); @@ -286,6 +284,22 @@ static VALUE parse(VALUE self, VALUE yaml) return self; } +/* + * call-seq: + * parser.external_encoding=(encoding) + * + * Set the encoding for this parser to +encoding+ + */ +static VALUE set_external_encoding(VALUE self, VALUE encoding) +{ + yaml_parser_t * parser; + + Data_Get_Struct(self, yaml_parser_t, parser); + yaml_parser_set_encoding(parser, NUM2INT(encoding)); + + return encoding; +} + void Init_psych_parser() { #if 0 @@ -293,6 +307,7 @@ void Init_psych_parser() #endif cPsychParser = rb_define_class_under(mPsych, "Parser", rb_cObject); + rb_define_alloc_func(cPsychParser, allocate); /* Any encoding: Let the parser choose the encoding */ rb_define_const(cPsychParser, "ANY", INT2NUM(YAML_ANY_ENCODING)); @@ -309,6 +324,7 @@ void Init_psych_parser() ePsychSyntaxError = rb_define_class_under(mPsych, "SyntaxError", rb_eSyntaxError); rb_define_method(cPsychParser, "parse", parse, 1); + rb_define_method(cPsychParser, "external_encoding=", set_external_encoding, 1); id_read = rb_intern("read"); id_empty = rb_intern("empty"); -- cgit v1.2.3