ruby · soutaro · Dec 17, 2025 · Oct 7, 2025 · Nov 3, 2025 · Dec 17, 2025
diff --git a/docs/encoding.md b/docs/encoding.md
@@ -0,0 +1,56 @@
+# RBS File Encoding
+
+## Best Practice
+
+**Use UTF-8** for both file encoding and your system locale.
+
+## Supported Encodings
+
+The RBS parser supports ASCII-compatible encodings (similar to Ruby's script encoding support).
+
+**Examples**: UTF-8, US-ASCII, Shift JIS, EUC-JP, ...
+
+## Unicode Codepoint Symbols
+
+String literal types in RBS can contain Unicode codepoint escape sequences (`\uXXXX`).
+
+When the file encoding is UTF-8, the parser translates Unicode escape sequences into the characters they denote:
+
+```rbs
+# In UTF-8 encoded files
+
+type t = "\u0123"  # Translated to the actual Unicode character ģ
+type s = "\u3042"  # Translated to the actual Unicode character あ
+```
+
+When the file encoding is not UTF-8, Unicode escape sequences are interpreted literally as the string `\uXXXX`:
+
+```rbs
+# In non-UTF-8 encoded files
+
+type t = "\u0123"  # Remains as the literal string "\u0123"
+```
+
+## Implementation
+
+The RBS gem currently does not handle file encoding itself. It relies on Ruby's encoding handling, specifically `Encoding.default_external` and `Encoding.default_internal`.
+
+`Encoding.default_external` is the encoding Ruby assumes when it reads external resources like files. The Ruby interpreter sets it based on the locale. `Encoding.default_internal` is the encoding Ruby converts external resources to. The default is `nil` (no conversion).
+
+When your locale is set to use `UTF-8` encoding, `default_external` is `Encoding::UTF_8`, so the RBS file content read from disk will be UTF-8 encoded.
+
+### Parsing non-UTF-8 RBS source text
+
+If you want to work with another encoding, ensure the source string has an ASCII-compatible encoding.
+
+```ruby
+source = '"日本語"'
+RBS::Parser.parse_type(source.encode(Encoding::EUC_JP))  # => Parses successfully
+RBS::Parser.parse_type(source.encode(Encoding::UTF_32))  # => Returns `nil` since UTF-32 is not ASCII compatible
+```
+
+### Specifying file encoding
+
+Currently, RBS doesn't support specifying file encoding directly.
+
+As a workaround, set `Encoding.default_external` to the desired encoding before the gem loads RBS files from storage.
diff --git a/ext/rbs_extension/class_constants.c b/ext/rbs_extension/class_constants.c
@@ -7,8 +7,6 @@
 
 #include "rbs_extension.h"
 
-VALUE RBS_Parser;
-
 VALUE RBS;
 VALUE RBS_AST;
 VALUE RBS_AST_Declarations;

diff --git a/ext/rbs_extension/legacy_location.c b/ext/rbs_extension/legacy_location.c
@@ -33,7 +33,7 @@ void rbs_loc_legacy_alloc_children(rbs_loc *loc, unsigned short cap) {
     check_children_max(cap);
 
     size_t s = RBS_LOC_CHILDREN_SIZE(cap);
-    loc->children = malloc(s);
+    loc->children = (rbs_loc_children *) malloc(s);
 
     *loc->children = (rbs_loc_children) {
         .len = 0,
@@ -50,7 +50,7 @@ static void check_children_cap(rbs_loc *loc) {
         if (loc->children->len == loc->children->cap) {
             check_children_max(loc->children->cap + 1);
             size_t s = RBS_LOC_CHILDREN_SIZE(++loc->children->cap);
-            loc->children = realloc(loc->children, s);
+            loc->children = (rbs_loc_children *) realloc(loc->children, s);
         }
     }
 }
@@ -86,12 +86,12 @@ void rbs_loc_free(rbs_loc *loc) {
 }
 
 static void rbs_loc_mark(void *ptr) {
-    rbs_loc *loc = ptr;
+    rbs_loc *loc = (rbs_loc *) ptr;
     rb_gc_mark(loc->buffer);
 }
 
 static size_t rbs_loc_memsize(const void *ptr) {
-    const rbs_loc *loc = ptr;
+    const rbs_loc *loc = (const rbs_loc *) ptr;
     if (loc->children == NULL) {
         return sizeof(rbs_loc);
     } else {
@@ -117,7 +117,7 @@ static VALUE location_s_allocate(VALUE klass) {
 }
 
 rbs_loc *rbs_check_location(VALUE obj) {
-    return rb_check_typeddata(obj, &location_type);
+    return (rbs_loc *) rb_check_typeddata(obj, &location_type);
 }
 
 static VALUE location_initialize(VALUE self, VALUE buffer, VALUE start_pos, VALUE end_pos) {

diff --git a/ext/rbs_extension/main.c b/ext/rbs_extension/main.c
@@ -187,18 +187,10 @@ static VALUE parse_method_type_try(VALUE a) {
     }
 
     rbs_method_type_t *method_type = NULL;
-    rbs_parse_method_type(parser, &method_type);
+    rbs_parse_method_type(parser, &method_type, RB_TEST(arg->require_eof));
 
     raise_error_if_any(parser, arg->buffer);
 
-    if (RB_TEST(arg->require_eof)) {
-        rbs_parser_advance(parser);
-        if (parser->current_token.type != pEOF) {
-            rbs_parser_set_error(parser, parser->current_token, true, "expected a token `%s`", rbs_token_type_str(pEOF));
-            raise_error(parser->error, arg->buffer);
-        }
-    }
-
     rbs_translation_context_t ctx = rbs_translation_context_create(
         &parser->constant_pool,
         arg->buffer,

diff --git a/include/rbs/parser.h b/include/rbs/parser.h
@@ -44,7 +44,7 @@ typedef struct rbs_error_t {
  * An RBS parser is a LL(3) parser.
  * */
 typedef struct {
-    rbs_lexer_t *rbs_lexer_t;
+    rbs_lexer_t *lexer;
 
     rbs_token_t current_token;
     rbs_token_t next_token;  /* The first lookahead token */
@@ -127,7 +127,7 @@ rbs_ast_comment_t *rbs_parser_get_comment(rbs_parser_t *parser, int subject_line
 void rbs_parser_set_error(rbs_parser_t *parser, rbs_token_t tok, bool syntax_error, const char *fmt, ...) RBS_ATTRIBUTE_FORMAT(4, 5);
 
 bool rbs_parse_type(rbs_parser_t *parser, rbs_node_t **type);
-bool rbs_parse_method_type(rbs_parser_t *parser, rbs_method_type_t **method_type);
+bool rbs_parse_method_type(rbs_parser_t *parser, rbs_method_type_t **method_type, bool require_eof);
 bool rbs_parse_signature(rbs_parser_t *parser, rbs_signature_t **signature);
 
 bool rbs_parse_type_params(rbs_parser_t *parser, bool module_type_params, rbs_node_list_t **params);

diff --git a/include/rbs/string.h b/include/rbs/string.h
@@ -44,6 +44,4 @@ size_t rbs_string_len(const rbs_string_t self);
  */
 bool rbs_string_equal(const rbs_string_t lhs, const rbs_string_t rhs);
 
-unsigned int rbs_utf8_string_to_codepoint(const rbs_string_t string);
-
 #endif
diff --git a/include/rbs/util/rbs_unescape.h b/include/rbs/util/rbs_unescape.h
@@ -4,6 +4,7 @@
 #include <stddef.h>
 #include "rbs/util/rbs_allocator.h"
 #include "rbs/string.h"
+#include "rbs/util/rbs_encoding.h"
 
 /**
  * Receives `rbs_parser_t` and `range`, which represents a string token or symbol token, and returns a string VALUE.
@@ -18,6 +19,6 @@
  *
  * @returns A new owned string that will be freed when the allocator is freed.
  * */
-rbs_string_t rbs_unquote_string(rbs_allocator_t *, const rbs_string_t input);
+rbs_string_t rbs_unquote_string(rbs_allocator_t *, const rbs_string_t input, const rbs_encoding_t *encoding);
 
 #endif // RBS_RBS_UNESCAPE_H
diff --git a/src/location.c b/src/location.c
@@ -8,7 +8,7 @@
 void rbs_loc_alloc_children(rbs_allocator_t *allocator, rbs_location_t *loc, size_t capacity) {
     RBS_ASSERT(capacity <= sizeof(rbs_loc_entry_bitmap) * 8, "Capacity %zu is too large. Max is %zu", capacity, sizeof(rbs_loc_entry_bitmap) * 8);
 
-    loc->children = rbs_allocator_malloc_impl(allocator, RBS_LOC_CHILDREN_SIZE(capacity), rbs_alignof(rbs_loc_children));
+    loc->children = (rbs_loc_children *) rbs_allocator_malloc_impl(allocator, RBS_LOC_CHILDREN_SIZE(capacity), rbs_alignof(rbs_loc_children));
 
     loc->children->len = 0;
     loc->children->required_p = 0;
-Original file line number
+Diff line change
@@ Expand Up / @@ -44,6 +44,4 @@ size_t rbs_string_len(const rbs_string_t self); @@
      */
     bool rbs_string_equal(const rbs_string_t lhs, const rbs_string_t rhs);
-    unsigned int rbs_utf8_string_to_codepoint(const rbs_string_t string);
     #endif