Skip to content

Understanding utf16length issues... #688

@anonrig

Description

@anonrig

I'm in the process of changing V8's Utf8LengthV2 implementation to use simdutf. Ref: https://chromium-review.googlesource.com/c/v8/v8/+/6286751

One particular V8 test case fails after switching to simdutf for UTF-16 input. Is this a bug in simdutf or in the V8 test?

// Builds JavaScript arrays of strings that mix a run of 20 ASCII digits with
// single lead (0xD800..) and trail (0xDC00..) surrogate code units, plus
// parallel arrays of expected lengths, and then checks them with Utf16Helper.
// In the expected lengths an unpaired surrogate counts as 3 and a lead+trail
// pair counts as 4 (3 + 3 - 2).
THREADED_TEST(Utf16) {
  LocalContext context;
  v8::HandleScope scope(context->GetIsolate());
  CompileRun(
      "var pad = '01234567890123456789';"
      "var p = [];"
      // Expected length contributed by p[0], p[1] and p[2] respectively.
      "var plens = [20, 3, 3];"
      "p.push('01234567890123456789');"
      "var lead = 0xD800;"
      "var trail = 0xDC00;"
      "p.push(String.fromCharCode(0xD800));"
      "p.push(String.fromCharCode(0xDC00));"
      // a, b and c each receive the same 9 concatenations p[i] + p[j].
      "var a = [];"
      "var b = [];"
      "var c = [];"
      "var alens = [];"
      "for (var i = 0; i < 3; i++) {"
      // p[1] is replaced with a fresh lead surrogate on every outer iteration.
      "  p[1] = String.fromCharCode(lead++);"
      "  for (var j = 0; j < 3; j++) {"
      // p[2] is replaced with a fresh trail surrogate on every inner
      // iteration.
      "    p[2] = String.fromCharCode(trail++);"
      "    a.push(p[i] + p[j]);"
      "    b.push(p[i] + p[j]);"
      "    c.push(p[i] + p[j]);"
      "    alens.push(plens[i] + plens[j]);"
      "  }"
      "}"
      // Index 5 is i == 1, j == 2, i.e. lead followed by trail: the only entry
      // in which the two surrogates form a pair, so 3 + 3 becomes 4.
      "alens[5] -= 2;"  // Here the surrogate pairs match up.
      // a2, b2 and c2 hold all 81 concatenations of two entries from a, b and
      // c respectively.
      "var a2 = [];"
      "var b2 = [];"
      "var c2 = [];"
      "var a2lens = [];"
      "for (var m = 0; m < 9; m++) {"
      "  for (var n = 0; n < 9; n++) {"
      "    a2.push(a[m] + a[n]);"
      "    b2.push(b[m] + b[n]);"
      // c2 entries are built with an extra 'x' prefix and 'y' suffix that
      // substring() then strips again, so the contents equal c[m] + c[n].
      // NOTE(review): presumably this is meant to produce a differently
      // constructed string with the same contents -- confirm.
      "    var newc = 'x' + c[m] + c[n] + 'y';"
      "    c2.push(newc.substring(1, newc.length - 1));"
      "    var utf = alens[m] + alens[n];"  // And here.
                                            // The 'n's that start with 0xDC..
                                            // are 6-8. The 'm's that end with
                                            // 0xD8.. are 1, 4 and 7.
      // When a[m] ends in a lead and a[n] starts with a trail, the junction
      // forms a surrogate pair, so the expected length drops by 2.
      "    if ((m % 3) == 1 && n >= 6) utf -= 2;"
      "    a2lens.push(utf);"
      "  }"
      "}");
  Utf16Helper(context, "a", "alens", 9);
  // The check over the 81 concatenated strings is commented out in this
  // snippet:
  // Utf16Helper(context, "a2", "a2lens", 81);
}

The script above produces the following values for a and alens:

a [
  '0123456789012345678901234567890123456789',
  '01234567890123456789\ud800',
  '01234567890123456789\udc02',
  '\ud80101234567890123456789',
  '\ud801\ud801',
  '𐐅',
  '\udc0601234567890123456789',
  '\udc07\ud802',
  '\udc08\udc08'
]
alens [
  40, 23, 23, 23, 6,
   4, 23,  6,  6
]

and uses this helper:

// Cross-checks the first `len` entries of two global JavaScript arrays: for
// every index, the string stored in the array called `name` must report,
// through Utf8LengthV2, exactly the number stored at the same index of the
// array called `lengths_name`. Any mismatch trips CHECK_EQ.
static void Utf16Helper(LocalContext& context, const char* name,
                        const char* lengths_name, int len) {
  // Reads a property of the global object by name and casts it to an array.
  const auto global_array = [&context](const char* key) {
    return Local<v8::Array>::Cast(
        context->Global()->Get(context.local(), v8_str(key)).ToLocalChecked());
  };
  Local<v8::Array> strings = global_array(name);
  Local<v8::Array> expected_lengths = global_array(lengths_name);

  for (int index = 0; index < len; ++index) {
    Local<v8::String> subject = Local<v8::String>::Cast(
        strings->Get(context.local(), index).ToLocalChecked());
    Local<v8::Number> expected_len = Local<v8::Number>::Cast(
        expected_lengths->Get(context.local(), index).ToLocalChecked());
    size_t length = subject->Utf8LengthV2(context->GetIsolate());
    CHECK_EQ(expected_len->Value(), length);
  }
}

The call Utf16Helper(context, "a", "alens", 9); fails with:

# Fatal error in ../../test/cctest/test-api.cc, line 8864
# Check failed: expected_len->Value() == length (23 vs. 22).
#
#
#
#FailureMessage Object: 0x7390c37ff8f0
==== C stack trace ===============================

    /home/yagiz/v8/v8/out/x64.optdebug/libv8_libbase.so(v8::base::debug::StackTrace::StackTrace()+0x13) [0x73917d0929e3]
    /home/yagiz/v8/v8/out/x64.optdebug/libv8_libplatform.so(+0x1affd) [0x73917d03dffd]
    /home/yagiz/v8/v8/out/x64.optdebug/libv8_libbase.so(V8_Fatal(char const*, int, char const*, ...)+0x194) [0x73917d076674]
    /home/yagiz/v8/v8/out/x64.optdebug/cctest(+0x523395) [0x60578b7d4395]
    /home/yagiz/v8/v8/out/x64.optdebug/cctest(+0x47f4d1) [0x60578b7304d1]
    /home/yagiz/v8/v8/out/x64.optdebug/cctest(ApiTestFuzzer::Run()+0x73) [0x60578b754303]
    /home/yagiz/v8/v8/out/x64.optdebug/libv8_libbase.so(+0x46796) [0x73917d091796]
    /lib/x86_64-linux-gnu/libc.so.6(+0x9caa4) [0x73917549caa4]
    /lib/x86_64-linux-gnu/libc.so.6(+0x129c3c) [0x739175529c3c]
Aborted (core dumped)
Command: out/x64.optdebug/cctest test-api/Threading3 --random-seed=1873391477 --nohard-abort --verify-heap --testing-d8-test-runner
--- FAILED ---

cc @lemire

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions