|
23 | 23 | # Since this should not require frequent updates, we just store this
|
24 | 24 | # out-of-line and check the unicode.rs file into git.
|
25 | 25 |
|
26 |
| -import fileinput, re, os, sys, operator |
| 26 | +import fileinput, re, os, sys, operator, math |
27 | 27 |
|
28 | 28 | preamble = '''// Copyright 2012-2016 The Rust Project Developers. See the COPYRIGHT
|
29 | 29 | // file at the top-level directory of this distribution and at
|
@@ -359,7 +359,23 @@ def emit_trie_lookup_range_table(f):
|
359 | 359 | let leaf = r.r5[((child as usize) << 6) + ((c >> 6) & 0x3f)];
|
360 | 360 | trie_range_leaf(c, r.r6[leaf as usize])
|
361 | 361 | }
|
362 |
| -}\n |
| 362 | +} |
| 363 | +
|
| 364 | +pub struct SmallBoolTrie { |
| 365 | + r1: &'static [u8], // first level |
| 366 | + r2: &'static [u64], // leaves |
| 367 | +} |
| 368 | +
|
| 369 | +impl SmallBoolTrie { |
| 370 | + fn lookup(&self, c: char) -> bool { |
| 371 | + let c = c as usize; |
| 372 | + match self.r1.get(c >> 6) { |
| 373 | + Some(&child) => trie_range_leaf(c, self.r2[child as usize]), |
| 374 | + None => false, |
| 375 | + } |
| 376 | + } |
| 377 | +} |
| 378 | +
|
363 | 379 | """)
|
364 | 380 |
|
365 | 381 | def compute_trie(rawdata, chunksize):
|
@@ -429,13 +445,49 @@ def emit_bool_trie(f, name, t_data, is_pub=True):
|
429 | 445 |
|
430 | 446 | f.write(" };\n\n")
|
431 | 447 |
|
def emit_small_bool_trie(f, name, t_data, is_pub=True):
    """Write a `super::SmallBoolTrie` constant named `name` to `f`.

    f      -- writable text file object receiving the generated Rust source.
    name   -- identifier used for the emitted constant.
    t_data -- inclusive `(lo, hi)` integer ranges; it is iterated twice, so it
              must be a re-iterable collection rather than a one-shot iterator.
    is_pub -- when true, the constant is declared with a leading `pub `.

    Raises ValueError (from `max`) when `t_data` is empty.
    """
    # Build one 64-bit mask per block of 64 values, up to and including the
    # block that holds the largest `hi`. Floor division keeps the index an
    # exact integer on both Python 2 and Python 3.
    last_chunk = max(hi // 64 for (lo, hi) in t_data)
    chunks = [0] * (last_chunk + 1)
    for (lo, hi) in t_data:
        for cp in range(lo, hi + 1):
            # cp <= hi <= 64 * last_chunk + 63, so the index is always in range.
            chunks[cp // 64] |= 1 << (cp & 63)

    pub_string = "pub " if is_pub else ""
    f.write(" %sconst %s: &'static super::SmallBoolTrie = &super::SmallBoolTrie {\n"
            % (pub_string, name))

    # NOTE(review): compute_trie is defined elsewhere; presumably r1 holds the
    # per-chunk indices and r2 the distinct 64-bit leaves -- confirm there.
    (r1, r2) = compute_trie(chunks, 1)

    f.write(" r1: &[\n")
    data = ','.join(str(node) for node in r1)
    format_table_content(f, data, 12)
    f.write("\n ],\n")

    f.write(" r2: &[\n")
    # Leaves are written as 16-digit zero-padded hexadecimal literals.
    data = ','.join('0x%016x' % node for node in r2)
    format_table_content(f, data, 12)
    f.write("\n ],\n")

    f.write(" };\n\n")
| 477 | + |
def emit_property_module(f, mod, tbl, emit):
    """Write a Rust `pub mod <mod>` holding one table and one predicate per category.

    f    -- writable text file object receiving the generated Rust source.
    mod  -- name of the emitted module.
    tbl  -- mapping from category name to the data handed to the table emitter.
    emit -- category names to emit; they are processed in sorted order.
    """
    # Categories whose tables go through emit_small_bool_trie instead of
    # emit_bool_trie.
    small_trie_cats = ("Cc", "White_Space", "Pattern_White_Space")

    f.write("pub mod %s {\n" % mod)
    for cat in sorted(emit):
        table_name = "%s_table" % cat
        if cat in small_trie_cats:
            emit_small_bool_trie(f, table_name, tbl[cat])
            lookup_line = " %s_table.lookup(c)\n" % cat
        else:
            emit_bool_trie(f, table_name, tbl[cat])
            lookup_line = " super::trie_lookup_range_table(c, %s_table)\n" % cat
        # The predicate wrapper is the same for both table kinds; only the
        # lookup expression in its body differs.
        f.write(" pub fn %s(c: char) -> bool {\n" % cat)
        f.write(lookup_line)
        f.write(" }\n\n")
    f.write("}\n\n")
|
440 | 492 |
|
441 | 493 | def emit_conversions_module(f, to_upper, to_lower, to_title):
|
|
0 commit comments