-
-
Notifications
You must be signed in to change notification settings - Fork 2.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
std: add ascii with C ASCII character classes #2095
Conversation
Does NOT look at the locale the way the C functions do. int isalnum(int c); int isalpha(int c); int iscntrl(int c); int isdigit(int c); int isgraph(int c); int islower(int c); int isprint(int c); int ispunct(int c); int isspace(int c); int isupper(int c); int isxdigit(int c); int isascii(int c); int isblank(int c); int toupper(int c); int tolower(int c); Tested to match glibc (when using C locale) with this program: const c = @cImport({ // See ziglang#515 @cdefine("_NO_CRT_STDIO_INLINE", "1"); @Cinclude("stdio.h"); @Cinclude("string.h"); @Cinclude("ctype.h"); }); const std = @import("std"); const ascii = std.ascii; const abort = std.os.abort; export fn main(argc: c_int, argv: **u8) c_int { var i: u8 = undefined; i = 0; while (true) { if (ascii.isAlNum(i) != (c.isalnum(i) > 0)) { abort(); } if (ascii.isAlpha(i) != (c.isalpha(i) > 0)) { abort(); } if (ascii.isCtrl(i) != (c.iscntrl(i) > 0)) { abort(); } if (ascii.isDigit(i) != (c.isdigit(i) > 0)) { abort(); } if (ascii.isGraph(i) != (c.isgraph(i) > 0)) { abort(); } if (ascii.isLower(i) != (c.islower(i) > 0)) { abort(); } if (ascii.isPrint(i) != (c.isprint(i) > 0)) { abort(); } if (ascii.isPunct(i) != (c.ispunct(i) > 0)) { abort(); } if (ascii.isSpace(i) != (c.isspace(i) > 0)) { abort(); } if (ascii.isUpper(i) != (c.isupper(i) > 0)) { abort(); } if (ascii.isXDigit(i) != (c.isxdigit(i) > 0)) { abort(); } if (i == 255) { break; } i += 1; } _ = c.printf(c"Success!\n"); return 0; }
👍 cool |
return c < 0x20 or c == 127; //DEL | ||
} | ||
|
||
pub fn isCntrl(c: u8) bool { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why have this alias?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Because it matches the C function, iscntrl()
. While "ctrl" is the more accepted abbreviation of control, I thought it would make things easier to just support both.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we should just pick one. The zig way in situations like this is to pick a side and make everybody conform.
} | ||
|
||
pub fn isASCII(c: u8) bool { | ||
return c < 128; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe use c & 0x80 == 0
instead?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just a feeling; I've always thought of all the ascii sets tests as bitmasking operations. It's odd for me to see them as normal comparisons....
return inTable(c, tIndex.Upper); | ||
} | ||
|
||
pub fn isXDigit(c: u8) bool { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do these function names/capitalisation make sense?
isPunct
is cutting off letters
isASCII
is mostly caps
isXDigit
looks weird
Related to #1884 I guess?
} | ||
|
||
pub fn isPrint(c: u8) bool { | ||
return inTable(c, tIndex.Graph) or c == ' '; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Worth rewriting as isGraph(c) or c == ' '
?
I needed a better toUpper() than the poor version in fmt/parse_float.zig working on #2047, and ended up with this.
Does NOT look at the locale the way the C functions do.
Tested to match glibc (when using C locale) with this program:
const c = @cImport({
// See #515
@cdefine("_NO_CRT_STDIO_INLINE", "1");
@Cinclude("stdio.h");
@Cinclude("string.h");
@Cinclude("ctype.h");
});
const std = @import("std");
const ascii = std.ascii;
const abort = std.os.abort;
export fn main(argc: c_int, argv: **u8) c_int {
var i: u8 = undefined;
i = 0;
while (true) {
if (ascii.isAlNum(i) != (c.isalnum(i) > 0)) { abort(); }
if (ascii.isAlpha(i) != (c.isalpha(i) > 0)) { abort(); }
if (ascii.isCtrl(i) != (c.iscntrl(i) > 0)) { abort(); }
if (ascii.isDigit(i) != (c.isdigit(i) > 0)) { abort(); }
if (ascii.isGraph(i) != (c.isgraph(i) > 0)) { abort(); }
if (ascii.isLower(i) != (c.islower(i) > 0)) { abort(); }
if (ascii.isPrint(i) != (c.isprint(i) > 0)) { abort(); }
if (ascii.isPunct(i) != (c.ispunct(i) > 0)) { abort(); }
if (ascii.isSpace(i) != (c.isspace(i) > 0)) { abort(); }
if (ascii.isUpper(i) != (c.isupper(i) > 0)) { abort(); }
if (ascii.isXDigit(i) != (c.isxdigit(i) > 0)) { abort(); }
if (i == 255) { break; }
i += 1;
}
}