From a2778953bc0b61fd372f2de663e21569d361131e Mon Sep 17 00:00:00 2001 From: Luhrel Date: Mon, 2 Mar 2020 11:18:12 +0100 Subject: [PATCH 01/10] Implements chkscanf.d --- changelog/chkscanf.md | 48 +++ src/dmd/chkscanf.d | 614 +++++++++++++++++++++++++++++++ src/dmd/expressionsem.d | 12 + src/dmd/id.d | 1 + test/fail_compilation/chkscanf.d | 74 ++++ 5 files changed, 749 insertions(+) create mode 100644 changelog/chkscanf.md create mode 100644 src/dmd/chkscanf.d create mode 100644 test/fail_compilation/chkscanf.d diff --git a/changelog/chkscanf.md b/changelog/chkscanf.md new file mode 100644 index 000000000000..7493051e586c --- /dev/null +++ b/changelog/chkscanf.md @@ -0,0 +1,48 @@ +# Validate scanf arguments against format specifiers + +Follows the C99 specification 7.19.6.2 for scanf. + +Takes a strict view of compatibility. + +Diagnosed incompatibilities are: + +1. incompatible sizes which will cause argument misalignment +2. insufficient number of arguments +3. struct arguments +4. array and slice arguments +5. non-standard formats +6. undefined behavior per C99 + +Per the C Standard, extra arguments are ignored. + +No attempt is made to fix the arguments or the format string. + +In order to use non-Standard scanf formats, an easy workaround is: + +``` +scanf("%k\n", value); // error: non-Standard format k +``` +``` +const format = "%k\n"; +scanf(format.ptr, value); // no error +``` + +Most of the errors detected are portability issues. For instance, + +``` +int i; +scanf("%ld\n", &i); +size_t s; +scanf("%d\n", &s); +ulong u; +scanf("%lld%*c\n", u); +``` +should be replaced with: +``` +int i; +scanf("%d\n", &i); +size_t s; +scanf("%zd\n", &s); +ulong u; +scanf("%llu%*c\n", &u); +``` diff --git a/src/dmd/chkscanf.d b/src/dmd/chkscanf.d new file mode 100644 index 000000000000..c3d58b670d2c --- /dev/null +++ b/src/dmd/chkscanf.d @@ -0,0 +1,614 @@ +/** + * Check the arguments to `scanf` against the `format` string. 
+ * + * Compiler implementation of the + * $(LINK2 http://www.dlang.org, D programming language). + * + * Copyright: Copyright (C) 1999-2020 by The D Language Foundation, All Rights Reserved + * Authors: $(LINK2 http://www.digitalmars.com, Walter Bright) + * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) + * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/chkscanf.d, _chkscanf.d) + * Documentation: https://dlang.org/phobos/dmd_chkscanf.html + * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/chkscanf.d + */ + +module dmd.chkscanf; + +import core.stdc.stdio : scanf; +import core.stdc.ctype : isdigit; + +import dmd.errors; +import dmd.expression; +import dmd.globals; +import dmd.mtype; +import dmd.target; + +/****************************************** + * Check that arguments to a scanf format string are compatible + * with that string. Issue errors for incompatibilities. + * + * Follows the C99 specification for scanf. + * + * Takes a strict view of compatibility. + * For example, `%d` requires an `int*` argument; a `uint*` is rejected. + * + * Diagnosed incompatibilities are: + * + * 1. incompatible sizes which will cause argument misalignment + * 2. dereferencing arguments that are not pointers + * 3. insufficient number of arguments + * 4. struct arguments + * 5. array and slice arguments + * 6. non-standard formats + * 7. undefined behavior per C99 + * + * Per the C Standard, extra arguments are ignored. + * + * No attempt is made to fix the arguments or the format string. 
+ * + * Returns: + * `true` if a format specifier has no corresponding argument; type mismatches are reported but do not affect the result + * References: + * C99 7.19.6.2 + * http://www.cplusplus.com/reference/cstdio/scanf/ + */ + +bool checkScanfFormat(ref const Loc loc, scope const char[] format, scope Expression[] args) +{ + size_t n = 0; + for (size_t i = 0; i < format.length;) + { + if (format[i] != '%') + { + ++i; + continue; + } + bool asterisk; + size_t j = i; + const fmt = parseFormatSpecifier(format, j, asterisk); + const slice = format[i .. j]; + i = j; + + if (fmt == Format.percent || asterisk) + continue; // "%%", "%*": no arguments + + Expression getNextArg() + { + if (n == args.length) + { + if (!asterisk) + deprecation(loc, "more format specifiers than %d arguments", cast(int)n); + return null; + } + return args[n++]; + } + + void errorMsg(const char* prefix, const char[] specifier, Expression arg, const char* texpect, Type tactual) + { + deprecation(arg.loc, "%sargument `%s` for format specification `\"%.*s\"` must be `%s`, not `%s`", + prefix ? prefix : "", arg.toChars(), cast(int)slice.length, slice.ptr, texpect, tactual.toChars()); + } + + auto e = getNextArg(); + if (!e) + return true; + + auto t = e.type.toBasetype(); + auto tnext = t.nextOf(); + const c_longsize = target.c.longsize; + const is64bit = global.params.is64bit; + + final switch (fmt) + { + case Format.d: // pointer to int + if (!(t.ty == Tpointer && tnext.ty == Tint32)) + errorMsg(null, slice, e, "int*", t); + break; + + case Format.hhd: // pointer to signed char (8-bit `byte`) + if (!(t.ty == Tpointer && tnext.ty == Tint8)) + errorMsg(null, slice, e, "byte*", t); + break; + + case Format.hd: // pointer to short + if (!(t.ty == Tpointer && tnext.ty == Tint16)) + errorMsg(null, slice, e, "short*", t); + break; + + case Format.ld: // pointer to long int + if (!(t.ty == Tpointer && tnext.isintegral() && tnext.size() == c_longsize)) + errorMsg(null, slice, e, (c_longsize == 4 ? 
"int*" : "long*"), t); + break; + + case Format.lld: // pointer to long long int + if (!(t.ty == Tpointer && tnext.ty == Tint64)) + errorMsg(null, slice, e, "long*", t); + break; + + case Format.jd: // pointer to intmax_t + if (!(t.ty == Tpointer && tnext.ty == Tint64)) + errorMsg(null, slice, e, "core.stdc.stdint.intmax_t*", t); + break; + + case Format.zd: // pointer to size_t + if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tuns64 : Tuns32))) + errorMsg(null, slice, e, "size_t*", t); + break; + case Format.td: // pointer to ptrdiff_t + if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tint64 : Tint32))) + errorMsg(null, slice, e, "ptrdiff_t*", t); + break; + + case Format.u: // pointer to unsigned int + if (!(t.ty == Tpointer && tnext.ty == Tuns32)) + errorMsg(null, slice, e, "uint*", t); + break; + + case Format.hhu: // pointer to unsigned char + if (!(t.ty == Tpointer && tnext.ty == Tuns8)) + errorMsg(null, slice, e, "ubyte*", t); + break; + + case Format.hu: // pointer to unsigned short int + if (!(t.ty == Tpointer && tnext.ty == Tuns16)) + errorMsg(null, slice, e, "ushort*", t); + break; + + case Format.lu: // pointer to unsigned long int (width follows the target's C `long`, not the pointer size) + if (!(t.ty == Tpointer && tnext.ty == (c_longsize == 4 ? Tuns32 : Tuns64))) + errorMsg(null, slice, e, (c_longsize == 4 ? 
"uint*" : "ulong*"), t); + break; + + case Format.llu: // pointer to unsigned long long int + if (!(t.ty == Tpointer && tnext.ty == Tuns64)) + errorMsg(null, slice, e, "ulong*", t); + break; + + case Format.ju: // pointer to uintmax_t (64 bits, like `jd`) + if (!(t.ty == Tpointer && tnext.ty == Tuns64))
+ errorMsg(null, slice, e, "ulong*", t); + break; + + case Format.g: // pointer to float + if (!(t.ty == Tpointer && tnext.ty == Tfloat32)) + errorMsg(null, slice, e, "float*", t); + break; + case Format.lg: // pointer to double + if (!(t.ty == Tpointer && tnext.ty == Tfloat64)) + errorMsg(null, slice, e, "double*", t); + break; + case Format.Lg: // pointer to long double + if (!(t.ty == Tpointer && tnext.ty == Tfloat80)) + errorMsg(null, slice, e, "real*", t); + break; + + case Format.s: // pointer to char string + if (!(t.ty == Tpointer && (tnext.ty == Tchar || tnext.ty == Tint8 || tnext.ty == Tuns8))) + errorMsg(null, slice, e, "char*", t); + break; + + case Format.ls: // pointer to wchar_t string + const twchar_t = global.params.isWindows ? Twchar : Tdchar; + if (!(t.ty == Tpointer && tnext.ty == twchar_t)) + errorMsg(null, slice, e, "wchar_t*", t); + break; + + case Format.p: // double pointer + if (!(t.ty == Tpointer && tnext.ty == Tpointer)) + errorMsg(null, slice, e, "void**", t); + break; + + case Format.error: + deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr); + break; + + case Format.percent: + assert(0); + } + } + return false; +} + +private: + +/* Different kinds of formatting specifications, variations we don't + care about are merged. (Like we don't care about the difference between + f, e, g, a, etc.) 
+ */ +enum Format +{ + d, // pointer to int + hhd, // pointer to signed char + hd, // pointer to short int + ld, // pointer to long int + lld, // pointer to long long int + jd, // pointer to intmax_t + zd, // pointer to size_t + td, // pointer to ptrdiff_t + u, // pointer to unsigned int + hhu, // pointer to unsigned char + hu, // pointer to unsigned short int + lu, // pointer to unsigned long int + llu, // pointer to unsigned long long int + ju, // pointer to uintmax_t + g, // pointer to float + lg, // pointer to double + Lg, // pointer to long double + s, // pointer to char string + ls, // pointer to wchar_t string + p, // double pointer + percent, // %% (i.e. no argument) + error, // invalid format specification +} + + +/************************************** + * Parse the *format specifier* which is of the form: + * + * `%[*][width][scanset][length]specifier` + * + * Params: + * format = format string + * idx = index of `%` of start of format specifier, + * which gets updated to index past the end of it, + * even if Format.error is returned + * asterisk = set if there is a `*` sub-specifier + * Returns: + * Format + */ +pure nothrow @safe +Format parseFormatSpecifier(scope const char[] format, ref size_t idx, + out bool asterisk) +{ + auto i = idx; + assert(format[i] == '%'); + const length = format.length; + + Format error() + { + idx = i; + return Format.error; + } + + ++i; + if (i == length) + return error(); + + if (format[i] == '%') + { + idx = i + 1; + return Format.percent; + } + + // * sub-specifier + if (format[i] == '*') + { + ++i; + if (i == length) + return error(); + asterisk = true; + } + + // optional field width: a run of decimal digits + { + while (isdigit(format[i])) + { + i++; + if (i == length) + return error(); + } + } + + /* Read the scanset + * A scanset can be anything, so we just check that it is paired + */ + if (format[i] == '[') + { + while (i < length) + { + if (format[i] == ']') + break; + ++i; + } + + // no `]` found + if 
(i == length) + return error(); + + ++i; + // no 
specifier after `]` + // it could be mixed with the one above, but then idx won't have the right index + if (i == length) + return error(); + } + + /* Read the `length modifier` + */ + const lm = format[i]; + bool lm1; // set if the length modifier is `j`, `z`, `t` or `L` + bool lm2; // set if the length modifier is doubled: `hh` or `ll` + if (lm == 'j' || + lm == 'z' || + lm == 't' || + lm == 'L') + { + ++i; + if (i == length) + return error(); + lm1 = true; + } + else if (lm == 'h' || lm == 'l') + { + ++i; + if (i == length) + return error(); + lm2 = lm == format[i]; + if (lm2) + { + ++i; + if (i == length) + return error(); + } + } + + /* Read the `specifier` + */ + Format specifier; + const sc = format[i]; + ++i; + switch (sc) + { + case 'd': + case 'i': + case 'n': + if (lm == 'L') + return error(); + specifier = lm == 'h' && lm2 ? Format.hhd : + lm == 'h' ? Format.hd : + lm == 'l' && lm2 ? Format.lld : + lm == 'l' ? Format.ld : + lm == 'j' ? Format.jd : + lm == 'z' ? Format.zd : + lm == 't' ? Format.td : + Format.d; + break; + case 'u': + case 'o': + case 'x': + case 'X': + if (lm == 'L') + return error(); + specifier = lm == 'h' && lm2 ? Format.hhu : + lm == 'h' ? Format.hu : + lm == 'l' && lm2 ? Format.llu : + lm == 'l' ? Format.lu : + lm == 'j' ? Format.ju : + lm == 'z' ? Format.zd : + lm == 't' ? 
Format.td : + Format.u; + break; + + case 'f': + case 'F': + case 'e': + case 'E': + case 'g': + case 'G': + case 'a': + case 'A': + if (lm == 'L') + specifier = Format.Lg; + else if (lm == 'l' && !lm2) + specifier = Format.lg; + else if (lm1 || lm2 || lm == 'h') + return error(); + else + specifier = Format.g; + break; + + case 'c': + case 's': + if (lm == 'l' && !lm2) + specifier = Format.ls; + else if (lm1 || lm2 || lm == 'h') + return error(); + else + specifier = Format.s; + break; + + case 'p': + if (lm1 || lm2 || lm == 'h' || lm == 'l') + return error(); + specifier = Format.p; + break; + + default: + return error(); + } + + idx = i; + return specifier; // success +} + +unittest +{ + size_t idx; + bool asterisk; + + // one for each Format + idx = 0; + assert(parseFormatSpecifier("%d", idx, asterisk) == Format.d); + assert(idx == 2); + assert(!asterisk); + + idx = 0; + assert(parseFormatSpecifier("%hhd", idx, asterisk) == Format.hhd); + assert(idx == 4); + + idx = 0; + assert(parseFormatSpecifier("%hd", idx, asterisk) == Format.hd); + assert(idx == 3); + + idx = 0; + assert(parseFormatSpecifier("%ld", idx, asterisk) == Format.ld); + assert(idx == 3); + + idx = 0; + assert(parseFormatSpecifier("%lld", idx, asterisk) == Format.lld); + assert(idx == 4); + + idx = 0; + assert(parseFormatSpecifier("%jd", idx, asterisk) == Format.jd); + assert(idx == 3); + + idx = 0; + assert(parseFormatSpecifier("%zd", idx, asterisk) == Format.zd); + assert(idx == 3); + + idx = 0; + assert(parseFormatSpecifier("%td", idx, asterisk,) == Format.td); + assert(idx == 3); + + idx = 0; + assert(parseFormatSpecifier("%u", idx, asterisk) == Format.u); + assert(idx == 2); + + idx = 0; + assert(parseFormatSpecifier("%hhu", idx, asterisk,) == Format.hhu); + assert(idx == 4); + + idx = 0; + assert(parseFormatSpecifier("%hu", idx, asterisk) == Format.hu); + assert(idx == 3); + + idx = 0; + assert(parseFormatSpecifier("%lu", idx, asterisk) == Format.lu); + assert(idx == 3); + + idx = 0; + 
assert(parseFormatSpecifier("%llu", idx, asterisk) == Format.llu); + assert(idx == 4); + + idx = 0; + assert(parseFormatSpecifier("%ju", idx, asterisk) == Format.ju); + assert(idx == 3); + + idx = 0; + assert(parseFormatSpecifier("%g", idx, asterisk) == Format.g); + assert(idx == 2); + + idx = 0; + assert(parseFormatSpecifier("%lg", idx, asterisk) == Format.lg); + assert(idx == 3); + + idx = 0; + assert(parseFormatSpecifier("%Lg", idx, asterisk) == Format.Lg); + assert(idx == 3); + + idx = 0; + assert(parseFormatSpecifier("%p", idx, asterisk) == Format.p); + assert(idx == 2); + + idx = 0; + assert(parseFormatSpecifier("%s", idx, asterisk) == Format.s); + assert(idx == 2); + + idx = 0; + assert(parseFormatSpecifier("%ls", idx, asterisk,) == Format.ls); + assert(idx == 3); + + idx = 0; + assert(parseFormatSpecifier("%%", idx, asterisk) == Format.percent); + assert(idx == 2); + + // Synonyms + idx = 0; + assert(parseFormatSpecifier("%i", idx, asterisk) == Format.d); + assert(idx == 2); + + idx = 0; + assert(parseFormatSpecifier("%n", idx, asterisk) == Format.d); + assert(idx == 2); + + idx = 0; + assert(parseFormatSpecifier("%o", idx, asterisk) == Format.u); + assert(idx == 2); + + idx = 0; + assert(parseFormatSpecifier("%x", idx, asterisk) == Format.u); + assert(idx == 2); + + idx = 0; + assert(parseFormatSpecifier("%f", idx, asterisk) == Format.g); + assert(idx == 2); + + idx = 0; + assert(parseFormatSpecifier("%e", idx, asterisk) == Format.g); + assert(idx == 2); + + idx = 0; + assert(parseFormatSpecifier("%a", idx, asterisk) == Format.g); + assert(idx == 2); + + idx = 0; + assert(parseFormatSpecifier("%c", idx, asterisk) == Format.s); + assert(idx == 2); + + // asterisk + idx = 0; + assert(parseFormatSpecifier("%*d", idx, asterisk) == Format.d); + assert(idx == 3); + assert(asterisk); + + idx = 0; + assert(parseFormatSpecifier("%9ld", idx, asterisk) == Format.ld); + assert(idx == 4); + assert(!asterisk); + + idx = 0; + assert(parseFormatSpecifier("%*25984hhd", 
idx, asterisk) == Format.hhd); + assert(idx == 10); + assert(asterisk); + + // scansets + idx = 0; + assert(parseFormatSpecifier("%[a-zA-Z]s", idx, asterisk) == Format.s); + assert(idx == 10); + assert(!asterisk); + + idx = 0; + assert(parseFormatSpecifier("%*25[a-z]hhd", idx, asterisk) == Format.hhd); + assert(idx == 12); + assert(asterisk); + + // Too short formats + foreach (s; ["%", "% ", "%#", "%0", "%*", "%1", "%19", + "%j", "%z", "%t", "%l", "%h", "%ll", "%hh", "%K"]) + { + idx = 0; + assert(parseFormatSpecifier(s, idx, asterisk) == Format.error); + assert(idx == s.length); + } + + + // Undefined format combinations + foreach (s; ["%Ld", "%llg", "%jg", "%zg", "%tg", "%hg", "%hhg", + "%jc", "%zc", "%tc", "%Lc", "%hc", "%hhc", "%llc", + "%jp", "%zp", "%tp", "%Lp", "%hp", "%lp", "%hhp", "%llp", + "%-", "%+", "%#", "%0", "%.", "%Ln"]) + { + idx = 0; + assert(parseFormatSpecifier(s, idx, asterisk) == Format.error); + assert(idx == s.length); + + } + + // Invalid scansets + foreach (s; ["%[]", "%[s", "%[0-9lld", "%[", "%[a-z]"]) + { + idx = 0; + assert(parseFormatSpecifier(s, idx, asterisk) == Format.error); + assert(idx == s.length); + } + +} diff --git a/src/dmd/expressionsem.d b/src/dmd/expressionsem.d index 1d86da373dff..9054e2317863 100644 --- a/src/dmd/expressionsem.d +++ b/src/dmd/expressionsem.d @@ -23,6 +23,7 @@ import dmd.attrib; import dmd.astcodegen; import dmd.canthrow; import dmd.chkprintf; +import dmd.chkscanf; import dmd.ctorflow; import dmd.dscope; import dmd.dsymbol; @@ -2161,6 +2162,17 @@ private bool functionParameters(const ref Loc loc, Scope* sc, } } + /* If calling C scanf(), check the format string against the arguments + */ + if (tf.linkage == LINK.c && nparams >= 1 && fd && fd.ident == Id.scanf) + { + if (auto se = (*arguments)[0].isStringExp()) + { + if (checkScanfFormat(se.loc, se.peekString(), (*arguments)[1 .. nargs])) + err = true; + } + } + /* Remaining problems: * 1. order of evaluation - some function push L-to-R, others R-to-L. 
Until we resolve what array assignment does (which is * implemented by calling a function) we'll defer this for now. diff --git a/src/dmd/id.d b/src/dmd/id.d index 5de9c21fc456..1d9fca2e7a82 100644 --- a/src/dmd/id.d +++ b/src/dmd/id.d @@ -134,6 +134,7 @@ immutable Msgtable[] msgtable = { "_unittest", "unittest" }, { "_body", "body" }, { "printf" }, + { "scanf" }, { "TypeInfo" }, { "TypeInfo_Class" }, diff --git a/test/fail_compilation/chkscanf.d b/test/fail_compilation/chkscanf.d new file mode 100644 index 000000000000..ac97b9b08e47 --- /dev/null +++ b/test/fail_compilation/chkscanf.d @@ -0,0 +1,74 @@ +/* +REQUIRED_ARGS: -de +TEST_OUTPUT: +--- +fail_compilation/chkscanf.d(101): Deprecation: argument `0L` for format specification `"%d"` must be `int*`, not `long` +fail_compilation/chkscanf.d(102): Deprecation: more format specifiers than 1 arguments +fail_compilation/chkscanf.d(103): Deprecation: argument `0L` for format specification `"%d"` must be `int*`, not `long` +fail_compilation/chkscanf.d(104): Deprecation: argument `0L` for format specification `"%3u"` must be `uint*`, not `long` +fail_compilation/chkscanf.d(105): Deprecation: argument `u` for format specification `"%200u"` must be `uint*`, not `uint` +fail_compilation/chkscanf.d(106): Deprecation: argument `3.00000` for format specification `"%hhd"` must be `byte*`, not `double` +fail_compilation/chkscanf.d(107): Deprecation: argument `4` for format specification `"%hd"` must be `short*`, not `int` +fail_compilation/chkscanf.d(109): Deprecation: argument `4` for format specification `"%lld"` must be `long*`, not `int` +fail_compilation/chkscanf.d(110): Deprecation: argument `5` for format specification `"%jd"` must be `core.stdc.stdint.intmax_t*`, not `int` +fail_compilation/chkscanf.d(111): Deprecation: argument `6.00000` for format specification `"%zd"` must be `size_t*`, not `double` +fail_compilation/chkscanf.d(112): Deprecation: argument `7.00000` for format specification `"%td"` must be 
`ptrdiff_t*`, not `double` +fail_compilation/chkscanf.d(113): Deprecation: format specifier `"%Ld"` is invalid +fail_compilation/chkscanf.d(114): Deprecation: argument `0` for format specification `"%u"` must be `uint*`, not `int` +fail_compilation/chkscanf.d(115): Deprecation: argument `0` for format specification `"%hhu"` must be `ubyte*`, not `int` +fail_compilation/chkscanf.d(116): Deprecation: argument `0` for format specification `"%hu"` must be `ushort*`, not `int` +fail_compilation/chkscanf.d(118): Deprecation: argument `0` for format specification `"%llu"` must be `ulong*`, not `int` +fail_compilation/chkscanf.d(119): Deprecation: argument `0` for format specification `"%ju"` must be `ulong*`, not `int` +fail_compilation/chkscanf.d(120): Deprecation: argument `0` for format specification `"%zu"` must be `size_t*`, not `int` +fail_compilation/chkscanf.d(121): Deprecation: argument `0` for format specification `"%tu"` must be `ptrdiff_t*`, not `int` +fail_compilation/chkscanf.d(122): Deprecation: argument `8.00000L` for format specification `"%g"` must be `float*`, not `real` +fail_compilation/chkscanf.d(123): Deprecation: argument `8.00000L` for format specification `"%lg"` must be `double*`, not `real` +fail_compilation/chkscanf.d(124): Deprecation: argument `9.00000` for format specification `"%Lg"` must be `real*`, not `double` +fail_compilation/chkscanf.d(125): Deprecation: argument `& u` for format specification `"%s"` must be `char*`, not `int*` +fail_compilation/chkscanf.d(126): Deprecation: argument `& u` for format specification `"%ls"` must be `wchar_t*`, not `int*` +fail_compilation/chkscanf.d(127): Deprecation: argument `v` for format specification `"%p"` must be `void**`, not `void*` +fail_compilation/chkscanf.d(128): Deprecation: argument `& u` for format specification `"%n"` must be `int*`, not `ushort*` +fail_compilation/chkscanf.d(129): Deprecation: argument `& u` for format specification `"%hhn"` must be `byte*`, not `int*` 
+fail_compilation/chkscanf.d(130): Deprecation: format specifier `"%[n"` is invalid +fail_compilation/chkscanf.d(131): Deprecation: format specifier `"%]"` is invalid +fail_compilation/chkscanf.d(132): Deprecation: argument `& u` for format specification `"%90s"` must be `char*`, not `int*` +--- +*/ + + +import core.stdc.stdio : scanf; + +#line 100 + +void test1() { scanf("%d\n", 0L); } +void test2() { int i; scanf("%d %d\n", &i); } +void test3() { scanf("%d%*c\n", 0L); } +void test4() { scanf("%3u\n", 0L); } +void test5() { uint u; scanf("%200u%*s\n", u); } +void test6() { scanf("%hhd\n", 3.0); } +void test7() { scanf("%hd\n", 4); } +//void test8() { scanf("%ld\n", 3.0); } +void test9() { scanf("%lld\n", 4); } +void test10() { scanf("%jd\n", 5); } +void test11() { scanf("%zd\n", 6.0); } +void test12() { scanf("%td\n", 7.0); } +void test13() { scanf("%Ld\n", 0); } +void test14() { scanf("%u\n", 0); } +void test15() { scanf("%hhu\n", 0); } +void test16() { scanf("%hu\n", 0); } +//void test17() { scanf("%lu\n", 0); } +void test18() { scanf("%llu\n", 0); } +void test19() { scanf("%ju\n", 0); } +void test20() { scanf("%zu\n", 0); } +void test21() { scanf("%tu\n", 0); } +void test22() { scanf("%g\n", 8.0L); } +void test23() { scanf("%lg\n", 8.0L); } +void test24() { scanf("%Lg\n", 9.0); } +void test25() { int u; scanf("%s\n", &u); } +void test26() { int u; scanf("%ls\n", &u); } +void test27() { void* v; scanf("%p\n", v); } +void test28() { ushort u; scanf("%n\n", &u); } +void test29() { int u; scanf("%hhn\n", &u); } +void test30() { int u; scanf("%[n", &u); } +void test31() { int u; scanf("%]\n", &u); } +void test32() { int u; scanf("%90s\n", &u); } From d7300e3fa09b7f8129d50f92de9e376472b8e67d Mon Sep 17 00:00:00 2001 From: Luhrel Date: Mon, 2 Mar 2020 16:23:05 +0100 Subject: [PATCH 02/10] Implements chkformat.d Now `chkscanf` and `chkprintf` use the same base function for parsing a generic `Format`. 
--- src/dmd/chkformat.d | 228 ++++++++++++++++++++++++++++++++++++++++++++ src/dmd/chkprintf.d | 172 ++++++++------------------------- src/dmd/chkscanf.d | 156 ++++-------------------------- 3 files changed, 287 insertions(+), 269 deletions(-) create mode 100644 src/dmd/chkformat.d diff --git a/src/dmd/chkformat.d b/src/dmd/chkformat.d new file mode 100644 index 000000000000..f95aa11d1b2e --- /dev/null +++ b/src/dmd/chkformat.d @@ -0,0 +1,228 @@ +/** + * Parse the `format` string. Made for `scanf` and `printf` checks. + * + * Compiler implementation of the + * $(LINK2 http://www.dlang.org, D programming language). + * + * Copyright: Copyright (C) 1999-2020 by The D Language Foundation, All Rights Reserved + * Authors: $(LINK2 http://www.digitalmars.com, Walter Bright) + * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) + * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/chkformat.d, _chkformat.d) + * Documentation: https://dlang.org/phobos/dmd_chkformat.html + * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/chkformat.d + */ +module dmd.chkformat; + +/* Different kinds of formatting specifications, variations we don't + care about are merged. (Like we don't care about the difference between + f, e, g, a, etc.) + + For `scanf`, every format is a pointer. 
+ */ +enum Format +{ + d, // int + hhd, // signed char + hd, // short int + ld, // long int + lld, // long long int + jd, // intmax_t + zd, // size_t + td, // ptrdiff_t + u, // unsigned int + hhu, // unsigned char + hu, // unsigned short int + lu, // unsigned long int + llu, // unsigned long long int + ju, // uintmax_t + g, // float (scanf) / double (printf) + lg, // double (scanf) + Lg, // long double (both) + s, // char string (both) + ls, // wchar_t string (both) + c, // char (printf) + lc, // wint_t (printf) + p, // pointer + n, // pointer to int + hhn, // pointer to signed char + hn, // pointer to short + ln, // pointer to long int + lln, // pointer to long long int + jn, // pointer to intmax_t + zn, // pointer to size_t + tn, // pointer to ptrdiff_t + percent, // %% (i.e. no argument) + error, // invalid format specification +} + +/************************************** + * Parse the *length modifier* and the *specifier* of the following form: + * `[length]specifier` + * + * Params: + * format = format string + * idx = index of the start of the `[length]specifier` part, + * which gets updated to index past the end of it, + * even if `Format.error` is returned + * genSpecifier = Generic specifier. For instance, it will be set to `d` if the + * format is `hhd`. 
+ * Returns: + * Format + */ +pure @safe nothrow +Format parseGenericFormatSpecifier(scope const char[] format, + ref size_t idx, out char genSpecifier) +{ + const length = format.length; + + /* Read the `length modifier` + */ + const lm = format[idx]; + bool lm1; // if jztL + bool lm2; // if `hh` or `ll` + if (lm == 'j' || + lm == 'z' || + lm == 't' || + lm == 'L') + { + ++idx; + if (idx == length) + return Format.error; + lm1 = true; + } + else if (lm == 'h' || lm == 'l') + { + ++idx; + if (idx == length) + return Format.error; + lm2 = lm == format[idx]; + if (lm2) + { + ++idx; + if (idx == length) + return Format.error; + } + } + + /* Read the `specifier` + */ + Format specifier; + const sc = format[idx]; + genSpecifier = sc; + switch (sc) + { + case 'd': + case 'i': + if (lm == 'L') + specifier = Format.error; + else + specifier = lm == 'h' && lm2 ? Format.hhd : + lm == 'h' ? Format.hd : + lm == 'l' && lm2 ? Format.lld : + lm == 'l' ? Format.ld : + lm == 'j' ? Format.jd : + lm == 'z' ? Format.zd : + lm == 't' ? Format.td : + Format.d; + break; + + case 'u': + case 'o': + case 'x': + case 'X': + if (lm == 'L') + specifier = Format.error; + else + specifier = lm == 'h' && lm2 ? Format.hhu : + lm == 'h' ? Format.hu : + lm == 'l' && lm2 ? Format.llu : + lm == 'l' ? Format.lu : + lm == 'j' ? Format.ju : + lm == 'z' ? Format.zd : + lm == 't' ? Format.td : + Format.u; + break; + + case 'f': + case 'F': + case 'e': + case 'E': + case 'g': + case 'G': + case 'a': + case 'A': + if (lm == 'L') + specifier = Format.Lg; + else if (lm1 || lm2 || lm == 'h') + specifier = Format.error; + else + specifier = lm == 'l' ? Format.lg : Format.g; + break; + + case 'c': + if (lm1 || lm2 || lm == 'h') + specifier = Format.error; + else + specifier = lm == 'l' ? Format.lc : Format.c; + break; + + case 's': + if (lm1 || lm2 || lm == 'h') + specifier = Format.error; + else + specifier = lm == 'l' ? 
Format.ls : Format.s; + break; + + case 'p': + if (lm1 || lm2 || lm == 'h' || lm == 'l') + specifier = Format.error; + else + specifier = Format.p; + break; + + case 'n': + if (lm == 'L') + specifier = Format.error; + else + specifier = lm == 'l' && lm2 ? Format.lln : + lm == 'l' ? Format.ln : + lm == 'h' && lm2 ? Format.hhn : + lm == 'h' ? Format.hn : + lm == 'j' ? Format.jn : + lm == 'z' ? Format.zn : + lm == 't' ? Format.tn : + Format.n; + break; + + default: + specifier = Format.error; + break; + } + + ++idx; + return specifier; // success +} + +unittest +{ + char genSpecifier; + size_t idx; + + assert(parseGenericFormatSpecifier("hhd", idx, genSpecifier) == Format.hhd); + assert(genSpecifier == 'd'); + + idx = 0; + assert(parseGenericFormatSpecifier("hn", idx, genSpecifier) == Format.hn); + assert(genSpecifier == 'n'); + + idx = 0; + assert(parseGenericFormatSpecifier("ji", idx, genSpecifier) == Format.jd); + assert(genSpecifier == 'i'); + + idx = 0; + assert(parseGenericFormatSpecifier("lu", idx, genSpecifier) == Format.lu); + assert(genSpecifier == 'u'); + + idx = 0; + assert(parseGenericFormatSpecifier("k", idx, genSpecifier) == Format.error); +} diff --git a/src/dmd/chkprintf.d b/src/dmd/chkprintf.d index 8c734d9d26df..7f048ddc8120 100644 --- a/src/dmd/chkprintf.d +++ b/src/dmd/chkprintf.d @@ -16,6 +16,7 @@ module dmd.chkprintf; import core.stdc.stdio : printf; +import dmd.chkformat; import dmd.errors; import dmd.expression; import dmd.globals; @@ -120,21 +121,37 @@ bool checkPrintfFormat(ref const Loc loc, scope const char[] format, scope Expre final switch (fmt) { + case Format.u: // unsigned int case Format.d: // int if (t.ty != Tint32 && t.ty != Tuns32) errorMsg(null, slice, e, "int", t); break; + case Format.hhu: // unsigned char + case Format.hhd: // signed char + if (t.ty != Tint8 && t.ty != Tuns8) + errorMsg(null, slice, e, "byte", t); + break; + + case Format.hu: // unsigned short int + case Format.hd: // short int + if (t.ty != Tint16 && t.ty != 
Tuns16) + errorMsg(null, slice, e, "short", t); + break; + + case Format.lu: // unsigned long int case Format.ld: // long int if (!(t.isintegral() && t.size() == c_longsize)) errorMsg(null, slice, e, (c_longsize == 4 ? "int" : "long"), t); break; + case Format.llu: // unsigned long long int case Format.lld: // long long int if (t.ty != Tint64 && t.ty != Tuns64) errorMsg(null, slice, e, "long", t); break; + case Format.ju: // uintmax_t case Format.jd: // intmax_t if (t.ty != Tint64 && t.ty != Tuns64) errorMsg(null, slice, e, "core.stdc.stdint.intmax_t", t); @@ -229,6 +246,7 @@ bool checkPrintfFormat(ref const Loc loc, scope const char[] format, scope Expre deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr); break; + case Format.lg: case Format.percent: assert(0); } @@ -238,38 +256,6 @@ bool checkPrintfFormat(ref const Loc loc, scope const char[] format, scope Expre private: -/* Different kinds of formatting specifications, variations we don't - care about are merged. (Like we don't care about the difference between - a, A, g, G, etc.) - */ -enum Format -{ - d, // int - ld, // long int - lld, // long long int - jd, // intmax_t - zd, // size_t - td, // ptrdiff_t - g, // double - Lg, // long double - p, // pointer - n, // pointer to int - ln, // pointer to long int - lln, // pointer to long long int - hn, // pointer to short - hhn, // pointer to signed char - jn, // pointer to intmax_t - zn, // pointer to size_t - tn, // pointer to ptrdiff_t - c, // char - lc, // wint_t - s, // pointer to char string - ls, // pointer to wchar_t string - percent, // %% (i.e. 
no argument) - error, // invalid format specification -} - - /************************************** * Parse the *format specifier* which is of the form: * @@ -279,7 +265,7 @@ enum Format * format = format string * idx = index of `%` of start of format specifier, * which gets updated to index past the end of it, - * even if Format.error is returned + * even if `Format.error` is returned * widthStar = set if * for width * precisionStar = set if * for precision * Returns: @@ -397,115 +383,34 @@ Format parseFormatSpecifier(scope const char[] format, ref size_t idx, } } - /* Read the `length modifier` + /* Read the specifier */ - const lm = format[i]; - bool lm1; // if jztL - bool lm2; // if `hh` or `ll` - if (lm == 'j' || - lm == 'z' || - lm == 't' || - lm == 'L') - { - ++i; - if (i == length) - return error(); - lm1 = true; - } - else if (lm == 'h' || lm == 'l') - { - ++i; - if (i == length) - return error(); - lm2 = lm == format[i]; - if (lm2) - { - ++i; - if (i == length) - return error(); - } - } + char genSpec; + Format specifier = parseGenericFormatSpecifier(format, i, genSpec); + if (specifier == Format.error || specifier == Format.lg) + return error(); - /* Read the `specifier` - */ - Format specifier; - const sc = format[i]; - ++i; - switch (sc) + switch (genSpec) { - case 'd': - case 'i': - case 'u': - if (hash) - return error(); - goto case 'o'; - - case 'o': - case 'x': - case 'X': - specifier = lm == 'l' && lm2 ? Format.lld : - lm == 'l' ? Format.ld : - lm == 'j' ? Format.jd : - lm == 'z' ? Format.zd : - lm == 't' ? Format.td : - Format.d; - break; - - case 'f': - case 'F': - case 'e': - case 'E': - case 'g': - case 'G': - case 'a': - case 'A': - if (lm == 'L') - specifier = Format.Lg; - else if (lm1 ||lm2 || lm == 'h') - return error(); - else - specifier = Format.g; - break; - case 'c': - if (hash || zero || - lm1 || lm2 || lm == 'h') - return error(); - specifier = lm == 'l' ? 
Format.lc : Format.c; - break; - case 's': - if (hash || zero || - lm1 || lm2 || lm == 'h') + if (hash || zero) return error(); - specifier = lm == 'l' ? Format.ls : Format.s; break; - case 'p': - if (lm1 || lm == 'h' || lm == 'l') + case 'd': + case 'i': + if (hash) return error(); - specifier = Format.p; break; case 'n': - if (flags || hash || zero || - width || precision || - lm == 'L') - { + if (hash || zero || precision || width || flags) return error(); - } - specifier = lm == 'l' && lm2 ? Format.lln : - lm == 'l' ? Format.ln : - lm == 'h' && lm2 ? Format.hhn : - lm == 'h' ? Format.hn : - lm == 'j' ? Format.jn : - lm == 'z' ? Format.zn : - lm == 't' ? Format.tn : - Format.n; break; default: - return error(); + break; } idx = i; @@ -616,19 +521,19 @@ unittest assert(idx == 2); idx = 0; - assert(parseFormatSpecifier("%u", idx, widthStar, precisionStar) == Format.d); + assert(parseFormatSpecifier("%u", idx, widthStar, precisionStar) == Format.u); assert(idx == 2); idx = 0; - assert(parseFormatSpecifier("%o", idx, widthStar, precisionStar) == Format.d); + assert(parseFormatSpecifier("%o", idx, widthStar, precisionStar) == Format.u); assert(idx == 2); idx = 0; - assert(parseFormatSpecifier("%x", idx, widthStar, precisionStar) == Format.d); + assert(parseFormatSpecifier("%x", idx, widthStar, precisionStar) == Format.u); assert(idx == 2); idx = 0; - assert(parseFormatSpecifier("%X", idx, widthStar, precisionStar) == Format.d); + assert(parseFormatSpecifier("%X", idx, widthStar, precisionStar) == Format.u); assert(idx == 2); idx = 0; @@ -652,7 +557,7 @@ unittest assert(idx == 2); idx = 0; - assert(parseFormatSpecifier("%lg", idx, widthStar, precisionStar) == Format.g); + assert(parseFormatSpecifier("%lg", idx, widthStar, precisionStar) == Format.error); assert(idx == 3); // width, precision @@ -674,7 +579,7 @@ unittest // Too short formats { foreach (s; ["%", "%-", "%+", "% ", "%#", "%0", "%*", "%1", "%19", "%.", "%.*", "%.1", "%.12", - "%j", "%z", "%t", "%l", "%h", 
"%ll", "%hh", "%K"]) + "%j", "%z", "%t", "%l", "%h", "%ll", "%hh"]) { idx = 0; assert(parseFormatSpecifier(s, idx, widthStar, precisionStar) == Format.error); @@ -688,10 +593,11 @@ unittest "%#c", "%0c", "%jc", "%zc", "%tc", "%Lc", "%hc", "%hhc", "%llc", "%#s", "%0s", "%js", "%zs", "%ts", "%Ls", "%hs", "%hhs", "%lls", "%jp", "%zp", "%tp", "%Lp", "%hp", "%lp", "%hhp", "%llp", - "%-n", "%+n", "% n", "%#n", "%0n", "%*n", "%1n", "%19n", "%.n", "%.*n", "%.1n", "%.12n", "%Ln"]) + "%-n", "%+n", "% n", "%#n", "%0n", "%*n", "%1n", "%19n", "%.n", "%.*n", "%.1n", "%.12n", "%Ln", "%K"]) { idx = 0; assert(parseFormatSpecifier(s, idx, widthStar, precisionStar) == Format.error); + import std.stdio; assert(idx == s.length); } } diff --git a/src/dmd/chkscanf.d b/src/dmd/chkscanf.d index c3d58b670d2c..6f330160e9c9 100644 --- a/src/dmd/chkscanf.d +++ b/src/dmd/chkscanf.d @@ -17,6 +17,7 @@ module dmd.chkscanf; import core.stdc.stdio : scanf; import core.stdc.ctype : isdigit; +import dmd.chkformat; import dmd.errors; import dmd.expression; import dmd.globals; @@ -100,40 +101,49 @@ bool checkScanfFormat(ref const Loc loc, scope const char[] format, scope Expres final switch (fmt) { + case Format.n: case Format.d: // pointer to int if (!(t.ty == Tpointer && tnext.ty == Tint32)) errorMsg(null, slice, e, "int*", t); break; + case Format.hhn: case Format.hhd: // pointer to signed char if (!(t.ty == Tpointer && tnext.ty == Tint16)) errorMsg(null, slice, e, "byte*", t); break; + case Format.hn: case Format.hd: // pointer to short if (!(t.ty == Tpointer && tnext.ty == Tint16)) errorMsg(null, slice, e, "short*", t); break; + case Format.ln: case Format.ld: // pointer to long int if (!(t.ty == Tpointer && tnext.isintegral() && tnext.size() == c_longsize)) errorMsg(null, slice, e, (c_longsize == 4 ? 
"int*" : "long*"), t); break; + case Format.lln: case Format.lld: // pointer to long long int if (!(t.ty == Tpointer && tnext.ty == Tint64)) errorMsg(null, slice, e, "long*", t); break; + case Format.jn: case Format.jd: // pointer to intmax_t if (!(t.ty == Tpointer && tnext.ty == Tint64)) errorMsg(null, slice, e, "core.stdc.stdint.intmax_t*", t); break; + case Format.zn: case Format.zd: // pointer to size_t if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tuns64 : Tuns32))) errorMsg(null, slice, e, "size_t*", t); break; + + case Format.tn: case Format.td: // pointer to ptrdiff_t if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tint64 : Tint32))) errorMsg(null, slice, e, "ptrdiff_t*", t); @@ -182,11 +192,13 @@ bool checkScanfFormat(ref const Loc loc, scope const char[] format, scope Expres errorMsg(null, slice, e, "real*", t); break; + case Format.c: case Format.s: // pointer to char string if (!(t.ty == Tpointer && (tnext.ty == Tchar || tnext.ty == Tint8 || tnext.ty == Tuns8))) errorMsg(null, slice, e, "char*", t); break; + case Format.lc: case Format.ls: // pointer to wchar_t string const twchar_t = global.params.isWindows ? Twchar : Tdchar; if (!(t.ty == Tpointer && tnext.ty == twchar_t)) @@ -211,37 +223,6 @@ bool checkScanfFormat(ref const Loc loc, scope const char[] format, scope Expres private: -/* Different kinds of formatting specifications, variations we don't - care about are merged. (Like we don't care about the difference between - f, e, g, a, etc.) 
- */ -enum Format -{ - d, // pointer to int - hhd, // pointer to signed char - hd, // pointer to short int - ld, // pointer to long int - lld, // pointer to long long int - jd, // pointer to intmax_t - zd, // pointer to size_t - td, // pointer to ptrdiff_t - u, // pointer to unsigned int - hhu, // pointer to unsigned char - hu, // pointer to unsigned short int - lu, // pointer to unsigned long int - llu, // pointer to unsigned long long int - ju, // pointer to uintmax_t - g, // pointer to float - lg, // pointer to double - Lg, // pointer to long double - s, // pointer to char string - ls, // pointer to wchar_t string - p, // double pointer - percent, // %% (i.e. no argument) - error, // invalid format specification -} - - /************************************** * Parse the *format specifier* which is of the form: * @@ -251,7 +232,7 @@ enum Format * format = format string * idx = index of `%` of start of format specifier, * which gets updated to index past the end of it, - * even if Format.error is returned + * even if `Format.error` is returned * asterisk = set if there is a `*` sub-specifier * Returns: * Format @@ -322,109 +303,12 @@ Format parseFormatSpecifier(scope const char[] format, ref size_t idx, return error(); } - /* Read the `length modifier` + /* Read the specifier */ - const lm = format[i]; - bool lm1; // if jztL - bool lm2; // if `hh` or `ll` - if (lm == 'j' || - lm == 'z' || - lm == 't' || - lm == 'L') - { - ++i; - if (i == length) - return error(); - lm1 = true; - } - else if (lm == 'h' || lm == 'l') - { - ++i; - if (i == length) - return error(); - lm2 = lm == format[i]; - if (lm2) - { - ++i; - if (i == length) - return error(); - } - } - - /* Read the `specifier` - */ - Format specifier; - const sc = format[i]; - ++i; - switch (sc) - { - case 'd': - case 'i': - case 'n': - if (lm == 'L') - return error(); - specifier = lm == 'h' && lm2 ? Format.hhd : - lm == 'h' ? Format.hd : - lm == 'l' && lm2 ? Format.lld : - lm == 'l' ? 
Format.ld : - lm == 'j' ? Format.jd : - lm == 'z' ? Format.zd : - lm == 't' ? Format.td : - Format.d; - break; - case 'u': - case 'o': - case 'x': - case 'X': - if (lm == 'L') - return error(); - specifier = lm == 'h' && lm2 ? Format.hhu : - lm == 'h' ? Format.hu : - lm == 'l' && lm2 ? Format.llu : - lm == 'l' ? Format.lu : - lm == 'j' ? Format.ju : - lm == 'z' ? Format.zd : - lm == 't' ? Format.td : - Format.u; - break; - - case 'f': - case 'F': - case 'e': - case 'E': - case 'g': - case 'G': - case 'a': - case 'A': - if (lm == 'L') - specifier = Format.Lg; - else if (lm == 'l' && !lm2) - specifier = Format.lg; - else if (lm1 || lm2 || lm == 'h') - return error(); - else - specifier = Format.g; - break; - - case 'c': - case 's': - if (lm == 'l' && !lm2) - specifier = Format.ls; - else if (lm1 || lm2 || lm == 'h') - return error(); - else - specifier = Format.s; - break; - - case 'p': - if (lm1 || lm2 || lm == 'h' || lm == 'l') - return error(); - specifier = Format.p; - break; - - default: - return error(); - } + char genSpec; + Format specifier = parseGenericFormatSpecifier(format, i, genSpec); + if (specifier == Format.error) + return error(); idx = i; return specifier; // success @@ -527,7 +411,7 @@ unittest assert(idx == 2); idx = 0; - assert(parseFormatSpecifier("%n", idx, asterisk) == Format.d); + assert(parseFormatSpecifier("%n", idx, asterisk) == Format.n); assert(idx == 2); idx = 0; @@ -551,7 +435,7 @@ unittest assert(idx == 2); idx = 0; - assert(parseFormatSpecifier("%c", idx, asterisk) == Format.s); + assert(parseFormatSpecifier("%c", idx, asterisk) == Format.c); assert(idx == 2); // asterisk From a99794d28402f1a896e8fb0b2e5da76079104d3b Mon Sep 17 00:00:00 2001 From: Luhrel Date: Mon, 2 Mar 2020 17:59:16 +0100 Subject: [PATCH 03/10] Add validation for sscanf, fscanf, sprintf and fprintf --- src/dmd/expressionsem.d | 33 ++++++++++++++++++++----------- src/dmd/id.d | 4 ++++ test/fail_compilation/chkprintf.d | 5 ++++- test/fail_compilation/chkscanf.d | 6 
+++++- 4 files changed, 34 insertions(+), 14 deletions(-) diff --git a/src/dmd/expressionsem.d b/src/dmd/expressionsem.d index 9054e2317863..d27ee2e5d3cd 100644 --- a/src/dmd/expressionsem.d +++ b/src/dmd/expressionsem.d @@ -2151,24 +2151,33 @@ private bool functionParameters(const ref Loc loc, Scope* sc, (*arguments)[i] = arg; } - /* If calling C printf(), check the format string against the arguments + /* If calling C scanf(), sscanf(), fscanf(), printf(), sprintf() or fprintf(), + check the format string against the arguments */ - if (tf.linkage == LINK.c && nparams >= 1 && fd && fd.ident == Id.printf) + if (tf.linkage == LINK.c && fd) { - if (auto se = (*arguments)[0].isStringExp()) + if (fd.ident == Id.printf && nparams >= 1) { - if (checkPrintfFormat(se.loc, se.peekString(), (*arguments)[1 .. nargs])) + auto se = (*arguments)[0].isStringExp(); + if (se && checkPrintfFormat(se.loc, se.peekString(), (*arguments)[1 .. nargs])) err = true; } - } - - /* If calling C scanf(), check the format string against the arguments - */ - if (tf.linkage == LINK.c && nparams >= 1 && fd && fd.ident == Id.scanf) - { - if (auto se = (*arguments)[0].isStringExp()) + else if (fd.ident == Id.scanf && nparams >= 1) + { + auto se = (*arguments)[0].isStringExp(); + if (se && checkScanfFormat(se.loc, se.peekString(), (*arguments)[1 .. nargs])) + err = true; + } + else if ((fd.ident == Id.sprintf || fd.ident == Id.fprintf) && nparams >= 2) + { + auto se = (*arguments)[1].isStringExp(); + if (se && checkPrintfFormat(se.loc, se.peekString(), (*arguments)[2 .. nargs])) + err = true; + } + else if ((fd.ident == Id.sscanf || fd.ident == Id.fscanf) && nparams >= 2) { - if (checkScanfFormat(se.loc, se.peekString(), (*arguments)[1 .. nargs])) + auto se = (*arguments)[1].isStringExp(); + if (se && checkScanfFormat(se.loc, se.peekString(), (*arguments)[2 .. 
nargs])) err = true; } } diff --git a/src/dmd/id.d b/src/dmd/id.d index 1d9fca2e7a82..7d2588cb7c24 100644 --- a/src/dmd/id.d +++ b/src/dmd/id.d @@ -133,8 +133,12 @@ immutable Msgtable[] msgtable = { "_assert", "assert" }, { "_unittest", "unittest" }, { "_body", "body" }, + { "fprintf" }, { "printf" }, + { "sprintf" }, + { "fscanf" }, { "scanf" }, + { "sscanf" }, { "TypeInfo" }, { "TypeInfo_Class" }, diff --git a/test/fail_compilation/chkprintf.d b/test/fail_compilation/chkprintf.d index 6010e3565b6d..6460199bf73e 100644 --- a/test/fail_compilation/chkprintf.d +++ b/test/fail_compilation/chkprintf.d @@ -20,6 +20,8 @@ fail_compilation/chkprintf.d(115): Deprecation: argument `16L` for format specif fail_compilation/chkprintf.d(116): Deprecation: argument `17L` for format specification `"%c"` must be `char`, not `long` fail_compilation/chkprintf.d(117): Deprecation: argument `& u` for format specification `"%s"` must be `char*`, not `int*` fail_compilation/chkprintf.d(118): Deprecation: argument `& u` for format specification `"%ls"` must be `wchar_t*`, not `int*` +fail_compilation/chkprintf.d(119): Deprecation: argument `& u` for format specification `"%d"` must be `int`, not `int*` +fail_compilation/chkprintf.d(120): Deprecation: argument `& u` for format specification `"%d"` must be `int`, not `int*` --- */ @@ -46,4 +48,5 @@ void test16() { printf("%c\n", 16L); } void test17() { printf("%c\n", 17L); } void test18() { int u; printf("%s\n", &u); } void test19() { int u; printf("%ls\n", &u); } - +void test20() { int u; char[] s; sprintf(&s[0], "%d\n", &u); } +void test21() { int u; fprintf(null, "%d\n", &u); } diff --git a/test/fail_compilation/chkscanf.d b/test/fail_compilation/chkscanf.d index ac97b9b08e47..783dc135cf0c 100644 --- a/test/fail_compilation/chkscanf.d +++ b/test/fail_compilation/chkscanf.d @@ -32,11 +32,13 @@ fail_compilation/chkscanf.d(129): Deprecation: argument `& u` for format specifi fail_compilation/chkscanf.d(130): Deprecation: format specifier 
`"%[n"` is invalid fail_compilation/chkscanf.d(131): Deprecation: format specifier `"%]"` is invalid fail_compilation/chkscanf.d(132): Deprecation: argument `& u` for format specification `"%90s"` must be `char*`, not `int*` +fail_compilation/chkscanf.d(133): Deprecation: argument `0L` for format specification `"%d"` must be `int*`, not `long` +fail_compilation/chkscanf.d(134): Deprecation: argument `0L` for format specification `"%d"` must be `int*`, not `long` --- */ -import core.stdc.stdio : scanf; +import core.stdc.stdio : fscanf, scanf, sscanf; #line 100 @@ -72,3 +74,5 @@ void test29() { int u; scanf("%hhn\n", &u); } void test30() { int u; scanf("%[n", &u); } void test31() { int u; scanf("%]\n", &u); } void test32() { int u; scanf("%90s\n", &u); } +void test33() { sscanf("1234", "%d\n", 0L); } +void test34() { fscanf(null, "%d\n", 0L); } From e06fd60cf5de4e1fe37630416492f1f8fe32be1d Mon Sep 17 00:00:00 2001 From: Luhrel Date: Tue, 3 Mar 2020 11:39:04 +0100 Subject: [PATCH 04/10] Update documentation --- changelog/chkprintf.md | 3 ++- changelog/chkscanf.md | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/changelog/chkprintf.md b/changelog/chkprintf.md index d55c902412bf..9d9b61b4508f 100644 --- a/changelog/chkprintf.md +++ b/changelog/chkprintf.md @@ -2,6 +2,8 @@ Follows the C99 specification 7.19.6.1 for printf. +sprintf and fprintf are also affected by this change. + Takes a generous, rather than strict, view of compatiblity. For example, an unsigned value can be formatted with a signed specifier. @@ -47,4 +49,3 @@ printf("%zd\n", s.sizeof); long i; printf("%lld\n", i); ``` - diff --git a/changelog/chkscanf.md b/changelog/chkscanf.md index 7493051e586c..18af58f6d597 100644 --- a/changelog/chkscanf.md +++ b/changelog/chkscanf.md @@ -1,5 +1,7 @@ # Validate scanf arguments against format specifiers +sscanf and fscanf are also affected by this change. + Follows the C99 specification 7.19.6.2 for scanf. Takes a strict view of compatiblity. 
@@ -40,9 +42,9 @@ scanf("%lld%*c\n", u); should be replaced with: ``` int i; -printf("%d\n", &i; +scanf("%d\n", &i); size_t s; scanf("%zd\n", &s); ulong u; -printf("%llu%*c\n", u); +scanf("%llu%*c\n", &u); ``` From 985e8cf26e4337a4126592ad63f9b0cbe629af0c Mon Sep 17 00:00:00 2001 From: Luhrel Date: Tue, 3 Mar 2020 11:54:56 +0100 Subject: [PATCH 05/10] Simplifying code as requested. --- src/dmd/chkscanf.d | 10 ++++------ src/dmd/expressionsem.d | 34 ++++++++++++++++++++++++++++++++-- 2 files changed, 36 insertions(+), 8 deletions(-) diff --git a/src/dmd/chkscanf.d b/src/dmd/chkscanf.d index 6f330160e9c9..2c7732d007e2 100644 --- a/src/dmd/chkscanf.d +++ b/src/dmd/chkscanf.d @@ -271,13 +271,11 @@ Format parseFormatSpecifier(scope const char[] format, ref size_t idx, } // fieldWidth + while (isdigit(format[i])) { - while (isdigit(format[i])) - { - i++; - if (i == length) - return error(); - } + i++; + if (i == length) + return error(); } /* Read the scanset diff --git a/src/dmd/expressionsem.d b/src/dmd/expressionsem.d index d27ee2e5d3cd..76d84d5b1768 100644 --- a/src/dmd/expressionsem.d +++ b/src/dmd/expressionsem.d @@ -2156,7 +2156,37 @@ private bool functionParameters(const ref Loc loc, Scope* sc, */ if (tf.linkage == LINK.c && fd) { - if (fd.ident == Id.printf && nparams >= 1) + int paramOffset = 0; + bool function(ref const(Loc) loc, scope const(char[]) format, scope Expression[] args) chkFn; + + if (fd.ident == Id.printf) + { + paramOffset = 1; + chkFn = &checkPrintfFormat; + } + else if (fd.ident == Id.scanf) + { + paramOffset = 1; + chkFn = &checkScanfFormat; + } + else if (fd.ident == Id.sprintf || fd.ident == Id.fprintf) + { + paramOffset = 2; + chkFn = &checkPrintfFormat; + } + else if (fd.ident == Id.sscanf || fd.ident == Id.fscanf) + { + paramOffset = 2; + chkFn = &checkScanfFormat; + } + + if (paramOffset && nparams >= paramOffset) + { + auto se = (*arguments)[paramOffset - 1].isStringExp(); + if (se && chkFn(se.loc, se.peekString(), 
(*arguments)[paramOffset .. nargs])) + err = true; + } + /*if (fd.ident == Id.printf && nparams >= 1) { auto se = (*arguments)[0].isStringExp(); if (se && checkPrintfFormat(se.loc, se.peekString(), (*arguments)[1 .. nargs])) @@ -2179,7 +2209,7 @@ private bool functionParameters(const ref Loc loc, Scope* sc, auto se = (*arguments)[1].isStringExp(); if (se && checkScanfFormat(se.loc, se.peekString(), (*arguments)[2 .. nargs])) err = true; - } + }*/ } /* Remaining problems: From bbc5c8d526223c5b88a39d27b7f7ab63d158f530 Mon Sep 17 00:00:00 2001 From: Luhrel Date: Tue, 3 Mar 2020 12:38:41 +0100 Subject: [PATCH 06/10] Merge chkscanf and chkprintf into chkformat --- src/dmd/chkformat.d | 1055 ++++++++++++++++++++++++++++- src/dmd/chkprintf.d | 604 ----------------- src/dmd/chkscanf.d | 496 -------------- src/dmd/expressionsem.d | 3 +- test/fail_compilation/chkformat.d | 121 ++++ test/fail_compilation/chkprintf.d | 52 -- test/fail_compilation/chkscanf.d | 78 --- 7 files changed, 1176 insertions(+), 1233 deletions(-) delete mode 100644 src/dmd/chkprintf.d delete mode 100644 src/dmd/chkscanf.d create mode 100644 test/fail_compilation/chkformat.d delete mode 100644 test/fail_compilation/chkprintf.d delete mode 100644 test/fail_compilation/chkscanf.d diff --git a/src/dmd/chkformat.d b/src/dmd/chkformat.d index f95aa11d1b2e..7b033287612c 100644 --- a/src/dmd/chkformat.d +++ b/src/dmd/chkformat.d @@ -1,5 +1,5 @@ /** - * Parse the `format` string. Made for `scanf` and `printf` checks. + * Check the arguments to `printf` and `scanf` against the `format` string. * * Compiler implementation of the * $(LINK2 http://www.dlang.org, D programming language). 
@@ -13,6 +13,694 @@ */ module dmd.chkformat; +//import core.stdc.stdio : printf, scanf; +import core.stdc.ctype : isdigit; + +import dmd.errors; +import dmd.expression; +import dmd.globals; +import dmd.mtype; +import dmd.target; + + +/****************************************** + * Check that arguments to a printf format string are compatible + * with that string. Issue errors for incompatibilities. + * + * Follows the C99 specification for printf. + * + * Takes a generous, rather than strict, view of compatibility. + * For example, an unsigned value can be formatted with a signed specifier. + * + * Diagnosed incompatibilities are: + * + * 1. incompatible sizes which will cause argument misalignment + * 2. dereferencing arguments that are not pointers + * 3. insufficient number of arguments + * 4. struct arguments + * 5. array and slice arguments + * 6. non-pointer arguments to `s` specifier + * 7. non-standard formats + * 8. undefined behavior per C99 + * + * Per the C Standard, extra arguments are ignored. + * + * No attempt is made to fix the arguments or the format string. + * + * Returns: + * `true` if checking stopped because an argument was missing; other problems are only reported as deprecations + * References: + * C99 7.19.6.1 + * http://www.cplusplus.com/reference/cstdio/printf/ + */ +bool checkPrintfFormat(ref const Loc loc, scope const char[] format, scope Expression[] args) +{ + //printf("checkPrintFormat('%.*s')\n", cast(int)format.length, format.ptr); + size_t n = 0; + for (size_t i = 0; i < format.length;) + { + if (format[i] != '%') + { + ++i; + continue; + } + bool widthStar; + bool precisionStar; + size_t j = i; + const fmt = parsePrintfFormatSpecifier(format, j, widthStar, precisionStar); + const slice = format[i .. 
j]; + i = j; + + if (fmt == Format.percent) + continue; // "%%", no arguments + + Expression getNextArg() + { + if (n == args.length) + { + deprecation(loc, "more format specifiers than %d arguments", cast(int)n); + return null; + } + return args[n++]; + } + + void errorMsg(const char* prefix, const char[] specifier, Expression arg, const char* texpect, Type tactual) + { + deprecation(arg.loc, "%sargument `%s` for format specification `\"%.*s\"` must be `%s`, not `%s`", + prefix ? prefix : "", arg.toChars(), cast(int)slice.length, slice.ptr, texpect, tactual.toChars()); + } + + if (widthStar) + { + auto e = getNextArg(); + if (!e) + return true; + auto t = e.type.toBasetype(); + if (t.ty != Tint32 && t.ty != Tuns32) + errorMsg("width ", slice, e, "int", t); + } + + if (precisionStar) + { + auto e = getNextArg(); + if (!e) + return true; + auto t = e.type.toBasetype(); + if (t.ty != Tint32 && t.ty != Tuns32) + errorMsg("precision ", slice, e, "int", t); + } + + auto e = getNextArg(); + if (!e) + return true; + auto t = e.type.toBasetype(); + auto tnext = t.nextOf(); + const c_longsize = target.c.longsize; + const is64bit = global.params.is64bit; + + final switch (fmt) + { + case Format.u: // unsigned int + case Format.d: // int + if (t.ty != Tint32 && t.ty != Tuns32) + errorMsg(null, slice, e, "int", t); + break; + + case Format.hhu: // unsigned char + case Format.hhd: // signed char + if (t.ty != Tint8 && t.ty != Tuns8) + errorMsg(null, slice, e, "byte", t); + break; + + case Format.hu: // unsigned short int + case Format.hd: // short int + if (t.ty != Tint16 && t.ty != Tuns16) + errorMsg(null, slice, e, "short", t); + break; + + case Format.lu: // unsigned long int + case Format.ld: // long int + if (!(t.isintegral() && t.size() == c_longsize)) + errorMsg(null, slice, e, (c_longsize == 4 ? 
"int" : "long"), t); + break; + + case Format.llu: // unsigned long long int + case Format.lld: // long long int + if (t.ty != Tint64 && t.ty != Tuns64) + errorMsg(null, slice, e, "long", t); + break; + + case Format.ju: // uintmax_t + case Format.jd: // intmax_t + if (t.ty != Tint64 && t.ty != Tuns64) + errorMsg(null, slice, e, "core.stdc.stdint.intmax_t", t); + break; + + case Format.zd: // size_t + if (!(t.isintegral() && t.size() == (is64bit ? 8 : 4))) + errorMsg(null, slice, e, "size_t", t); + break; + + case Format.td: // ptrdiff_t + if (!(t.isintegral() && t.size() == (is64bit ? 8 : 4))) + errorMsg(null, slice, e, "ptrdiff_t", t); + break; + + case Format.g: // double + if (t.ty != Tfloat64 && t.ty != Timaginary64) + errorMsg(null, slice, e, "double", t); + break; + + case Format.Lg: // long double + if (t.ty != Tfloat80 && t.ty != Timaginary80) + errorMsg(null, slice, e, "real", t); + break; + + case Format.p: // pointer + if (t.ty != Tpointer && t.ty != Tnull && t.ty != Tclass && t.ty != Tdelegate && t.ty != Taarray) + errorMsg(null, slice, e, "void*", t); + break; + + case Format.n: // pointer to int + if (!(t.ty == Tpointer && tnext.ty == Tint32)) + errorMsg(null, slice, e, "int*", t); + break; + + case Format.ln: // pointer to long int + if (!(t.ty == Tpointer && tnext.isintegral() && tnext.size() == c_longsize)) + errorMsg(null, slice, e, (c_longsize == 4 ? 
"int*" : "long*"), t); + break; + + case Format.lln: // pointer to long long int + if (!(t.ty == Tpointer && tnext.ty == Tint64)) + errorMsg(null, slice, e, "long*", t); + break; + + case Format.hn: // pointer to short + if (!(t.ty == Tpointer && tnext.ty == Tint16)) + errorMsg(null, slice, e, "short*", t); + break; + + case Format.hhn: // pointer to signed char (8-bit pointee, hence Tint8) + if (!(t.ty == Tpointer && tnext.ty == Tint8)) + errorMsg(null, slice, e, "byte*", t); + break; + + case Format.jn: // pointer to intmax_t + if (!(t.ty == Tpointer && tnext.ty == Tint64)) + errorMsg(null, slice, e, "core.stdc.stdint.intmax_t*", t); + break; + + case Format.zn: // pointer to size_t + if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tuns64 : Tuns32))) + errorMsg(null, slice, e, "size_t*", t); + break; + case Format.tn: // pointer to ptrdiff_t + if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tint64 : Tint32))) + errorMsg(null, slice, e, "ptrdiff_t*", t); + break; + + case Format.c: // char + if (t.ty != Tint32 && t.ty != Tuns32) + errorMsg(null, slice, e, "char", t); + break; + + case Format.lc: // wint_t + if (t.ty != Tint32 && t.ty != Tuns32) + errorMsg(null, slice, e, "wchar_t", t); + break; + + case Format.s: // pointer to char string + if (!(t.ty == Tpointer && (tnext.ty == Tchar || tnext.ty == Tint8 || tnext.ty == Tuns8))) + errorMsg(null, slice, e, "char*", t); + break; + + case Format.ls: // pointer to wchar_t string + const twchar_t = global.params.isWindows ? Twchar : Tdchar; + if (!(t.ty == Tpointer && tnext.ty == twchar_t)) + errorMsg(null, slice, e, "wchar_t*", t); + break; + + case Format.error: + deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr); + break; + + case Format.lg: + case Format.percent: + assert(0); + } + } + return false; +} + +/****************************************** + * Check that arguments to a scanf format string are compatible + * with that string. Issue errors for incompatibilities. 
+ * + * Follows the C99 specification for scanf. + * + * Takes a stricter view of compatibility than the printf check. + * For example, `%u` requires a pointer to `uint`; a pointer to `int` is rejected. + * + * Diagnosed incompatibilities are: + * + * 1. incompatible sizes which will cause argument misalignment + * 2. dereferencing arguments that are not pointers + * 3. insufficient number of arguments + * 4. struct arguments + * 5. array and slice arguments + * 6. non-standard formats + * 7. undefined behavior per C99 + * + * Per the C Standard, extra arguments are ignored. + * + * No attempt is made to fix the arguments or the format string. + * + * Returns: + * `true` if checking stopped because an argument was missing; other problems are only reported as deprecations + * References: + * C99 7.19.6.2 + * http://www.cplusplus.com/reference/cstdio/scanf/ + */ +bool checkScanfFormat(ref const Loc loc, scope const char[] format, scope Expression[] args) +{ + size_t n = 0; + for (size_t i = 0; i < format.length;) + { + if (format[i] != '%') + { + ++i; + continue; + } + bool asterisk; + size_t j = i; + const fmt = parseScanfFormatSpecifier(format, j, asterisk); + const slice = format[i .. j]; + i = j; + + if (fmt == Format.percent || asterisk) + continue; // "%%", "%*": no arguments + + Expression getNextArg() + { + if (n == args.length) + { + if (!asterisk) + deprecation(loc, "more format specifiers than %d arguments", cast(int)n); + return null; + } + return args[n++]; + } + + void errorMsg(const char* prefix, const char[] specifier, Expression arg, const char* texpect, Type tactual) + { + deprecation(arg.loc, "%sargument `%s` for format specification `\"%.*s\"` must be `%s`, not `%s`", + prefix ? 
prefix : "", arg.toChars(), cast(int)slice.length, slice.ptr, texpect, tactual.toChars()); + } + + auto e = getNextArg(); + if (!e) + return true; + + auto t = e.type.toBasetype(); + auto tnext = t.nextOf(); + const c_longsize = target.c.longsize; + const is64bit = global.params.is64bit; + + final switch (fmt) + { + case Format.n: + case Format.d: // pointer to int + if (!(t.ty == Tpointer && tnext.ty == Tint32)) + errorMsg(null, slice, e, "int*", t); + break; + + case Format.hhn: + case Format.hhd: // pointer to signed char (8-bit pointee, hence Tint8) + if (!(t.ty == Tpointer && tnext.ty == Tint8)) + errorMsg(null, slice, e, "byte*", t); + break; + + case Format.hn: + case Format.hd: // pointer to short + if (!(t.ty == Tpointer && tnext.ty == Tint16)) + errorMsg(null, slice, e, "short*", t); + break; + + case Format.ln: + case Format.ld: // pointer to long int + if (!(t.ty == Tpointer && tnext.isintegral() && tnext.size() == c_longsize)) + errorMsg(null, slice, e, (c_longsize == 4 ? "int*" : "long*"), t); + break; + + case Format.lln: + case Format.lld: // pointer to long long int + if (!(t.ty == Tpointer && tnext.ty == Tint64)) + errorMsg(null, slice, e, "long*", t); + break; + + case Format.jn: + case Format.jd: // pointer to intmax_t + if (!(t.ty == Tpointer && tnext.ty == Tint64)) + errorMsg(null, slice, e, "core.stdc.stdint.intmax_t*", t); + break; + + case Format.zn: + case Format.zd: // pointer to size_t + if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tuns64 : Tuns32))) + errorMsg(null, slice, e, "size_t*", t); + break; + + case Format.tn: + case Format.td: // pointer to ptrdiff_t + if (!(t.ty == Tpointer && tnext.ty == (is64bit ? 
Tint64 : Tint32))) + errorMsg(null, slice, e, "ptrdiff_t*", t); + break; + + case Format.u: // pointer to unsigned int + if (!(t.ty == Tpointer && tnext.ty == Tuns32)) + errorMsg(null, slice, e, "uint*", t); + break; + + case Format.hhu: // pointer to unsigned char + if (!(t.ty == Tpointer && tnext.ty == Tuns8)) + errorMsg(null, slice, e, "ubyte*", t); + break; + + case Format.hu: // pointer to unsigned short int + if (!(t.ty == Tpointer && tnext.ty == Tuns16)) + errorMsg(null, slice, e, "ushort*", t); + break; + + case Format.lu: // pointer to unsigned long int; keyed on the C `long` size so it agrees with the message and with %ld + if (!(t.ty == Tpointer && tnext.ty == (c_longsize == 4 ? Tuns32 : Tuns64))) + errorMsg(null, slice, e, (c_longsize == 4 ? "uint*" : "ulong*"), t); + break; + + case Format.llu: // pointer to unsigned long long int + if (!(t.ty == Tpointer && tnext.ty == Tuns64)) + errorMsg(null, slice, e, "ulong*", t); + break; + + case Format.ju: // pointer to uintmax_t; 64-bit pointee, matching the `ulong*` message and the %jd case + if (!(t.ty == Tpointer && tnext.ty == Tuns64)) + errorMsg(null, slice, e, "ulong*", t); + break; + + case Format.g: // pointer to float + if (!(t.ty == Tpointer && tnext.ty == Tfloat32)) + errorMsg(null, slice, e, "float*", t); + break; + case Format.lg: // pointer to double + if (!(t.ty == Tpointer && tnext.ty == Tfloat64)) + errorMsg(null, slice, e, "double*", t); + break; + case Format.Lg: // pointer to long double + if (!(t.ty == Tpointer && tnext.ty == Tfloat80)) + errorMsg(null, slice, e, "real*", t); + break; + + case Format.c: + case Format.s: // pointer to char string + if (!(t.ty == Tpointer && (tnext.ty == Tchar || tnext.ty == Tint8 || tnext.ty == Tuns8))) + errorMsg(null, slice, e, "char*", t); + break; + + case Format.lc: + case Format.ls: // pointer to wchar_t string + const twchar_t = global.params.isWindows ? 
Twchar : Tdchar; + if (!(t.ty == Tpointer && tnext.ty == twchar_t)) + errorMsg(null, slice, e, "wchar_t*", t); + break; + + case Format.p: // double pointer + if (!(t.ty == Tpointer && tnext.ty == Tpointer)) + errorMsg(null, slice, e, "void**", t); + break; + + case Format.error: + deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr); + break; + + case Format.percent: + assert(0); + } + } + return false; +} + +private: + +/************************************** + * Parse the *format specifier* which is of the form: + * + * `%[*][width][length]specifier` + * + * Params: + * format = format string + * idx = index of `%` of start of format specifier, + * which gets updated to index past the end of it, + * even if `Format.error` is returned + * asterisk = set if there is a `*` sub-specifier + * Returns: + * Format + */ +pure nothrow @safe +Format parseScanfFormatSpecifier(scope const char[] format, ref size_t idx, + out bool asterisk) +{ + auto i = idx; + assert(format[i] == '%'); + const length = format.length; + + Format error() + { + idx = i; + return Format.error; + } + + ++i; + if (i == length) + return error(); + + if (format[i] == '%') + { + idx = i + 1; + return Format.percent; + } + + // * sub-specifier + if (format[i] == '*') + { + ++i; + if (i == length) + return error(); + asterisk = true; + } + + // fieldWidth + while (isdigit(format[i])) + { + i++; + if (i == length) + return error(); + } + + /* Read the scanset + * A scanset can be anything, so we just check that it is paired + */ + if (format[i] == '[') + { + while (i < length) + { + if (format[i] == ']') + break; + ++i; + } + + // no `]` found + if (i == length) + return error(); + + ++i; + // no specifier after `]` + // it could be mixed with the one above, but then idx won't have the right index + if (i == length) + return error(); + } + + /* Read the specifier + */ + char genSpec; + Format specifier = parseGenericFormatSpecifier(format, i, genSpec); + if 
(specifier == Format.error) + return error(); + + idx = i; + return specifier; // success +} + +/************************************** + * Parse the *format specifier* which is of the form: + * + * `%[flags][field width][.precision][length modifier]specifier` + * + * Params: + * format = format string + * idx = index of `%` of start of format specifier, + * which gets updated to index past the end of it, + * even if `Format.error` is returned + * widthStar = set if * for width + * precisionStar = set if * for precision + * Returns: + * Format + */ +pure nothrow @safe +Format parsePrintfFormatSpecifier(scope const char[] format, ref size_t idx, + out bool widthStar, out bool precisionStar) +{ + auto i = idx; + assert(format[i] == '%'); + const length = format.length; + bool hash; + bool zero; + bool flags; + bool width; + bool precision; + + Format error() + { + idx = i; + return Format.error; + } + + ++i; + if (i == length) + return error(); + + if (format[i] == '%') + { + idx = i + 1; + return Format.percent; + } + + /* Read the `flags` + */ + while (1) + { + const c = format[i]; + if (c == '-' || + c == '+' || + c == ' ') + { + flags = true; + } + else if (c == '#') + { + hash = true; + } + else if (c == '0') + { + zero = true; + } + else + break; + ++i; + if (i == length) + return error(); + } + + /* Read the `field width` + */ + { + const c = format[i]; + if (c == '*') + { + width = true; + widthStar = true; + ++i; + if (i == length) + return error(); + } + else if ('1' <= c && c <= '9') + { + width = true; + ++i; + if (i == length) + return error(); + while ('0' <= format[i] && format[i] <= '9') + { + ++i; + if (i == length) + return error(); + } + } + } + + /* Read the `precision` + */ + if (format[i] == '.') + { + precision = true; + ++i; + if (i == length) + return error(); + const c = format[i]; + if (c == '*') + { + precisionStar = true; + ++i; + if (i == length) + return error(); + } + else if ('0' <= c && c <= '9') + { + ++i; + if (i == length) + 
return error(); + while ('0' <= format[i] && format[i] <= '9') + { + ++i; + if (i == length) + return error(); + } + } + } + + /* Read the specifier + */ + char genSpec; + Format specifier = parseGenericFormatSpecifier(format, i, genSpec); + if (specifier == Format.error || specifier == Format.lg) + return error(); + + switch (genSpec) + { + case 'c': + case 's': + if (hash || zero) + return error(); + break; + + case 'd': + case 'i': + if (hash) + return error(); + break; + + case 'n': + if (hash || zero || precision || width || flags) + return error(); + break; + + default: + break; + } + + idx = i; + return specifier; // success +} + /* Different kinds of formatting specifications, variations we don't care about are merged. (Like we don't care about the difference between f, e, g, a, etc.) @@ -225,4 +913,369 @@ unittest idx = 0; assert(parseGenericFormatSpecifier("k", idx, genSpecifier) == Format.error); + + /* + */ + + bool widthStar; + bool precisionStar; + + // one for each Format + idx = 0; + assert(parsePrintfFormatSpecifier("%d", idx, widthStar, precisionStar) == Format.d); + assert(idx == 2); + assert(!widthStar && !precisionStar); + + idx = 0; + assert(parsePrintfFormatSpecifier("%ld", idx, widthStar, precisionStar) == Format.ld); + assert(idx == 3); + + idx = 0; + assert(parsePrintfFormatSpecifier("%lld", idx, widthStar, precisionStar) == Format.lld); + assert(idx == 4); + + idx = 0; + assert(parsePrintfFormatSpecifier("%jd", idx, widthStar, precisionStar) == Format.jd); + assert(idx == 3); + + idx = 0; + assert(parsePrintfFormatSpecifier("%zd", idx, widthStar, precisionStar) == Format.zd); + assert(idx == 3); + + idx = 0; + assert(parsePrintfFormatSpecifier("%td", idx, widthStar, precisionStar) == Format.td); + assert(idx == 3); + + idx = 0; + assert(parsePrintfFormatSpecifier("%g", idx, widthStar, precisionStar) == Format.g); + assert(idx == 2); + + idx = 0; + assert(parsePrintfFormatSpecifier("%Lg", idx, widthStar, precisionStar) == Format.Lg); + 
assert(idx == 3); + + idx = 0; + assert(parsePrintfFormatSpecifier("%p", idx, widthStar, precisionStar) == Format.p); + assert(idx == 2); + + idx = 0; + assert(parsePrintfFormatSpecifier("%n", idx, widthStar, precisionStar) == Format.n); + assert(idx == 2); + + idx = 0; + assert(parsePrintfFormatSpecifier("%ln", idx, widthStar, precisionStar) == Format.ln); + assert(idx == 3); + + idx = 0; + assert(parsePrintfFormatSpecifier("%lln", idx, widthStar, precisionStar) == Format.lln); + assert(idx == 4); + + idx = 0; + assert(parsePrintfFormatSpecifier("%hn", idx, widthStar, precisionStar) == Format.hn); + assert(idx == 3); + + idx = 0; + assert(parsePrintfFormatSpecifier("%hhn", idx, widthStar, precisionStar) == Format.hhn); + assert(idx == 4); + + idx = 0; + assert(parsePrintfFormatSpecifier("%jn", idx, widthStar, precisionStar) == Format.jn); + assert(idx == 3); + + idx = 0; + assert(parsePrintfFormatSpecifier("%zn", idx, widthStar, precisionStar) == Format.zn); + assert(idx == 3); + + idx = 0; + assert(parsePrintfFormatSpecifier("%tn", idx, widthStar, precisionStar) == Format.tn); + assert(idx == 3); + + idx = 0; + assert(parsePrintfFormatSpecifier("%c", idx, widthStar, precisionStar) == Format.c); + assert(idx == 2); + + idx = 0; + assert(parsePrintfFormatSpecifier("%lc", idx, widthStar, precisionStar) == Format.lc); + assert(idx == 3); + + idx = 0; + assert(parsePrintfFormatSpecifier("%s", idx, widthStar, precisionStar) == Format.s); + assert(idx == 2); + + idx = 0; + assert(parsePrintfFormatSpecifier("%ls", idx, widthStar, precisionStar) == Format.ls); + assert(idx == 3); + + idx = 0; + assert(parsePrintfFormatSpecifier("%%", idx, widthStar, precisionStar) == Format.percent); + assert(idx == 2); + + // Synonyms + idx = 0; + assert(parsePrintfFormatSpecifier("%i", idx, widthStar, precisionStar) == Format.d); + assert(idx == 2); + + idx = 0; + assert(parsePrintfFormatSpecifier("%u", idx, widthStar, precisionStar) == Format.u); + assert(idx == 2); + + idx = 0; + 
assert(parsePrintfFormatSpecifier("%o", idx, widthStar, precisionStar) == Format.u); + assert(idx == 2); + + idx = 0; + assert(parsePrintfFormatSpecifier("%x", idx, widthStar, precisionStar) == Format.u); + assert(idx == 2); + + idx = 0; + assert(parsePrintfFormatSpecifier("%X", idx, widthStar, precisionStar) == Format.u); + assert(idx == 2); + + idx = 0; + assert(parsePrintfFormatSpecifier("%f", idx, widthStar, precisionStar) == Format.g); + assert(idx == 2); + + idx = 0; + assert(parsePrintfFormatSpecifier("%F", idx, widthStar, precisionStar) == Format.g); + assert(idx == 2); + + idx = 0; + assert(parsePrintfFormatSpecifier("%G", idx, widthStar, precisionStar) == Format.g); + assert(idx == 2); + + idx = 0; + assert(parsePrintfFormatSpecifier("%a", idx, widthStar, precisionStar) == Format.g); + assert(idx == 2); + + idx = 0; + assert(parsePrintfFormatSpecifier("%A", idx, widthStar, precisionStar) == Format.g); + assert(idx == 2); + + idx = 0; + assert(parsePrintfFormatSpecifier("%lg", idx, widthStar, precisionStar) == Format.error); + assert(idx == 3); + + // width, precision + idx = 0; + assert(parsePrintfFormatSpecifier("%*d", idx, widthStar, precisionStar) == Format.d); + assert(idx == 3); + assert(widthStar && !precisionStar); + + idx = 0; + assert(parsePrintfFormatSpecifier("%.*d", idx, widthStar, precisionStar) == Format.d); + assert(idx == 4); + assert(!widthStar && precisionStar); + + idx = 0; + assert(parsePrintfFormatSpecifier("%*.*d", idx, widthStar, precisionStar) == Format.d); + assert(idx == 5); + assert(widthStar && precisionStar); + + // Too short formats + { + foreach (s; ["%", "%-", "%+", "% ", "%#", "%0", "%*", "%1", "%19", "%.", "%.*", "%.1", "%.12", + "%j", "%z", "%t", "%l", "%h", "%ll", "%hh"]) + { + idx = 0; + assert(parsePrintfFormatSpecifier(s, idx, widthStar, precisionStar) == Format.error); + assert(idx == s.length); + } + } + + // Undefined format combinations + { + foreach (s; ["%#d", "%llg", "%jg", "%zg", "%tg", "%hg", "%hhg", + 
"%#c", "%0c", "%jc", "%zc", "%tc", "%Lc", "%hc", "%hhc", "%llc", + "%#s", "%0s", "%js", "%zs", "%ts", "%Ls", "%hs", "%hhs", "%lls", + "%jp", "%zp", "%tp", "%Lp", "%hp", "%lp", "%hhp", "%llp", + "%-n", "%+n", "% n", "%#n", "%0n", "%*n", "%1n", "%19n", "%.n", "%.*n", "%.1n", "%.12n", "%Ln", "%K"]) + { + idx = 0; + assert(parsePrintfFormatSpecifier(s, idx, widthStar, precisionStar) == Format.error); + import std.stdio; + assert(idx == s.length); + } + } + + /* parseScanfFormatSpecifier + */ + + bool asterisk; + + // one for each Format + idx = 0; + assert(parseScanfFormatSpecifier("%d", idx, asterisk) == Format.d); + assert(idx == 2); + assert(!asterisk); + + idx = 0; + assert(parseScanfFormatSpecifier("%hhd", idx, asterisk) == Format.hhd); + assert(idx == 4); + + idx = 0; + assert(parseScanfFormatSpecifier("%hd", idx, asterisk) == Format.hd); + assert(idx == 3); + + idx = 0; + assert(parseScanfFormatSpecifier("%ld", idx, asterisk) == Format.ld); + assert(idx == 3); + + idx = 0; + assert(parseScanfFormatSpecifier("%lld", idx, asterisk) == Format.lld); + assert(idx == 4); + + idx = 0; + assert(parseScanfFormatSpecifier("%jd", idx, asterisk) == Format.jd); + assert(idx == 3); + + idx = 0; + assert(parseScanfFormatSpecifier("%zd", idx, asterisk) == Format.zd); + assert(idx == 3); + + idx = 0; + assert(parseScanfFormatSpecifier("%td", idx, asterisk,) == Format.td); + assert(idx == 3); + + idx = 0; + assert(parseScanfFormatSpecifier("%u", idx, asterisk) == Format.u); + assert(idx == 2); + + idx = 0; + assert(parseScanfFormatSpecifier("%hhu", idx, asterisk,) == Format.hhu); + assert(idx == 4); + + idx = 0; + assert(parseScanfFormatSpecifier("%hu", idx, asterisk) == Format.hu); + assert(idx == 3); + + idx = 0; + assert(parseScanfFormatSpecifier("%lu", idx, asterisk) == Format.lu); + assert(idx == 3); + + idx = 0; + assert(parseScanfFormatSpecifier("%llu", idx, asterisk) == Format.llu); + assert(idx == 4); + + idx = 0; + assert(parseScanfFormatSpecifier("%ju", idx, asterisk) 
== Format.ju); + assert(idx == 3); + + idx = 0; + assert(parseScanfFormatSpecifier("%g", idx, asterisk) == Format.g); + assert(idx == 2); + + idx = 0; + assert(parseScanfFormatSpecifier("%lg", idx, asterisk) == Format.lg); + assert(idx == 3); + + idx = 0; + assert(parseScanfFormatSpecifier("%Lg", idx, asterisk) == Format.Lg); + assert(idx == 3); + + idx = 0; + assert(parseScanfFormatSpecifier("%p", idx, asterisk) == Format.p); + assert(idx == 2); + + idx = 0; + assert(parseScanfFormatSpecifier("%s", idx, asterisk) == Format.s); + assert(idx == 2); + + idx = 0; + assert(parseScanfFormatSpecifier("%ls", idx, asterisk,) == Format.ls); + assert(idx == 3); + + idx = 0; + assert(parseScanfFormatSpecifier("%%", idx, asterisk) == Format.percent); + assert(idx == 2); + + // Synonyms + idx = 0; + assert(parseScanfFormatSpecifier("%i", idx, asterisk) == Format.d); + assert(idx == 2); + + idx = 0; + assert(parseScanfFormatSpecifier("%n", idx, asterisk) == Format.n); + assert(idx == 2); + + idx = 0; + assert(parseScanfFormatSpecifier("%o", idx, asterisk) == Format.u); + assert(idx == 2); + + idx = 0; + assert(parseScanfFormatSpecifier("%x", idx, asterisk) == Format.u); + assert(idx == 2); + + idx = 0; + assert(parseScanfFormatSpecifier("%f", idx, asterisk) == Format.g); + assert(idx == 2); + + idx = 0; + assert(parseScanfFormatSpecifier("%e", idx, asterisk) == Format.g); + assert(idx == 2); + + idx = 0; + assert(parseScanfFormatSpecifier("%a", idx, asterisk) == Format.g); + assert(idx == 2); + + idx = 0; + assert(parseScanfFormatSpecifier("%c", idx, asterisk) == Format.c); + assert(idx == 2); + + // asterisk + idx = 0; + assert(parseScanfFormatSpecifier("%*d", idx, asterisk) == Format.d); + assert(idx == 3); + assert(asterisk); + + idx = 0; + assert(parseScanfFormatSpecifier("%9ld", idx, asterisk) == Format.ld); + assert(idx == 4); + assert(!asterisk); + + idx = 0; + assert(parseScanfFormatSpecifier("%*25984hhd", idx, asterisk) == Format.hhd); + assert(idx == 10); + 
assert(asterisk); + + // scansets + idx = 0; + assert(parseScanfFormatSpecifier("%[a-zA-Z]s", idx, asterisk) == Format.s); + assert(idx == 10); + assert(!asterisk); + + idx = 0; + assert(parseScanfFormatSpecifier("%*25[a-z]hhd", idx, asterisk) == Format.hhd); + assert(idx == 12); + assert(asterisk); + + // Too short formats + foreach (s; ["%", "% ", "%#", "%0", "%*", "%1", "%19", + "%j", "%z", "%t", "%l", "%h", "%ll", "%hh", "%K"]) + { + idx = 0; + assert(parseScanfFormatSpecifier(s, idx, asterisk) == Format.error); + assert(idx == s.length); + } + + + // Undefined format combinations + foreach (s; ["%Ld", "%llg", "%jg", "%zg", "%tg", "%hg", "%hhg", + "%jc", "%zc", "%tc", "%Lc", "%hc", "%hhc", "%llc", + "%jp", "%zp", "%tp", "%Lp", "%hp", "%lp", "%hhp", "%llp", + "%-", "%+", "%#", "%0", "%.", "%Ln"]) + { + idx = 0; + assert(parseScanfFormatSpecifier(s, idx, asterisk) == Format.error); + assert(idx == s.length); + + } + + // Invalid scansets + foreach (s; ["%[]", "%[s", "%[0-9lld", "%[", "%[a-z]"]) + { + idx = 0; + assert(parseScanfFormatSpecifier(s, idx, asterisk) == Format.error); + assert(idx == s.length); + } + } diff --git a/src/dmd/chkprintf.d b/src/dmd/chkprintf.d deleted file mode 100644 index 7f048ddc8120..000000000000 --- a/src/dmd/chkprintf.d +++ /dev/null @@ -1,604 +0,0 @@ -/** - * Check the arguments to `printf` against the `format` string. - * - * Compiler implementation of the - * $(LINK2 http://www.dlang.org, D programming language). 
- * - * Copyright: Copyright (C) 1999-2020 by The D Language Foundation, All Rights Reserved - * Authors: $(LINK2 http://www.digitalmars.com, Walter Bright) - * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) - * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/chkprintf.d, _chkprintf.d) - * Documentation: https://dlang.org/phobos/dmd_chkprintf.html - * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/chkprintf.d - */ - -module dmd.chkprintf; - -import core.stdc.stdio : printf; - -import dmd.chkformat; -import dmd.errors; -import dmd.expression; -import dmd.globals; -import dmd.mtype; -import dmd.target; - -/****************************************** - * Check that arguments to a printf format string are compatible - * with that string. Issue errors for incompatibilities. - * - * Follows the C99 specification for printf. - * - * Takes a generous, rather than strict, view of compatiblity. - * For example, an unsigned value can be formatted with a signed specifier. - * - * Diagnosed incompatibilities are: - * - * 1. incompatible sizes which will cause argument misalignment - * 2. deferencing arguments that are not pointers - * 3. insufficient number of arguments - * 4. struct arguments - * 5. array and slice arguments - * 6. non-pointer arguments to `s` specifier - * 7. non-standard formats - * 8. undefined behavior per C99 - * - * Per the C Standard, extra arguments are ignored. - * - * No attempt is made to fix the arguments or the format string. 
- * - * Returns: - * `true` if errors occurred - * References: - * C99 7.19.6.1 - * http://www.cplusplus.com/reference/cstdio/printf/ - */ - -bool checkPrintfFormat(ref const Loc loc, scope const char[] format, scope Expression[] args) -{ - //printf("checkPrintFormat('%.*s')\n", cast(int)format.length, format.ptr); - size_t n = 0; - for (size_t i = 0; i < format.length;) - { - if (format[i] != '%') - { - ++i; - continue; - } - bool widthStar; - bool precisionStar; - size_t j = i; - const fmt = parseFormatSpecifier(format, j, widthStar, precisionStar); - const slice = format[i .. j]; - i = j; - - if (fmt == Format.percent) - continue; // "%%", no arguments - - Expression getNextArg() - { - if (n == args.length) - { - deprecation(loc, "more format specifiers than %d arguments", cast(int)n); - return null; - } - return args[n++]; - } - - void errorMsg(const char* prefix, const char[] specifier, Expression arg, const char* texpect, Type tactual) - { - deprecation(arg.loc, "%sargument `%s` for format specification `\"%.*s\"` must be `%s`, not `%s`", - prefix ? 
prefix : "", arg.toChars(), cast(int)slice.length, slice.ptr, texpect, tactual.toChars()); - } - - if (widthStar) - { - auto e = getNextArg(); - if (!e) - return true; - auto t = e.type.toBasetype(); - if (t.ty != Tint32 && t.ty != Tuns32) - errorMsg("width ", slice, e, "int", t); - } - - if (precisionStar) - { - auto e = getNextArg(); - if (!e) - return true; - auto t = e.type.toBasetype(); - if (t.ty != Tint32 && t.ty != Tuns32) - errorMsg("precision ", slice, e, "int", t); - } - - auto e = getNextArg(); - if (!e) - return true; - auto t = e.type.toBasetype(); - auto tnext = t.nextOf(); - const c_longsize = target.c.longsize; - const is64bit = global.params.is64bit; - - final switch (fmt) - { - case Format.u: // unsigned int - case Format.d: // int - if (t.ty != Tint32 && t.ty != Tuns32) - errorMsg(null, slice, e, "int", t); - break; - - case Format.hhu: // unsigned char - case Format.hhd: // signed char - if (t.ty != Tint8 && t.ty != Tuns8) - errorMsg(null, slice, e, "byte", t); - break; - - case Format.hu: // unsigned short int - case Format.hd: // short int - if (t.ty != Tint16 && t.ty != Tuns16) - errorMsg(null, slice, e, "short", t); - break; - - case Format.lu: // unsigned long int - case Format.ld: // long int - if (!(t.isintegral() && t.size() == c_longsize)) - errorMsg(null, slice, e, (c_longsize == 4 ? "int" : "long"), t); - break; - - case Format.llu: // unsigned long long int - case Format.lld: // long long int - if (t.ty != Tint64 && t.ty != Tuns64) - errorMsg(null, slice, e, "long", t); - break; - - case Format.ju: // uintmax_t - case Format.jd: // intmax_t - if (t.ty != Tint64 && t.ty != Tuns64) - errorMsg(null, slice, e, "core.stdc.stdint.intmax_t", t); - break; - - case Format.zd: // size_t - if (!(t.isintegral() && t.size() == (is64bit ? 8 : 4))) - errorMsg(null, slice, e, "size_t", t); - break; - - case Format.td: // ptrdiff_t - if (!(t.isintegral() && t.size() == (is64bit ? 
8 : 4))) - errorMsg(null, slice, e, "ptrdiff_t", t); - break; - - case Format.g: // double - if (t.ty != Tfloat64 && t.ty != Timaginary64) - errorMsg(null, slice, e, "double", t); - break; - - case Format.Lg: // long double - if (t.ty != Tfloat80 && t.ty != Timaginary80) - errorMsg(null, slice, e, "real", t); - break; - - case Format.p: // pointer - if (t.ty != Tpointer && t.ty != Tnull && t.ty != Tclass && t.ty != Tdelegate && t.ty != Taarray) - errorMsg(null, slice, e, "void*", t); - break; - - case Format.n: // pointer to int - if (!(t.ty == Tpointer && tnext.ty == Tint32)) - errorMsg(null, slice, e, "int*", t); - break; - - case Format.ln: // pointer to long int - if (!(t.ty == Tpointer && tnext.isintegral() && tnext.size() == c_longsize)) - errorMsg(null, slice, e, (c_longsize == 4 ? "int*" : "long*"), t); - break; - - case Format.lln: // pointer to long long int - if (!(t.ty == Tpointer && tnext.ty == Tint64)) - errorMsg(null, slice, e, "long*", t); - break; - - case Format.hn: // pointer to short - if (!(t.ty == Tpointer && tnext.ty == Tint16)) - errorMsg(null, slice, e, "short*", t); - break; - - case Format.hhn: // pointer to signed char - if (!(t.ty == Tpointer && tnext.ty == Tint16)) - errorMsg(null, slice, e, "byte*", t); - break; - - case Format.jn: // pointer to intmax_t - if (!(t.ty == Tpointer && tnext.ty == Tint64)) - errorMsg(null, slice, e, "core.stdc.stdint.intmax_t*", t); - break; - - case Format.zn: // pointer to size_t - if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tuns64 : Tuns32))) - errorMsg(null, slice, e, "size_t*", t); - break; - case Format.tn: // pointer to ptrdiff_t - if (!(t.ty == Tpointer && tnext.ty == (is64bit ? 
Tint64 : Tint32))) - errorMsg(null, slice, e, "ptrdiff_t*", t); - break; - - case Format.c: // char - if (t.ty != Tint32 && t.ty != Tuns32) - errorMsg(null, slice, e, "char", t); - break; - - case Format.lc: // wint_t - if (t.ty != Tint32 && t.ty != Tuns32) - errorMsg(null, slice, e, "wchar_t", t); - break; - - case Format.s: // pointer to char string - if (!(t.ty == Tpointer && (tnext.ty == Tchar || tnext.ty == Tint8 || tnext.ty == Tuns8))) - errorMsg(null, slice, e, "char*", t); - break; - - case Format.ls: // pointer to wchar_t string - const twchar_t = global.params.isWindows ? Twchar : Tdchar; - if (!(t.ty == Tpointer && tnext.ty == twchar_t)) - errorMsg(null, slice, e, "wchar_t*", t); - break; - - case Format.error: - deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr); - break; - - case Format.lg: - case Format.percent: - assert(0); - } - } - return false; -} - -private: - -/************************************** - * Parse the *format specifier* which is of the form: - * - * `%[flags][field width][.precision][length modifier]specifier` - * - * Params: - * format = format string - * idx = index of `%` of start of format specifier, - * which gets updated to index past the end of it, - * even if `Format.error` is returned - * widthStar = set if * for width - * precisionStar = set if * for precision - * Returns: - * Format - */ -pure nothrow @safe -Format parseFormatSpecifier(scope const char[] format, ref size_t idx, - out bool widthStar, out bool precisionStar) -{ - auto i = idx; - assert(format[i] == '%'); - const length = format.length; - bool hash; - bool zero; - bool flags; - bool width; - bool precision; - - Format error() - { - idx = i; - return Format.error; - } - - ++i; - if (i == length) - return error(); - - if (format[i] == '%') - { - idx = i + 1; - return Format.percent; - } - - /* Read the `flags` - */ - while (1) - { - const c = format[i]; - if (c == '-' || - c == '+' || - c == ' ') - { - flags = true; - } 
- else if (c == '#') - { - hash = true; - } - else if (c == '0') - { - zero = true; - } - else - break; - ++i; - if (i == length) - return error(); - } - - /* Read the `field width` - */ - { - const c = format[i]; - if (c == '*') - { - width = true; - widthStar = true; - ++i; - if (i == length) - return error(); - } - else if ('1' <= c && c <= '9') - { - width = true; - ++i; - if (i == length) - return error(); - while ('0' <= format[i] && format[i] <= '9') - { - ++i; - if (i == length) - return error(); - } - } - } - - /* Read the `precision` - */ - if (format[i] == '.') - { - precision = true; - ++i; - if (i == length) - return error(); - const c = format[i]; - if (c == '*') - { - precisionStar = true; - ++i; - if (i == length) - return error(); - } - else if ('0' <= c && c <= '9') - { - ++i; - if (i == length) - return error(); - while ('0' <= format[i] && format[i] <= '9') - { - ++i; - if (i == length) - return error(); - } - } - } - - /* Read the specifier - */ - char genSpec; - Format specifier = parseGenericFormatSpecifier(format, i, genSpec); - if (specifier == Format.error || specifier == Format.lg) - return error(); - - switch (genSpec) - { - case 'c': - case 's': - if (hash || zero) - return error(); - break; - - case 'd': - case 'i': - if (hash) - return error(); - break; - - case 'n': - if (hash || zero || precision || width || flags) - return error(); - break; - - default: - break; - } - - idx = i; - return specifier; // success -} - -unittest -{ - //printf("parseFormatSpecifier()\n"); - - size_t idx; - bool widthStar; - bool precisionStar; - - // one for each Format - idx = 0; - assert(parseFormatSpecifier("%d", idx, widthStar, precisionStar) == Format.d); - assert(idx == 2); - assert(!widthStar && !precisionStar); - - idx = 0; - assert(parseFormatSpecifier("%ld", idx, widthStar, precisionStar) == Format.ld); - assert(idx == 3); - - idx = 0; - assert(parseFormatSpecifier("%lld", idx, widthStar, precisionStar) == Format.lld); - assert(idx == 4); - - 
idx = 0; - assert(parseFormatSpecifier("%jd", idx, widthStar, precisionStar) == Format.jd); - assert(idx == 3); - - idx = 0; - assert(parseFormatSpecifier("%zd", idx, widthStar, precisionStar) == Format.zd); - assert(idx == 3); - - idx = 0; - assert(parseFormatSpecifier("%td", idx, widthStar, precisionStar) == Format.td); - assert(idx == 3); - - idx = 0; - assert(parseFormatSpecifier("%g", idx, widthStar, precisionStar) == Format.g); - assert(idx == 2); - - idx = 0; - assert(parseFormatSpecifier("%Lg", idx, widthStar, precisionStar) == Format.Lg); - assert(idx == 3); - - idx = 0; - assert(parseFormatSpecifier("%p", idx, widthStar, precisionStar) == Format.p); - assert(idx == 2); - - idx = 0; - assert(parseFormatSpecifier("%n", idx, widthStar, precisionStar) == Format.n); - assert(idx == 2); - - idx = 0; - assert(parseFormatSpecifier("%ln", idx, widthStar, precisionStar) == Format.ln); - assert(idx == 3); - - idx = 0; - assert(parseFormatSpecifier("%lln", idx, widthStar, precisionStar) == Format.lln); - assert(idx == 4); - - idx = 0; - assert(parseFormatSpecifier("%hn", idx, widthStar, precisionStar) == Format.hn); - assert(idx == 3); - - idx = 0; - assert(parseFormatSpecifier("%hhn", idx, widthStar, precisionStar) == Format.hhn); - assert(idx == 4); - - idx = 0; - assert(parseFormatSpecifier("%jn", idx, widthStar, precisionStar) == Format.jn); - assert(idx == 3); - - idx = 0; - assert(parseFormatSpecifier("%zn", idx, widthStar, precisionStar) == Format.zn); - assert(idx == 3); - - idx = 0; - assert(parseFormatSpecifier("%tn", idx, widthStar, precisionStar) == Format.tn); - assert(idx == 3); - - idx = 0; - assert(parseFormatSpecifier("%c", idx, widthStar, precisionStar) == Format.c); - assert(idx == 2); - - idx = 0; - assert(parseFormatSpecifier("%lc", idx, widthStar, precisionStar) == Format.lc); - assert(idx == 3); - - idx = 0; - assert(parseFormatSpecifier("%s", idx, widthStar, precisionStar) == Format.s); - assert(idx == 2); - - idx = 0; - 
assert(parseFormatSpecifier("%ls", idx, widthStar, precisionStar) == Format.ls); - assert(idx == 3); - - idx = 0; - assert(parseFormatSpecifier("%%", idx, widthStar, precisionStar) == Format.percent); - assert(idx == 2); - - // Synonyms - idx = 0; - assert(parseFormatSpecifier("%i", idx, widthStar, precisionStar) == Format.d); - assert(idx == 2); - - idx = 0; - assert(parseFormatSpecifier("%u", idx, widthStar, precisionStar) == Format.u); - assert(idx == 2); - - idx = 0; - assert(parseFormatSpecifier("%o", idx, widthStar, precisionStar) == Format.u); - assert(idx == 2); - - idx = 0; - assert(parseFormatSpecifier("%x", idx, widthStar, precisionStar) == Format.u); - assert(idx == 2); - - idx = 0; - assert(parseFormatSpecifier("%X", idx, widthStar, precisionStar) == Format.u); - assert(idx == 2); - - idx = 0; - assert(parseFormatSpecifier("%f", idx, widthStar, precisionStar) == Format.g); - assert(idx == 2); - - idx = 0; - assert(parseFormatSpecifier("%F", idx, widthStar, precisionStar) == Format.g); - assert(idx == 2); - - idx = 0; - assert(parseFormatSpecifier("%G", idx, widthStar, precisionStar) == Format.g); - assert(idx == 2); - - idx = 0; - assert(parseFormatSpecifier("%a", idx, widthStar, precisionStar) == Format.g); - assert(idx == 2); - - idx = 0; - assert(parseFormatSpecifier("%A", idx, widthStar, precisionStar) == Format.g); - assert(idx == 2); - - idx = 0; - assert(parseFormatSpecifier("%lg", idx, widthStar, precisionStar) == Format.error); - assert(idx == 3); - - // width, precision - idx = 0; - assert(parseFormatSpecifier("%*d", idx, widthStar, precisionStar) == Format.d); - assert(idx == 3); - assert(widthStar && !precisionStar); - - idx = 0; - assert(parseFormatSpecifier("%.*d", idx, widthStar, precisionStar) == Format.d); - assert(idx == 4); - assert(!widthStar && precisionStar); - - idx = 0; - assert(parseFormatSpecifier("%*.*d", idx, widthStar, precisionStar) == Format.d); - assert(idx == 5); - assert(widthStar && precisionStar); - - // Too short 
formats - { - foreach (s; ["%", "%-", "%+", "% ", "%#", "%0", "%*", "%1", "%19", "%.", "%.*", "%.1", "%.12", - "%j", "%z", "%t", "%l", "%h", "%ll", "%hh"]) - { - idx = 0; - assert(parseFormatSpecifier(s, idx, widthStar, precisionStar) == Format.error); - assert(idx == s.length); - } - } - - // Undefined format combinations - { - foreach (s; ["%#d", "%llg", "%jg", "%zg", "%tg", "%hg", "%hhg", - "%#c", "%0c", "%jc", "%zc", "%tc", "%Lc", "%hc", "%hhc", "%llc", - "%#s", "%0s", "%js", "%zs", "%ts", "%Ls", "%hs", "%hhs", "%lls", - "%jp", "%zp", "%tp", "%Lp", "%hp", "%lp", "%hhp", "%llp", - "%-n", "%+n", "% n", "%#n", "%0n", "%*n", "%1n", "%19n", "%.n", "%.*n", "%.1n", "%.12n", "%Ln", "%K"]) - { - idx = 0; - assert(parseFormatSpecifier(s, idx, widthStar, precisionStar) == Format.error); - import std.stdio; - assert(idx == s.length); - } - } -} diff --git a/src/dmd/chkscanf.d b/src/dmd/chkscanf.d deleted file mode 100644 index 2c7732d007e2..000000000000 --- a/src/dmd/chkscanf.d +++ /dev/null @@ -1,496 +0,0 @@ -/** - * Check the arguments to `scanf` against the `format` string. - * - * Compiler implementation of the - * $(LINK2 http://www.dlang.org, D programming language). 
- * - * Copyright: Copyright (C) 1999-2020 by The D Language Foundation, All Rights Reserved - * Authors: $(LINK2 http://www.digitalmars.com, Walter Bright) - * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) - * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/chkscanf.d, _chkscanf.d) - * Documentation: https://dlang.org/phobos/dmd_chkscanf.html - * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/chkscanf.d - */ - -module dmd.chkscanf; - -import core.stdc.stdio : scanf; -import core.stdc.ctype : isdigit; - -import dmd.chkformat; -import dmd.errors; -import dmd.expression; -import dmd.globals; -import dmd.mtype; -import dmd.target; - -/****************************************** - * Check that arguments to a scanf format string are compatible - * with that string. Issue errors for incompatibilities. - * - * Follows the C99 specification for scanf. - * - * Takes a generous, rather than strict, view of compatiblity. - * For example, an unsigned value can be formatted with a signed specifier. - * - * Diagnosed incompatibilities are: - * - * 1. incompatible sizes which will cause argument misalignment - * 2. deferencing arguments that are not pointers - * 3. insufficient number of arguments - * 4. struct arguments - * 5. array and slice arguments - * 6. non-standard formats - * 7. undefined behavior per C99 - * - * Per the C Standard, extra arguments are ignored. - * - * No attempt is made to fix the arguments or the format string. - * - * Returns: - * `true` if errors occurred - * References: - * C99 7.19.6.2 - * http://www.cplusplus.com/reference/cstdio/scanf/ - */ - -bool checkScanfFormat(ref const Loc loc, scope const char[] format, scope Expression[] args) -{ - size_t n = 0; - for (size_t i = 0; i < format.length;) - { - if (format[i] != '%') - { - ++i; - continue; - } - bool asterisk; - size_t j = i; - const fmt = parseFormatSpecifier(format, j, asterisk); - const slice = format[i .. 
j]; - i = j; - - if (fmt == Format.percent || asterisk) - continue; // "%%", "%*": no arguments - - Expression getNextArg() - { - if (n == args.length) - { - if (!asterisk) - deprecation(loc, "more format specifiers than %d arguments", cast(int)n); - return null; - } - return args[n++]; - } - - void errorMsg(const char* prefix, const char[] specifier, Expression arg, const char* texpect, Type tactual) - { - deprecation(arg.loc, "%sargument `%s` for format specification `\"%.*s\"` must be `%s`, not `%s`", - prefix ? prefix : "", arg.toChars(), cast(int)slice.length, slice.ptr, texpect, tactual.toChars()); - } - - auto e = getNextArg(); - if (!e) - return true; - - auto t = e.type.toBasetype(); - auto tnext = t.nextOf(); - const c_longsize = target.c.longsize; - const is64bit = global.params.is64bit; - - final switch (fmt) - { - case Format.n: - case Format.d: // pointer to int - if (!(t.ty == Tpointer && tnext.ty == Tint32)) - errorMsg(null, slice, e, "int*", t); - break; - - case Format.hhn: - case Format.hhd: // pointer to signed char - if (!(t.ty == Tpointer && tnext.ty == Tint16)) - errorMsg(null, slice, e, "byte*", t); - break; - - case Format.hn: - case Format.hd: // pointer to short - if (!(t.ty == Tpointer && tnext.ty == Tint16)) - errorMsg(null, slice, e, "short*", t); - break; - - case Format.ln: - case Format.ld: // pointer to long int - if (!(t.ty == Tpointer && tnext.isintegral() && tnext.size() == c_longsize)) - errorMsg(null, slice, e, (c_longsize == 4 ? "int*" : "long*"), t); - break; - - case Format.lln: - case Format.lld: // pointer to long long int - if (!(t.ty == Tpointer && tnext.ty == Tint64)) - errorMsg(null, slice, e, "long*", t); - break; - - case Format.jn: - case Format.jd: // pointer to intmax_t - if (!(t.ty == Tpointer && tnext.ty == Tint64)) - errorMsg(null, slice, e, "core.stdc.stdint.intmax_t*", t); - break; - - case Format.zn: - case Format.zd: // pointer to size_t - if (!(t.ty == Tpointer && tnext.ty == (is64bit ? 
Tuns64 : Tuns32))) - errorMsg(null, slice, e, "size_t*", t); - break; - - case Format.tn: - case Format.td: // pointer to ptrdiff_t - if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tint64 : Tint32))) - errorMsg(null, slice, e, "ptrdiff_t*", t); - break; - - case Format.u: // pointer to unsigned int - if (!(t.ty == Tpointer && tnext.ty == Tuns32)) - errorMsg(null, slice, e, "uint*", t); - break; - - case Format.hhu: // pointer to unsigned char - if (!(t.ty == Tpointer && tnext.ty == Tuns8)) - errorMsg(null, slice, e, "ubyte*", t); - break; - - case Format.hu: // pointer to unsigned short int - if (!(t.ty == Tpointer && tnext.ty == Tuns16)) - errorMsg(null, slice, e, "ushort*", t); - break; - - case Format.lu: // pointer to unsigned long int - if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tuns64 : Tuns32))) - errorMsg(null, slice, e, (c_longsize == 4 ? "uint*" : "ulong*"), t); - break; - - case Format.llu: // pointer to unsigned long long int - if (!(t.ty == Tpointer && tnext.ty == Tuns64)) - errorMsg(null, slice, e, "ulong*", t); - break; - - case Format.ju: // pointer to uintmax_t - if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tuns64 : Tuns32))) - errorMsg(null, slice, e, "ulong*", t); - break; - - case Format.g: // pointer to float - if (!(t.ty == Tpointer && tnext.ty == Tfloat32)) - errorMsg(null, slice, e, "float*", t); - break; - case Format.lg: // pointer to double - if (!(t.ty == Tpointer && tnext.ty == Tfloat64)) - errorMsg(null, slice, e, "double*", t); - break; - case Format.Lg: // pointer to long double - if (!(t.ty == Tpointer && tnext.ty == Tfloat80)) - errorMsg(null, slice, e, "real*", t); - break; - - case Format.c: - case Format.s: // pointer to char string - if (!(t.ty == Tpointer && (tnext.ty == Tchar || tnext.ty == Tint8 || tnext.ty == Tuns8))) - errorMsg(null, slice, e, "char*", t); - break; - - case Format.lc: - case Format.ls: // pointer to wchar_t string - const twchar_t = global.params.isWindows ? 
Twchar : Tdchar; - if (!(t.ty == Tpointer && tnext.ty == twchar_t)) - errorMsg(null, slice, e, "wchar_t*", t); - break; - - case Format.p: // double pointer - if (!(t.ty == Tpointer && tnext.ty == Tpointer)) - errorMsg(null, slice, e, "void**", t); - break; - - case Format.error: - deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr); - break; - - case Format.percent: - assert(0); - } - } - return false; -} - -private: - -/************************************** - * Parse the *format specifier* which is of the form: - * - * `%[*][width][length]specifier` - * - * Params: - * format = format string - * idx = index of `%` of start of format specifier, - * which gets updated to index past the end of it, - * even if `Format.error` is returned - * asterisk = set if there is a `*` sub-specifier - * Returns: - * Format - */ -pure nothrow @safe -Format parseFormatSpecifier(scope const char[] format, ref size_t idx, - out bool asterisk) -{ - auto i = idx; - assert(format[i] == '%'); - const length = format.length; - - Format error() - { - idx = i; - return Format.error; - } - - ++i; - if (i == length) - return error(); - - if (format[i] == '%') - { - idx = i + 1; - return Format.percent; - } - - // * sub-specifier - if (format[i] == '*') - { - ++i; - if (i == length) - return error(); - asterisk = true; - } - - // fieldWidth - while (isdigit(format[i])) - { - i++; - if (i == length) - return error(); - } - - /* Read the scanset - * A scanset can be anything, so we just check that it is paired - */ - if (format[i] == '[') - { - while (i < length) - { - if (format[i] == ']') - break; - ++i; - } - - // no `]` found - if (i == length) - return error(); - - ++i; - // no specifier after `]` - // it could be mixed with the one above, but then idx won't have the right index - if (i == length) - return error(); - } - - /* Read the specifier - */ - char genSpec; - Format specifier = parseGenericFormatSpecifier(format, i, genSpec); - if (specifier 
== Format.error) - return error(); - - idx = i; - return specifier; // success -} - -unittest -{ - size_t idx; - bool asterisk; - - // one for each Format - idx = 0; - assert(parseFormatSpecifier("%d", idx, asterisk) == Format.d); - assert(idx == 2); - assert(!asterisk); - - idx = 0; - assert(parseFormatSpecifier("%hhd", idx, asterisk) == Format.hhd); - assert(idx == 4); - - idx = 0; - assert(parseFormatSpecifier("%hd", idx, asterisk) == Format.hd); - assert(idx == 3); - - idx = 0; - assert(parseFormatSpecifier("%ld", idx, asterisk) == Format.ld); - assert(idx == 3); - - idx = 0; - assert(parseFormatSpecifier("%lld", idx, asterisk) == Format.lld); - assert(idx == 4); - - idx = 0; - assert(parseFormatSpecifier("%jd", idx, asterisk) == Format.jd); - assert(idx == 3); - - idx = 0; - assert(parseFormatSpecifier("%zd", idx, asterisk) == Format.zd); - assert(idx == 3); - - idx = 0; - assert(parseFormatSpecifier("%td", idx, asterisk,) == Format.td); - assert(idx == 3); - - idx = 0; - assert(parseFormatSpecifier("%u", idx, asterisk) == Format.u); - assert(idx == 2); - - idx = 0; - assert(parseFormatSpecifier("%hhu", idx, asterisk,) == Format.hhu); - assert(idx == 4); - - idx = 0; - assert(parseFormatSpecifier("%hu", idx, asterisk) == Format.hu); - assert(idx == 3); - - idx = 0; - assert(parseFormatSpecifier("%lu", idx, asterisk) == Format.lu); - assert(idx == 3); - - idx = 0; - assert(parseFormatSpecifier("%llu", idx, asterisk) == Format.llu); - assert(idx == 4); - - idx = 0; - assert(parseFormatSpecifier("%ju", idx, asterisk) == Format.ju); - assert(idx == 3); - - idx = 0; - assert(parseFormatSpecifier("%g", idx, asterisk) == Format.g); - assert(idx == 2); - - idx = 0; - assert(parseFormatSpecifier("%lg", idx, asterisk) == Format.lg); - assert(idx == 3); - - idx = 0; - assert(parseFormatSpecifier("%Lg", idx, asterisk) == Format.Lg); - assert(idx == 3); - - idx = 0; - assert(parseFormatSpecifier("%p", idx, asterisk) == Format.p); - assert(idx == 2); - - idx = 0; - 
assert(parseFormatSpecifier("%s", idx, asterisk) == Format.s); - assert(idx == 2); - - idx = 0; - assert(parseFormatSpecifier("%ls", idx, asterisk,) == Format.ls); - assert(idx == 3); - - idx = 0; - assert(parseFormatSpecifier("%%", idx, asterisk) == Format.percent); - assert(idx == 2); - - // Synonyms - idx = 0; - assert(parseFormatSpecifier("%i", idx, asterisk) == Format.d); - assert(idx == 2); - - idx = 0; - assert(parseFormatSpecifier("%n", idx, asterisk) == Format.n); - assert(idx == 2); - - idx = 0; - assert(parseFormatSpecifier("%o", idx, asterisk) == Format.u); - assert(idx == 2); - - idx = 0; - assert(parseFormatSpecifier("%x", idx, asterisk) == Format.u); - assert(idx == 2); - - idx = 0; - assert(parseFormatSpecifier("%f", idx, asterisk) == Format.g); - assert(idx == 2); - - idx = 0; - assert(parseFormatSpecifier("%e", idx, asterisk) == Format.g); - assert(idx == 2); - - idx = 0; - assert(parseFormatSpecifier("%a", idx, asterisk) == Format.g); - assert(idx == 2); - - idx = 0; - assert(parseFormatSpecifier("%c", idx, asterisk) == Format.c); - assert(idx == 2); - - // asterisk - idx = 0; - assert(parseFormatSpecifier("%*d", idx, asterisk) == Format.d); - assert(idx == 3); - assert(asterisk); - - idx = 0; - assert(parseFormatSpecifier("%9ld", idx, asterisk) == Format.ld); - assert(idx == 4); - assert(!asterisk); - - idx = 0; - assert(parseFormatSpecifier("%*25984hhd", idx, asterisk) == Format.hhd); - assert(idx == 10); - assert(asterisk); - - // scansets - idx = 0; - assert(parseFormatSpecifier("%[a-zA-Z]s", idx, asterisk) == Format.s); - assert(idx == 10); - assert(!asterisk); - - idx = 0; - assert(parseFormatSpecifier("%*25[a-z]hhd", idx, asterisk) == Format.hhd); - assert(idx == 12); - assert(asterisk); - - // Too short formats - foreach (s; ["%", "% ", "%#", "%0", "%*", "%1", "%19", - "%j", "%z", "%t", "%l", "%h", "%ll", "%hh", "%K"]) - { - idx = 0; - assert(parseFormatSpecifier(s, idx, asterisk) == Format.error); - assert(idx == s.length); - } - - - // 
Undefined format combinations - foreach (s; ["%Ld", "%llg", "%jg", "%zg", "%tg", "%hg", "%hhg", - "%jc", "%zc", "%tc", "%Lc", "%hc", "%hhc", "%llc", - "%jp", "%zp", "%tp", "%Lp", "%hp", "%lp", "%hhp", "%llp", - "%-", "%+", "%#", "%0", "%.", "%Ln"]) - { - idx = 0; - assert(parseFormatSpecifier(s, idx, asterisk) == Format.error); - assert(idx == s.length); - - } - - // Invalid scansets - foreach (s; ["%[]", "%[s", "%[0-9lld", "%[", "%[a-z]"]) - { - idx = 0; - assert(parseFormatSpecifier(s, idx, asterisk) == Format.error); - assert(idx == s.length); - } - -} diff --git a/src/dmd/expressionsem.d b/src/dmd/expressionsem.d index 76d84d5b1768..b92597b096c6 100644 --- a/src/dmd/expressionsem.d +++ b/src/dmd/expressionsem.d @@ -22,8 +22,7 @@ import dmd.arraytypes; import dmd.attrib; import dmd.astcodegen; import dmd.canthrow; -import dmd.chkprintf; -import dmd.chkscanf; +import dmd.chkformat; import dmd.ctorflow; import dmd.dscope; import dmd.dsymbol; diff --git a/test/fail_compilation/chkformat.d b/test/fail_compilation/chkformat.d new file mode 100644 index 000000000000..78a57e9143c5 --- /dev/null +++ b/test/fail_compilation/chkformat.d @@ -0,0 +1,121 @@ +/* +REQUIRED_ARGS: -de +TEST_OUTPUT: +--- +fail_compilation/chkformat.d(101): Deprecation: width argument `0L` for format specification `"%*.*d"` must be `int`, not `long` +fail_compilation/chkformat.d(101): Deprecation: precision argument `1L` for format specification `"%*.*d"` must be `int`, not `long` +fail_compilation/chkformat.d(101): Deprecation: argument `2L` for format specification `"%*.*d"` must be `int`, not `long` +fail_compilation/chkformat.d(103): Deprecation: argument `4` for format specification `"%lld"` must be `long`, not `int` +fail_compilation/chkformat.d(104): Deprecation: argument `5` for format specification `"%jd"` must be `core.stdc.stdint.intmax_t`, not `int` +fail_compilation/chkformat.d(105): Deprecation: argument `6.00000` for format specification `"%zd"` must be `size_t`, not `double` 
+fail_compilation/chkformat.d(106): Deprecation: argument `7.00000` for format specification `"%td"` must be `ptrdiff_t`, not `double` +fail_compilation/chkformat.d(107): Deprecation: argument `8.00000L` for format specification `"%g"` must be `double`, not `real` +fail_compilation/chkformat.d(108): Deprecation: argument `9.00000` for format specification `"%Lg"` must be `real`, not `double` +fail_compilation/chkformat.d(109): Deprecation: argument `10` for format specification `"%p"` must be `void*`, not `int` +fail_compilation/chkformat.d(110): Deprecation: argument `& u` for format specification `"%n"` must be `int*`, not `uint*` +fail_compilation/chkformat.d(112): Deprecation: argument `& u` for format specification `"%lln"` must be `long*`, not `int*` +fail_compilation/chkformat.d(113): Deprecation: argument `& u` for format specification `"%hn"` must be `short*`, not `int*` +fail_compilation/chkformat.d(114): Deprecation: argument `& u` for format specification `"%hhn"` must be `byte*`, not `int*` +fail_compilation/chkformat.d(115): Deprecation: argument `16L` for format specification `"%c"` must be `char`, not `long` +fail_compilation/chkformat.d(116): Deprecation: argument `17L` for format specification `"%c"` must be `char`, not `long` +fail_compilation/chkformat.d(117): Deprecation: argument `& u` for format specification `"%s"` must be `char*`, not `int*` +fail_compilation/chkformat.d(118): Deprecation: argument `& u` for format specification `"%ls"` must be `wchar_t*`, not `int*` +fail_compilation/chkformat.d(119): Deprecation: argument `& u` for format specification `"%d"` must be `int`, not `int*` +fail_compilation/chkformat.d(120): Deprecation: argument `& u` for format specification `"%d"` must be `int`, not `int*` +fail_compilation/chkformat.d(201): Deprecation: argument `0L` for format specification `"%d"` must be `int*`, not `long` +fail_compilation/chkformat.d(202): Deprecation: more format specifiers than 1 arguments 
+fail_compilation/chkformat.d(203): Deprecation: argument `0L` for format specification `"%d"` must be `int*`, not `long` +fail_compilation/chkformat.d(204): Deprecation: argument `0L` for format specification `"%3u"` must be `uint*`, not `long` +fail_compilation/chkformat.d(205): Deprecation: argument `u` for format specification `"%200u"` must be `uint*`, not `uint` +fail_compilation/chkformat.d(206): Deprecation: argument `3.00000` for format specification `"%hhd"` must be `byte*`, not `double` +fail_compilation/chkformat.d(207): Deprecation: argument `4` for format specification `"%hd"` must be `short*`, not `int` +fail_compilation/chkformat.d(209): Deprecation: argument `4` for format specification `"%lld"` must be `long*`, not `int` +fail_compilation/chkformat.d(210): Deprecation: argument `5` for format specification `"%jd"` must be `core.stdc.stdint.intmax_t*`, not `int` +fail_compilation/chkformat.d(211): Deprecation: argument `6.00000` for format specification `"%zd"` must be `size_t*`, not `double` +fail_compilation/chkformat.d(212): Deprecation: argument `7.00000` for format specification `"%td"` must be `ptrdiff_t*`, not `double` +fail_compilation/chkformat.d(213): Deprecation: format specifier `"%Ld"` is invalid +fail_compilation/chkformat.d(214): Deprecation: argument `0` for format specification `"%u"` must be `uint*`, not `int` +fail_compilation/chkformat.d(215): Deprecation: argument `0` for format specification `"%hhu"` must be `ubyte*`, not `int` +fail_compilation/chkformat.d(216): Deprecation: argument `0` for format specification `"%hu"` must be `ushort*`, not `int` +fail_compilation/chkformat.d(218): Deprecation: argument `0` for format specification `"%llu"` must be `ulong*`, not `int` +fail_compilation/chkformat.d(219): Deprecation: argument `0` for format specification `"%ju"` must be `ulong*`, not `int` +fail_compilation/chkformat.d(220): Deprecation: argument `0` for format specification `"%zu"` must be `size_t*`, not `int` 
+fail_compilation/chkformat.d(221): Deprecation: argument `0` for format specification `"%tu"` must be `ptrdiff_t*`, not `int` +fail_compilation/chkformat.d(222): Deprecation: argument `8.00000L` for format specification `"%g"` must be `float*`, not `real` +fail_compilation/chkformat.d(223): Deprecation: argument `8.00000L` for format specification `"%lg"` must be `double*`, not `real` +fail_compilation/chkformat.d(224): Deprecation: argument `9.00000` for format specification `"%Lg"` must be `real*`, not `double` +fail_compilation/chkformat.d(225): Deprecation: argument `& u` for format specification `"%s"` must be `char*`, not `int*` +fail_compilation/chkformat.d(226): Deprecation: argument `& u` for format specification `"%ls"` must be `wchar_t*`, not `int*` +fail_compilation/chkformat.d(227): Deprecation: argument `v` for format specification `"%p"` must be `void**`, not `void*` +fail_compilation/chkformat.d(228): Deprecation: argument `& u` for format specification `"%n"` must be `int*`, not `ushort*` +fail_compilation/chkformat.d(229): Deprecation: argument `& u` for format specification `"%hhn"` must be `byte*`, not `int*` +fail_compilation/chkformat.d(230): Deprecation: format specifier `"%[n"` is invalid +fail_compilation/chkformat.d(231): Deprecation: format specifier `"%]"` is invalid +fail_compilation/chkformat.d(232): Deprecation: argument `& u` for format specification `"%90s"` must be `char*`, not `int*` +fail_compilation/chkformat.d(233): Deprecation: argument `0L` for format specification `"%d"` must be `int*`, not `long` +fail_compilation/chkformat.d(234): Deprecation: argument `0L` for format specification `"%d"` must be `int*`, not `long` +--- +*/ + + +import core.stdc.stdio; + +#line 100 + +void test1() { printf("%*.*d\n", 0L, 1L, 2L); } +//void test3() { printf("%ld\n", 3.0); } +void test4() { printf("%lld\n", 4); } +void test5() { printf("%jd\n", 5); } +void test6() { printf("%zd\n", 6.0); } +void test7() { printf("%td\n", 7.0); } +void 
test8() { printf("%g\n", 8.0L); } +void test9() { printf("%Lg\n", 9.0); } +void test10() { printf("%p\n", 10); } +void test11() { uint u; printf("%n\n", &u); } +//void test12() { ushort u; printf("%ln\n", &u); } +void test13() { int u; printf("%lln\n", &u); } +void test14() { int u; printf("%hn\n", &u); } +void test15() { int u; printf("%hhn\n", &u); } +void test16() { printf("%c\n", 16L); } +void test17() { printf("%c\n", 17L); } +void test18() { int u; printf("%s\n", &u); } +void test19() { int u; printf("%ls\n", &u); } +void test20() { int u; char[] s; sprintf(&s[0], "%d\n", &u); } +void test21() { int u; fprintf(null, "%d\n", &u); } + +#line 200 + +void test31() { scanf("%d\n", 0L); } +void test32() { int i; scanf("%d %d\n", &i); } +void test33() { scanf("%d%*c\n", 0L); } +void test34() { scanf("%3u\n", 0L); } +void test35() { uint u; scanf("%200u%*s\n", u); } +void test36() { scanf("%hhd\n", 3.0); } +void test37() { scanf("%hd\n", 4); } +//void test38() { scanf("%ld\n", 3.0); } +void test39() { scanf("%lld\n", 4); } +void test40() { scanf("%jd\n", 5); } +void test41() { scanf("%zd\n", 6.0); } +void test42() { scanf("%td\n", 7.0); } +void test43() { scanf("%Ld\n", 0); } +void test44() { scanf("%u\n", 0); } +void test45() { scanf("%hhu\n", 0); } +void test46() { scanf("%hu\n", 0); } +//void test47() { scanf("%lu\n", 0); } +void test48() { scanf("%llu\n", 0); } +void test49() { scanf("%ju\n", 0); } +void test50() { scanf("%zu\n", 0); } +void test51() { scanf("%tu\n", 0); } +void test52() { scanf("%g\n", 8.0L); } +void test53() { scanf("%lg\n", 8.0L); } +void test54() { scanf("%Lg\n", 9.0); } +void test55() { int u; scanf("%s\n", &u); } +void test56() { int u; scanf("%ls\n", &u); } +void test57() { void* v; scanf("%p\n", v); } +void test58() { ushort u; scanf("%n\n", &u); } +void test59() { int u; scanf("%hhn\n", &u); } +void test60() { int u; scanf("%[n", &u); } +void test61() { int u; scanf("%]\n", &u); } +void test62() { int u; scanf("%90s\n", &u); } +void 
test63() { sscanf("1234", "%d\n", 0L); } +void test64() { fscanf(null, "%d\n", 0L); } diff --git a/test/fail_compilation/chkprintf.d b/test/fail_compilation/chkprintf.d deleted file mode 100644 index 6460199bf73e..000000000000 --- a/test/fail_compilation/chkprintf.d +++ /dev/null @@ -1,52 +0,0 @@ -/* -REQUIRED_ARGS: -de -TEST_OUTPUT: ---- -fail_compilation/chkprintf.d(101): Deprecation: width argument `0L` for format specification `"%*.*d"` must be `int`, not `long` -fail_compilation/chkprintf.d(101): Deprecation: precision argument `1L` for format specification `"%*.*d"` must be `int`, not `long` -fail_compilation/chkprintf.d(101): Deprecation: argument `2L` for format specification `"%*.*d"` must be `int`, not `long` -fail_compilation/chkprintf.d(103): Deprecation: argument `4` for format specification `"%lld"` must be `long`, not `int` -fail_compilation/chkprintf.d(104): Deprecation: argument `5` for format specification `"%jd"` must be `core.stdc.stdint.intmax_t`, not `int` -fail_compilation/chkprintf.d(105): Deprecation: argument `6.00000` for format specification `"%zd"` must be `size_t`, not `double` -fail_compilation/chkprintf.d(106): Deprecation: argument `7.00000` for format specification `"%td"` must be `ptrdiff_t`, not `double` -fail_compilation/chkprintf.d(107): Deprecation: argument `8.00000L` for format specification `"%g"` must be `double`, not `real` -fail_compilation/chkprintf.d(108): Deprecation: argument `9.00000` for format specification `"%Lg"` must be `real`, not `double` -fail_compilation/chkprintf.d(109): Deprecation: argument `10` for format specification `"%p"` must be `void*`, not `int` -fail_compilation/chkprintf.d(110): Deprecation: argument `& u` for format specification `"%n"` must be `int*`, not `uint*` -fail_compilation/chkprintf.d(112): Deprecation: argument `& u` for format specification `"%lln"` must be `long*`, not `int*` -fail_compilation/chkprintf.d(113): Deprecation: argument `& u` for format specification `"%hn"` must be 
`short*`, not `int*` -fail_compilation/chkprintf.d(114): Deprecation: argument `& u` for format specification `"%hhn"` must be `byte*`, not `int*` -fail_compilation/chkprintf.d(115): Deprecation: argument `16L` for format specification `"%c"` must be `char`, not `long` -fail_compilation/chkprintf.d(116): Deprecation: argument `17L` for format specification `"%c"` must be `char`, not `long` -fail_compilation/chkprintf.d(117): Deprecation: argument `& u` for format specification `"%s"` must be `char*`, not `int*` -fail_compilation/chkprintf.d(118): Deprecation: argument `& u` for format specification `"%ls"` must be `wchar_t*`, not `int*` -fail_compilation/chkprintf.d(119): Deprecation: argument `& u` for format specification `"%d"` must be `int`, not `int*` -fail_compilation/chkprintf.d(120): Deprecation: argument `& u` for format specification `"%d"` must be `int`, not `int*` ---- -*/ - - -import core.stdc.stdio; - -#line 100 - -void test1() { printf("%*.*d\n", 0L, 1L, 2L); } -//void test3() { printf("%ld\n", 3.0); } -void test4() { printf("%lld\n", 4); } -void test5() { printf("%jd\n", 5); } -void test6() { printf("%zd\n", 6.0); } -void test7() { printf("%td\n", 7.0); } -void test8() { printf("%g\n", 8.0L); } -void test9() { printf("%Lg\n", 9.0); } -void test10() { printf("%p\n", 10); } -void test11() { uint u; printf("%n\n", &u); } -//void test12() { ushort u; printf("%ln\n", &u); } -void test13() { int u; printf("%lln\n", &u); } -void test14() { int u; printf("%hn\n", &u); } -void test15() { int u; printf("%hhn\n", &u); } -void test16() { printf("%c\n", 16L); } -void test17() { printf("%c\n", 17L); } -void test18() { int u; printf("%s\n", &u); } -void test19() { int u; printf("%ls\n", &u); } -void test20() { int u; char[] s; sprintf(&s[0], "%d\n", &u); } -void test21() { int u; fprintf(null, "%d\n", &u); } diff --git a/test/fail_compilation/chkscanf.d b/test/fail_compilation/chkscanf.d deleted file mode 100644 index 783dc135cf0c..000000000000 --- 
a/test/fail_compilation/chkscanf.d +++ /dev/null @@ -1,78 +0,0 @@ -/* -REQUIRED_ARGS: -de -TEST_OUTPUT: ---- -fail_compilation/chkscanf.d(101): Deprecation: argument `0L` for format specification `"%d"` must be `int*`, not `long` -fail_compilation/chkscanf.d(102): Deprecation: more format specifiers than 1 arguments -fail_compilation/chkscanf.d(103): Deprecation: argument `0L` for format specification `"%d"` must be `int*`, not `long` -fail_compilation/chkscanf.d(104): Deprecation: argument `0L` for format specification `"%3u"` must be `uint*`, not `long` -fail_compilation/chkscanf.d(105): Deprecation: argument `u` for format specification `"%200u"` must be `uint*`, not `uint` -fail_compilation/chkscanf.d(106): Deprecation: argument `3.00000` for format specification `"%hhd"` must be `byte*`, not `double` -fail_compilation/chkscanf.d(107): Deprecation: argument `4` for format specification `"%hd"` must be `short*`, not `int` -fail_compilation/chkscanf.d(109): Deprecation: argument `4` for format specification `"%lld"` must be `long*`, not `int` -fail_compilation/chkscanf.d(110): Deprecation: argument `5` for format specification `"%jd"` must be `core.stdc.stdint.intmax_t*`, not `int` -fail_compilation/chkscanf.d(111): Deprecation: argument `6.00000` for format specification `"%zd"` must be `size_t*`, not `double` -fail_compilation/chkscanf.d(112): Deprecation: argument `7.00000` for format specification `"%td"` must be `ptrdiff_t*`, not `double` -fail_compilation/chkscanf.d(113): Deprecation: format specifier `"%Ld"` is invalid -fail_compilation/chkscanf.d(114): Deprecation: argument `0` for format specification `"%u"` must be `uint*`, not `int` -fail_compilation/chkscanf.d(115): Deprecation: argument `0` for format specification `"%hhu"` must be `ubyte*`, not `int` -fail_compilation/chkscanf.d(116): Deprecation: argument `0` for format specification `"%hu"` must be `ushort*`, not `int` -fail_compilation/chkscanf.d(118): Deprecation: argument `0` for format 
specification `"%llu"` must be `ulong*`, not `int` -fail_compilation/chkscanf.d(119): Deprecation: argument `0` for format specification `"%ju"` must be `ulong*`, not `int` -fail_compilation/chkscanf.d(120): Deprecation: argument `0` for format specification `"%zu"` must be `size_t*`, not `int` -fail_compilation/chkscanf.d(121): Deprecation: argument `0` for format specification `"%tu"` must be `ptrdiff_t*`, not `int` -fail_compilation/chkscanf.d(122): Deprecation: argument `8.00000L` for format specification `"%g"` must be `float*`, not `real` -fail_compilation/chkscanf.d(123): Deprecation: argument `8.00000L` for format specification `"%lg"` must be `double*`, not `real` -fail_compilation/chkscanf.d(124): Deprecation: argument `9.00000` for format specification `"%Lg"` must be `real*`, not `double` -fail_compilation/chkscanf.d(125): Deprecation: argument `& u` for format specification `"%s"` must be `char*`, not `int*` -fail_compilation/chkscanf.d(126): Deprecation: argument `& u` for format specification `"%ls"` must be `wchar_t*`, not `int*` -fail_compilation/chkscanf.d(127): Deprecation: argument `v` for format specification `"%p"` must be `void**`, not `void*` -fail_compilation/chkscanf.d(128): Deprecation: argument `& u` for format specification `"%n"` must be `int*`, not `ushort*` -fail_compilation/chkscanf.d(129): Deprecation: argument `& u` for format specification `"%hhn"` must be `byte*`, not `int*` -fail_compilation/chkscanf.d(130): Deprecation: format specifier `"%[n"` is invalid -fail_compilation/chkscanf.d(131): Deprecation: format specifier `"%]"` is invalid -fail_compilation/chkscanf.d(132): Deprecation: argument `& u` for format specification `"%90s"` must be `char*`, not `int*` -fail_compilation/chkscanf.d(133): Deprecation: argument `0L` for format specification `"%d"` must be `int*`, not `long` -fail_compilation/chkscanf.d(134): Deprecation: argument `0L` for format specification `"%d"` must be `int*`, not `long` ---- -*/ - - -import 
core.stdc.stdio : fscanf, scanf, sscanf; - -#line 100 - -void test1() { scanf("%d\n", 0L); } -void test2() { int i; scanf("%d %d\n", &i); } -void test3() { scanf("%d%*c\n", 0L); } -void test4() { scanf("%3u\n", 0L); } -void test5() { uint u; scanf("%200u%*s\n", u); } -void test6() { scanf("%hhd\n", 3.0); } -void test7() { scanf("%hd\n", 4); } -//void test8() { scanf("%ld\n", 3.0); } -void test9() { scanf("%lld\n", 4); } -void test10() { scanf("%jd\n", 5); } -void test11() { scanf("%zd\n", 6.0); } -void test12() { scanf("%td\n", 7.0); } -void test13() { scanf("%Ld\n", 0); } -void test14() { scanf("%u\n", 0); } -void test15() { scanf("%hhu\n", 0); } -void test16() { scanf("%hu\n", 0); } -//void test17() { scanf("%lu\n", 0); } -void test18() { scanf("%llu\n", 0); } -void test19() { scanf("%ju\n", 0); } -void test20() { scanf("%zu\n", 0); } -void test21() { scanf("%tu\n", 0); } -void test22() { scanf("%g\n", 8.0L); } -void test23() { scanf("%lg\n", 8.0L); } -void test24() { scanf("%Lg\n", 9.0); } -void test25() { int u; scanf("%s\n", &u); } -void test26() { int u; scanf("%ls\n", &u); } -void test27() { void* v; scanf("%p\n", v); } -void test28() { ushort u; scanf("%n\n", &u); } -void test29() { int u; scanf("%hhn\n", &u); } -void test30() { int u; scanf("%[n", &u); } -void test31() { int u; scanf("%]\n", &u); } -void test32() { int u; scanf("%90s\n", &u); } -void test33() { sscanf("1234", "%d\n", 0L); } -void test34() { fscanf(null, "%d\n", 0L); } From 547cd7832f336afef17add805060d09ec6e4de15 Mon Sep 17 00:00:00 2001 From: Luhrel Date: Tue, 3 Mar 2020 14:17:26 +0100 Subject: [PATCH 07/10] Fix runnable/test20.d printf args --- test/runnable/test20.d | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/runnable/test20.d b/test/runnable/test20.d index 799f5b3ff5d7..2f650a724687 100644 --- a/test/runnable/test20.d +++ b/test/runnable/test20.d @@ -841,11 +841,11 @@ void test45() { S45 s = S45(10); S45 val = pow!(S45)(s,2); - printf("x = %2.2lf, y = 
%2.2lf\n", val.x, val.y); + printf("x = %2.2f, y = %2.2f\n", val.x, val.y); assert(val.x == 100); assert(val.y == 0); double d = pow!(double)(10,3); - printf("%2.2lf\n", d); + printf("%2.2f\n", d); assert(d == 1000); } From d357d879bfe3511da3918edaee8503d58b3ff25b Mon Sep 17 00:00:00 2001 From: Luhrel Date: Wed, 4 Mar 2020 09:14:41 +0100 Subject: [PATCH 08/10] Merge documentation --- changelog/{chkprintf.md => chkformat.md} | 20 +++++----- changelog/chkscanf.md | 50 ------------------------ 2 files changed, 10 insertions(+), 60 deletions(-) rename changelog/{chkprintf.md => chkformat.md} (68%) delete mode 100644 changelog/chkscanf.md diff --git a/changelog/chkprintf.md b/changelog/chkformat.md similarity index 68% rename from changelog/chkprintf.md rename to changelog/chkformat.md index 9d9b61b4508f..e6ca23e72a3d 100644 --- a/changelog/chkprintf.md +++ b/changelog/chkformat.md @@ -1,12 +1,12 @@ -# Validate printf arguments against format specifiers +# Validate printf and scanf (variants too) arguments against format specifiers -Follows the C99 specification 7.19.6.1 for printf. +Follows the C99 specification 7.19.6.1 for printf and 7.19.6.2 for scanf. -sprintf and fprintf are also affected by this change. - -Takes a generous, rather than strict, view of compatiblity. +For printf, it takes a generous, rather than strict, view of compatiblity. For example, an unsigned value can be formatted with a signed specifier. +For scanf, it takes a strict view of compatiblity. + Diagnosed incompatibilities are: 1. incompatible sizes which will cause argument misalignment @@ -22,7 +22,7 @@ Per the C Standard, extra arguments are ignored. No attempt is made to fix the arguments or the format string. -In order to use non-Standard printf formats, an easy workaround is: +In order to use non-Standard printf/scanf formats, an easy workaround is: ``` printf("%k\n", value); // error: non-Standard format k @@ -38,14 +38,14 @@ Most of the errors detected are portability issues. 
For instance, string s; printf("%.*s\n", s.length, s.ptr); printf("%d\n", s.sizeof); -long i; -printf("%ld\n", i); +ulong u; +scanf("%lld%*c\n", u); ``` should be replaced with: ``` string s; printf("%.*s\n", cast(int) s.length, s.ptr); printf("%zd\n", s.sizeof); -long i; -printf("%lld\n", i); +ulong u; +scanf("%llu%*c\n", u); ``` diff --git a/changelog/chkscanf.md b/changelog/chkscanf.md deleted file mode 100644 index 18af58f6d597..000000000000 --- a/changelog/chkscanf.md +++ /dev/null @@ -1,50 +0,0 @@ -# Validate scanf arguments against format specifiers - -sscanf and fscanf are also affected by this change. - -Follows the C99 specification 7.19.6.2 for scanf. - -Takes a strict view of compatiblity. - -Diagnosed incompatibilities are: - -1. incompatible sizes which will cause argument misalignment -2. insufficient number of arguments -3. struct arguments -4. array and slice arguments -5. non-standard formats -6. undefined behavior per C99 - -Per the C Standard, extra arguments are ignored. - -No attempt is made to fix the arguments or the format string. - -In order to use non-Standard scanf formats, an easy workaround is: - -``` -scanf("%k\n", value); // error: non-Standard format k -``` -``` -const format = "%k\n"; -scanf(format.ptr, value); // no error -``` - -Most of the errors detected are portability issues. 
For instance, - -``` -int i; -scanf("%ld\n", &i); -size_t s; -scanf("%d\n", &s); -ulong u; -scanf("%lld%*c\n", u); -``` -should be replaced with: -``` -int i; -scanf("%d\n", &i; -size_t s; -scanf("%zd\n", &s); -ulong u; -scanf("%llu%*c\n", u); -``` From 59621f4ae272b996b8ecd34be7a888db8ed3ff08 Mon Sep 17 00:00:00 2001 From: Luhrel Date: Wed, 4 Mar 2020 21:32:01 +0100 Subject: [PATCH 09/10] Little cleanup --- changelog/chkformat.md | 4 ++-- src/build.d | 4 ++-- src/dmd/chkformat.d | 5 +++- src/dmd/expressionsem.d | 27 +-------------------- test/fail_compilation/chkformat.d | 39 ++++++++++++++++++------------- 5 files changed, 32 insertions(+), 47 deletions(-) diff --git a/changelog/chkformat.md b/changelog/chkformat.md index e6ca23e72a3d..7ec395eea350 100644 --- a/changelog/chkformat.md +++ b/changelog/chkformat.md @@ -39,7 +39,7 @@ string s; printf("%.*s\n", s.length, s.ptr); printf("%d\n", s.sizeof); ulong u; -scanf("%lld%*c\n", u); +scanf("%lld%*c\n", &u); ``` should be replaced with: ``` @@ -47,5 +47,5 @@ string s; printf("%.*s\n", cast(int) s.length, s.ptr); printf("%zd\n", s.sizeof); ulong u; -scanf("%llu%*c\n", u); +scanf("%llu%*c\n", &u); ``` diff --git a/src/build.d b/src/build.d index 92a0a30fc8ae..5b5b0b22d626 100755 --- a/src/build.d +++ b/src/build.d @@ -1158,7 +1158,7 @@ auto sourceFiles() "), frontend: fileArray(env["D"], " access.d aggregate.d aliasthis.d apply.d argtypes.d argtypes_sysv_x64.d arrayop.d - arraytypes.d ast_node.d astcodegen.d attrib.d blockexit.d builtin.d canthrow.d + arraytypes.d ast_node.d astcodegen.d attrib.d blockexit.d builtin.d canthrow.d chkformat.d cli.d clone.d compiler.d complex.d cond.d constfold.d cppmangle.d cppmanglewin.d ctfeexpr.d ctorflow.d dcast.d dclass.d declaration.d delegatize.d denum.d dimport.d dinifile.d dinterpret.d dmacro.d dmangle.d dmodule.d doc.d dscope.d dstruct.d dsymbol.d dsymbolsem.d @@ -1168,7 +1168,7 @@ auto sourceFiles() parse.d parsetimevisitor.d permissivevisitor.d printast.d safe.d sapply.d 
scanelf.d scanmach.d scanmscoff.d scanomf.d semantic2.d semantic3.d sideeffect.d statement.d statement_rewrite_walker.d statementsem.d staticassert.d staticcond.d target.d templateparamsem.d traits.d - transitivevisitor.d typesem.d typinf.d utils.d visitor.d vsoptions.d foreachvar.d chkprintf.d + transitivevisitor.d typesem.d typinf.d utils.d visitor.d vsoptions.d foreachvar.d "), backendHeaders: fileArray(env["C"], " cc.d cdef.d cgcv.d code.d cv4.d dt.d el.d global.d diff --git a/src/dmd/chkformat.d b/src/dmd/chkformat.d index 7b033287612c..59efaf01873a 100644 --- a/src/dmd/chkformat.d +++ b/src/dmd/chkformat.d @@ -893,6 +893,9 @@ Format parseGenericFormatSpecifier(scope const char[] format, unittest { + /* parseGenericFormatSpecifier + */ + char genSpecifier; size_t idx; @@ -914,7 +917,7 @@ unittest idx = 0; assert(parseGenericFormatSpecifier("k", idx, genSpecifier) == Format.error); - /* + /* parsePrintfFormatSpecifier */ bool widthStar; diff --git a/src/dmd/expressionsem.d b/src/dmd/expressionsem.d index b92597b096c6..fa1431be41a5 100644 --- a/src/dmd/expressionsem.d +++ b/src/dmd/expressionsem.d @@ -2150,8 +2150,7 @@ private bool functionParameters(const ref Loc loc, Scope* sc, (*arguments)[i] = arg; } - /* If calling C scanf(), sscanf(), fscanf(), printf(), sprintf() or fprintf(), - check the format string against the arguments + /* If calling C scanf(), printf(), or any variants, check the format string against the arguments */ if (tf.linkage == LINK.c && fd) { @@ -2185,30 +2184,6 @@ private bool functionParameters(const ref Loc loc, Scope* sc, if (se && chkFn(se.loc, se.peekString(), (*arguments)[paramOffset .. nargs])) err = true; } - /*if (fd.ident == Id.printf && nparams >= 1) - { - auto se = (*arguments)[0].isStringExp(); - if (se && checkPrintfFormat(se.loc, se.peekString(), (*arguments)[1 .. 
nargs])) - err = true; - } - else if (fd.ident == Id.scanf && nparams >= 1) - { - auto se = (*arguments)[0].isStringExp(); - if (se && checkScanfFormat(se.loc, se.peekString(), (*arguments)[1 .. nargs])) - err = true; - } - else if ((fd.ident == Id.sprintf || fd.ident == Id.fprintf) && nparams >= 2) - { - auto se = (*arguments)[1].isStringExp(); - if (se && checkPrintfFormat(se.loc, se.peekString(), (*arguments)[2 .. nargs])) - err = true; - } - else if ((fd.ident == Id.sscanf || fd.ident == Id.fscanf) && nparams >= 2) - { - auto se = (*arguments)[1].isStringExp(); - if (se && checkScanfFormat(se.loc, se.peekString(), (*arguments)[2 .. nargs])) - err = true; - }*/ } /* Remaining problems: diff --git a/test/fail_compilation/chkformat.d b/test/fail_compilation/chkformat.d index 78a57e9143c5..b0784a48a916 100644 --- a/test/fail_compilation/chkformat.d +++ b/test/fail_compilation/chkformat.d @@ -5,23 +5,24 @@ TEST_OUTPUT: fail_compilation/chkformat.d(101): Deprecation: width argument `0L` for format specification `"%*.*d"` must be `int`, not `long` fail_compilation/chkformat.d(101): Deprecation: precision argument `1L` for format specification `"%*.*d"` must be `int`, not `long` fail_compilation/chkformat.d(101): Deprecation: argument `2L` for format specification `"%*.*d"` must be `int`, not `long` -fail_compilation/chkformat.d(103): Deprecation: argument `4` for format specification `"%lld"` must be `long`, not `int` -fail_compilation/chkformat.d(104): Deprecation: argument `5` for format specification `"%jd"` must be `core.stdc.stdint.intmax_t`, not `int` -fail_compilation/chkformat.d(105): Deprecation: argument `6.00000` for format specification `"%zd"` must be `size_t`, not `double` -fail_compilation/chkformat.d(106): Deprecation: argument `7.00000` for format specification `"%td"` must be `ptrdiff_t`, not `double` -fail_compilation/chkformat.d(107): Deprecation: argument `8.00000L` for format specification `"%g"` must be `double`, not `real` 
-fail_compilation/chkformat.d(108): Deprecation: argument `9.00000` for format specification `"%Lg"` must be `real`, not `double` -fail_compilation/chkformat.d(109): Deprecation: argument `10` for format specification `"%p"` must be `void*`, not `int` -fail_compilation/chkformat.d(110): Deprecation: argument `& u` for format specification `"%n"` must be `int*`, not `uint*` -fail_compilation/chkformat.d(112): Deprecation: argument `& u` for format specification `"%lln"` must be `long*`, not `int*` -fail_compilation/chkformat.d(113): Deprecation: argument `& u` for format specification `"%hn"` must be `short*`, not `int*` -fail_compilation/chkformat.d(114): Deprecation: argument `& u` for format specification `"%hhn"` must be `byte*`, not `int*` -fail_compilation/chkformat.d(115): Deprecation: argument `16L` for format specification `"%c"` must be `char`, not `long` -fail_compilation/chkformat.d(116): Deprecation: argument `17L` for format specification `"%c"` must be `char`, not `long` -fail_compilation/chkformat.d(117): Deprecation: argument `& u` for format specification `"%s"` must be `char*`, not `int*` -fail_compilation/chkformat.d(118): Deprecation: argument `& u` for format specification `"%ls"` must be `wchar_t*`, not `int*` -fail_compilation/chkformat.d(119): Deprecation: argument `& u` for format specification `"%d"` must be `int`, not `int*` +fail_compilation/chkformat.d(102): Deprecation: format specifier `"%2.2lf"` is invalid +fail_compilation/chkformat.d(104): Deprecation: argument `4` for format specification `"%lld"` must be `long`, not `int` +fail_compilation/chkformat.d(105): Deprecation: argument `5` for format specification `"%jd"` must be `core.stdc.stdint.intmax_t`, not `int` +fail_compilation/chkformat.d(106): Deprecation: argument `6.00000` for format specification `"%zd"` must be `size_t`, not `double` +fail_compilation/chkformat.d(107): Deprecation: argument `7.00000` for format specification `"%td"` must be `ptrdiff_t`, not `double` 
+fail_compilation/chkformat.d(108): Deprecation: argument `8.00000L` for format specification `"%g"` must be `double`, not `real` +fail_compilation/chkformat.d(109): Deprecation: argument `9.00000` for format specification `"%Lg"` must be `real`, not `double` +fail_compilation/chkformat.d(110): Deprecation: argument `10` for format specification `"%p"` must be `void*`, not `int` +fail_compilation/chkformat.d(111): Deprecation: argument `& u` for format specification `"%n"` must be `int*`, not `uint*` +fail_compilation/chkformat.d(113): Deprecation: argument `& u` for format specification `"%lln"` must be `long*`, not `int*` +fail_compilation/chkformat.d(114): Deprecation: argument `& u` for format specification `"%hn"` must be `short*`, not `int*` +fail_compilation/chkformat.d(115): Deprecation: argument `& u` for format specification `"%hhn"` must be `byte*`, not `int*` +fail_compilation/chkformat.d(116): Deprecation: argument `16L` for format specification `"%c"` must be `char`, not `long` +fail_compilation/chkformat.d(117): Deprecation: argument `17L` for format specification `"%c"` must be `char`, not `long` +fail_compilation/chkformat.d(118): Deprecation: argument `& u` for format specification `"%s"` must be `char*`, not `int*` +fail_compilation/chkformat.d(119): Deprecation: argument `& u` for format specification `"%ls"` must be `wchar_t*`, not `int*` fail_compilation/chkformat.d(120): Deprecation: argument `& u` for format specification `"%d"` must be `int`, not `int*` +fail_compilation/chkformat.d(121): Deprecation: argument `& u` for format specification `"%d"` must be `int`, not `int*` fail_compilation/chkformat.d(201): Deprecation: argument `0L` for format specification `"%d"` must be `int*`, not `long` fail_compilation/chkformat.d(202): Deprecation: more format specifiers than 1 arguments fail_compilation/chkformat.d(203): Deprecation: argument `0L` for format specification `"%d"` must be `int*`, not `long` @@ -63,6 +64,7 @@ import core.stdc.stdio; 
#line 100 void test1() { printf("%*.*d\n", 0L, 1L, 2L); } +void test2() { printf("%2.2lf\n", 3.0); } //void test3() { printf("%ld\n", 3.0); } void test4() { printf("%lld\n", 4); } void test5() { printf("%jd\n", 5); } @@ -119,3 +121,8 @@ void test61() { int u; scanf("%]\n", &u); } void test62() { int u; scanf("%90s\n", &u); } void test63() { sscanf("1234", "%d\n", 0L); } void test64() { fscanf(null, "%d\n", 0L); } + +// TODO - C++ 11 only: +//void test() { vscanf(); } +//void test() { vfscanf(); } +//void test() { vsscanf(); } From 4b1ebbe25004523b2eefabbe528603caf262b4bd Mon Sep 17 00:00:00 2001 From: Luhrel Date: Thu, 5 Mar 2020 19:00:44 +0100 Subject: [PATCH 10/10] Remove validation for sprintf and fprintf --- src/dmd/expressionsem.d | 5 ----- test/fail_compilation/chkformat.d | 6 ++---- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/src/dmd/expressionsem.d b/src/dmd/expressionsem.d index fa1431be41a5..d448a91f01f9 100644 --- a/src/dmd/expressionsem.d +++ b/src/dmd/expressionsem.d @@ -2167,11 +2167,6 @@ private bool functionParameters(const ref Loc loc, Scope* sc, paramOffset = 1; chkFn = &checkScanfFormat; } - else if (fd.ident == Id.sprintf || fd.ident == Id.fprintf) - { - paramOffset = 2; - chkFn = &checkPrintfFormat; - } else if (fd.ident == Id.sscanf || fd.ident == Id.fscanf) { paramOffset = 2; diff --git a/test/fail_compilation/chkformat.d b/test/fail_compilation/chkformat.d index b0784a48a916..ed7f65ae0589 100644 --- a/test/fail_compilation/chkformat.d +++ b/test/fail_compilation/chkformat.d @@ -21,8 +21,6 @@ fail_compilation/chkformat.d(116): Deprecation: argument `16L` for format specif fail_compilation/chkformat.d(117): Deprecation: argument `17L` for format specification `"%c"` must be `char`, not `long` fail_compilation/chkformat.d(118): Deprecation: argument `& u` for format specification `"%s"` must be `char*`, not `int*` fail_compilation/chkformat.d(119): Deprecation: argument `& u` for format specification `"%ls"` must be 
`wchar_t*`, not `int*` -fail_compilation/chkformat.d(120): Deprecation: argument `& u` for format specification `"%d"` must be `int`, not `int*` -fail_compilation/chkformat.d(121): Deprecation: argument `& u` for format specification `"%d"` must be `int`, not `int*` fail_compilation/chkformat.d(201): Deprecation: argument `0L` for format specification `"%d"` must be `int*`, not `long` fail_compilation/chkformat.d(202): Deprecation: more format specifiers than 1 arguments fail_compilation/chkformat.d(203): Deprecation: argument `0L` for format specification `"%d"` must be `int*`, not `long` @@ -82,8 +80,8 @@ void test16() { printf("%c\n", 16L); } void test17() { printf("%c\n", 17L); } void test18() { int u; printf("%s\n", &u); } void test19() { int u; printf("%ls\n", &u); } -void test20() { int u; char[] s; sprintf(&s[0], "%d\n", &u); } -void test21() { int u; fprintf(null, "%d\n", &u); } +//void test20() { int u; char[] s; sprintf(&s[0], "%d\n", &u); } +//void test21() { int u; fprintf(null, "%d\n", &u); } #line 200