123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568 |
- //===-- sanitizer_common_interceptors_format.inc ----------------*- C++ -*-===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // Scanf/printf implementation for use in *Sanitizer interceptors.
- // Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html
- // and http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html
- // with a few common GNU extensions.
- //
- //===----------------------------------------------------------------------===//
- #include <stdarg.h>
- static const char *parse_number(const char *p, int *out) {
- *out = internal_atoll(p);
- while (*p >= '0' && *p <= '9')
- ++p;
- return p;
- }
- static const char *maybe_parse_param_index(const char *p, int *out) {
- // n$
- if (*p >= '0' && *p <= '9') {
- int number;
- const char *q = parse_number(p, &number);
- CHECK(q);
- if (*q == '$') {
- *out = number;
- p = q + 1;
- }
- }
- // Otherwise, do not change p. This will be re-parsed later as the field
- // width.
- return p;
- }
// Returns true if c is one of the characters of the NUL-terminated set s.
//
// The terminating NUL of s is deliberately NOT treated as a member of the
// set. A strchr()-style lookup reports '\0' as found (the terminator counts
// as part of the string), which makes callers that walk a format string with
// this predicate step past the end of that string.
static bool char_is_one_of(char c, const char *s) {
  for (; *s != '\0'; ++s) {
    if (*s == c)
      return true;
  }
  return false;
}
// Consumes an optional length modifier at p and records it in ll.
//
// Recognized modifiers are the single characters j, z, t, L and q, plus h and
// l, each of which may be doubled ("hh", "ll"). ll[0] receives the first
// modifier character and ll[1] the second one when it is doubled; entries
// that are not set are left untouched, so the caller is expected to pass a
// zero-initialized array.
//
// Returns the position after the modifier, or p itself when there is none.
// In particular the end of the string ('\0') is never consumed.
static const char *maybe_parse_length_modifier(const char *p, char ll[2]) {
  switch (*p) {
    case 'j':
    case 'z':
    case 't':
    case 'L':
    case 'q':
      ll[0] = *p;
      ++p;
      break;
    case 'h':
    case 'l': {
      const char mod = *p;
      ll[0] = mod;
      ++p;
      // "hh" / "ll": the same letter repeated.
      if (*p == mod) {
        ll[1] = mod;
        ++p;
      }
      break;
    }
    default:
      // No length modifier (this includes the terminating NUL).
      break;
  }
  return p;
}
- // Returns true if the character is an integer conversion specifier.
- static bool format_is_integer_conv(char c) {
- return char_is_one_of(c, "diouxXn");
- }
- // Returns true if the character is an floating point conversion specifier.
- static bool format_is_float_conv(char c) {
- return char_is_one_of(c, "aAeEfFgG");
- }
- // Returns string output character size for string-like conversions,
- // or 0 if the conversion is invalid.
- static int format_get_char_size(char convSpecifier,
- const char lengthModifier[2]) {
- if (char_is_one_of(convSpecifier, "CS")) {
- return sizeof(wchar_t);
- }
- if (char_is_one_of(convSpecifier, "cs[")) {
- if (lengthModifier[0] == 'l' && lengthModifier[1] == '\0')
- return sizeof(wchar_t);
- else if (lengthModifier[0] == '\0')
- return sizeof(char);
- }
- return 0;
- }
// Special (non-positive) results of the *_get_value_size() helpers.
// Positive results are literal byte counts.
enum FormatStoreSize {
  // The conversion specifier is not valid.
  FSS_INVALID = 0,
  // The size is not known in advance; it can be computed as strlen() of the
  // destination buffer.
  FSS_STRLEN = -1,
  // The size is not known in advance; it can be computed as wcslen() of the
  // destination buffer.
  FSS_WCSLEN = -2
};
- // Returns the memory size of a format directive (if >0), or a value of
- // FormatStoreSize.
- static int format_get_value_size(char convSpecifier,
- const char lengthModifier[2],
- bool promote_float) {
- if (format_is_integer_conv(convSpecifier)) {
- switch (lengthModifier[0]) {
- case 'h':
- return lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short);
- case 'l':
- return lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long);
- case 'q':
- return sizeof(long long);
- case 'L':
- return sizeof(long long);
- case 'j':
- return sizeof(INTMAX_T);
- case 'z':
- return sizeof(SIZE_T);
- case 't':
- return sizeof(PTRDIFF_T);
- case 0:
- return sizeof(int);
- default:
- return FSS_INVALID;
- }
- }
- if (format_is_float_conv(convSpecifier)) {
- switch (lengthModifier[0]) {
- case 'L':
- case 'q':
- return sizeof(long double);
- case 'l':
- return lengthModifier[1] == 'l' ? sizeof(long double)
- : sizeof(double);
- case 0:
- // Printf promotes floats to doubles but scanf does not
- return promote_float ? sizeof(double) : sizeof(float);
- default:
- return FSS_INVALID;
- }
- }
- if (convSpecifier == 'p') {
- if (lengthModifier[0] != 0)
- return FSS_INVALID;
- return sizeof(void *);
- }
- return FSS_INVALID;
- }
// One parsed scanf conversion directive, as filled in by scanf_parse_next().
struct ScanfDirective {
  // Argument index, or -1 if not specified ("%n$").
  int argIdx;
  // Maximum field width; 0 when the directive does not give one.
  int fieldWidth;
  // Points at the '%' that starts the directive.
  const char *begin;
  // Points just past the last character of the directive.
  const char *end;
  // Assignment is suppressed ("*").
  bool suppressed;
  // The destination buffer is allocated by scanf ("m").
  bool allocate;
  // Length modifier characters; unused entries are '\0'.
  char lengthModifier[2];
  // Conversion specifier character.
  char convSpecifier;
  // "%a" immediately followed by s, S or [...]: this may be the old GNU
  // allocating extension rather than a POSIX %a conversion.
  bool maybeGnuMalloc;
};
- // Parse scanf format string. If a valid directive in encountered, it is
- // returned in dir. This function returns the pointer to the first
- // unprocessed character, or 0 in case of error.
- // In case of the end-of-string, a pointer to the closing \0 is returned.
- static const char *scanf_parse_next(const char *p, bool allowGnuMalloc,
- ScanfDirective *dir) {
- internal_memset(dir, 0, sizeof(*dir));
- dir->argIdx = -1;
- while (*p) {
- if (*p != '%') {
- ++p;
- continue;
- }
- dir->begin = p;
- ++p;
- // %%
- if (*p == '%') {
- ++p;
- continue;
- }
- if (*p == '\0') {
- return nullptr;
- }
- // %n$
- p = maybe_parse_param_index(p, &dir->argIdx);
- CHECK(p);
- // *
- if (*p == '*') {
- dir->suppressed = true;
- ++p;
- }
- // Field width
- if (*p >= '0' && *p <= '9') {
- p = parse_number(p, &dir->fieldWidth);
- CHECK(p);
- if (dir->fieldWidth <= 0) // Width if at all must be non-zero
- return nullptr;
- }
- // m
- if (*p == 'm') {
- dir->allocate = true;
- ++p;
- }
- // Length modifier.
- p = maybe_parse_length_modifier(p, dir->lengthModifier);
- // Conversion specifier.
- dir->convSpecifier = *p++;
- // Consume %[...] expression.
- if (dir->convSpecifier == '[') {
- if (*p == '^')
- ++p;
- if (*p == ']')
- ++p;
- while (*p && *p != ']')
- ++p;
- if (*p == 0)
- return nullptr; // unexpected end of string
- // Consume the closing ']'.
- ++p;
- }
- // This is unfortunately ambiguous between old GNU extension
- // of %as, %aS and %a[...] and newer POSIX %a followed by
- // letters s, S or [.
- if (allowGnuMalloc && dir->convSpecifier == 'a' &&
- !dir->lengthModifier[0]) {
- if (*p == 's' || *p == 'S') {
- dir->maybeGnuMalloc = true;
- ++p;
- } else if (*p == '[') {
- // Watch for %a[h-j%d], if % appears in the
- // [...] range, then we need to give up, we don't know
- // if scanf will parse it as POSIX %a [h-j %d ] or
- // GNU allocation of string with range dh-j plus %.
- const char *q = p + 1;
- if (*q == '^')
- ++q;
- if (*q == ']')
- ++q;
- while (*q && *q != ']' && *q != '%')
- ++q;
- if (*q == 0 || *q == '%')
- return nullptr;
- p = q + 1; // Consume the closing ']'.
- dir->maybeGnuMalloc = true;
- }
- }
- dir->end = p;
- break;
- }
- return p;
- }
- static int scanf_get_value_size(ScanfDirective *dir) {
- if (dir->allocate) {
- if (!char_is_one_of(dir->convSpecifier, "cCsS["))
- return FSS_INVALID;
- return sizeof(char *);
- }
- if (dir->maybeGnuMalloc) {
- if (dir->convSpecifier != 'a' || dir->lengthModifier[0])
- return FSS_INVALID;
- // This is ambiguous, so check the smaller size of char * (if it is
- // a GNU extension of %as, %aS or %a[...]) and float (if it is
- // POSIX %a followed by s, S or [ letters).
- return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float);
- }
- if (char_is_one_of(dir->convSpecifier, "cCsS[")) {
- bool needsTerminator = char_is_one_of(dir->convSpecifier, "sS[");
- unsigned charSize =
- format_get_char_size(dir->convSpecifier, dir->lengthModifier);
- if (charSize == 0)
- return FSS_INVALID;
- if (dir->fieldWidth == 0) {
- if (!needsTerminator)
- return charSize;
- return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
- }
- return (dir->fieldWidth + needsTerminator) * charSize;
- }
- return format_get_value_size(dir->convSpecifier, dir->lengthModifier, false);
- }
- // Common part of *scanf interceptors.
- // Process format string and va_list, and report all store ranges.
- // Stops when "consuming" n_inputs input items.
- static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc,
- const char *format, va_list aq) {
- CHECK_GT(n_inputs, 0);
- const char *p = format;
- COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
- while (*p) {
- ScanfDirective dir;
- p = scanf_parse_next(p, allowGnuMalloc, &dir);
- if (!p)
- break;
- if (dir.convSpecifier == 0) {
- // This can only happen at the end of the format string.
- CHECK_EQ(*p, 0);
- break;
- }
- // Here the directive is valid. Do what it says.
- if (dir.argIdx != -1) {
- // Unsupported.
- break;
- }
- if (dir.suppressed)
- continue;
- int size = scanf_get_value_size(&dir);
- if (size == FSS_INVALID) {
- Report("%s: WARNING: unexpected format specifier in scanf interceptor: %.*s\n",
- SanitizerToolName, static_cast<int>(dir.end - dir.begin), dir.begin);
- break;
- }
- void *argp = va_arg(aq, void *);
- if (dir.convSpecifier != 'n')
- --n_inputs;
- if (n_inputs < 0)
- break;
- if (size == FSS_STRLEN) {
- size = internal_strlen((const char *)argp) + 1;
- } else if (size == FSS_WCSLEN) {
- // FIXME: actually use wcslen() to calculate it.
- size = 0;
- }
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
- // For %ms/%mc, write the allocated output buffer as well.
- if (dir.allocate) {
- char *buf = *(char **)argp;
- if (buf)
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, internal_strlen(buf) + 1);
- }
- }
- }
- #if SANITIZER_INTERCEPT_PRINTF
// One parsed printf conversion directive, as filled in by
// printf_parse_next().
struct PrintfDirective {
  // Literal field width; 0 when absent or given as '*'.
  int fieldWidth;
  // Literal precision; 0 when absent or given as '*'.
  int fieldPrecision;
  // Argument index given as "n$", or -1 if not specified.
  int argIdx;
  // Precision argument index (".*n$"), or -1 if not specified.
  int precisionIdx;
  // Points at the '%' that starts the directive.
  const char *begin;
  // Points just past the last character of the directive.
  const char *end;
  // The field width is taken from an argument ("*").
  bool starredWidth;
  // The precision is taken from an argument (".*").
  bool starredPrecision;
  // Length modifier characters; unused entries are '\0'.
  char lengthModifier[2];
  // Conversion specifier character.
  char convSpecifier;
};
- static const char *maybe_parse_number(const char *p, int *out) {
- if (*p >= '0' && *p <= '9')
- p = parse_number(p, out);
- return p;
- }
- static const char *maybe_parse_number_or_star(const char *p, int *out,
- bool *star) {
- if (*p == '*') {
- *star = true;
- ++p;
- } else {
- *star = false;
- p = maybe_parse_number(p, out);
- }
- return p;
- }
- // Parse printf format string. Same as scanf_parse_next.
- static const char *printf_parse_next(const char *p, PrintfDirective *dir) {
- internal_memset(dir, 0, sizeof(*dir));
- dir->argIdx = -1;
- dir->precisionIdx = -1;
- while (*p) {
- if (*p != '%') {
- ++p;
- continue;
- }
- dir->begin = p;
- ++p;
- // %%
- if (*p == '%') {
- ++p;
- continue;
- }
- if (*p == '\0') {
- return nullptr;
- }
- // %n$
- p = maybe_parse_param_index(p, &dir->precisionIdx);
- CHECK(p);
- // Flags
- while (char_is_one_of(*p, "'-+ #0")) {
- ++p;
- }
- // Field width
- p = maybe_parse_number_or_star(p, &dir->fieldWidth,
- &dir->starredWidth);
- if (!p)
- return nullptr;
- // Precision
- if (*p == '.') {
- ++p;
- // Actual precision is optional (surprise!)
- p = maybe_parse_number_or_star(p, &dir->fieldPrecision,
- &dir->starredPrecision);
- if (!p)
- return nullptr;
- // m$
- if (dir->starredPrecision) {
- p = maybe_parse_param_index(p, &dir->precisionIdx);
- CHECK(p);
- }
- }
- // Length modifier.
- p = maybe_parse_length_modifier(p, dir->lengthModifier);
- // Conversion specifier.
- dir->convSpecifier = *p++;
- dir->end = p;
- break;
- }
- return p;
- }
- static int printf_get_value_size(PrintfDirective *dir) {
- if (char_is_one_of(dir->convSpecifier, "cCsS")) {
- unsigned charSize =
- format_get_char_size(dir->convSpecifier, dir->lengthModifier);
- if (charSize == 0)
- return FSS_INVALID;
- if (char_is_one_of(dir->convSpecifier, "sS")) {
- return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
- }
- return charSize;
- }
- return format_get_value_size(dir->convSpecifier, dir->lengthModifier, true);
- }
// Consumes one by-value (non-pointer) variadic argument of the given size.
//   aq            - pointer to the va_list to advance.
//   convSpecifier - conversion character; selects floating-point vs. integer
//                   argument classes.
//   size          - size of the argument in bytes.
// On a size it does not know, the macro prints a warning and executes
// `return;`, i.e. it leaves the ENCLOSING function, which therefore has to
// return void.
#define SKIP_SCALAR_ARG(aq, convSpecifier, size)                        \
  do {                                                                  \
    if (!format_is_float_conv(convSpecifier)) {                         \
      switch (size) {                                                   \
      case 1:                                                           \
      case 2:                                                           \
      case 4:                                                           \
        va_arg(*(aq), u32);                                             \
        break;                                                          \
      case 8:                                                           \
        va_arg(*(aq), u64);                                             \
        break;                                                          \
      default:                                                          \
        Report("WARNING: unexpected arg size"                           \
               " in printf interceptor: %zu\n", static_cast<uptr>(size)); \
        return;                                                         \
      }                                                                 \
    } else {                                                            \
      switch (size) {                                                   \
      case 8:                                                           \
        va_arg(*(aq), double);                                          \
        break;                                                          \
      case 12:                                                          \
      case 16:                                                          \
        va_arg(*(aq), long double);                                     \
        break;                                                          \
      default:                                                          \
        Report("WARNING: unexpected floating-point arg size"            \
               " in printf interceptor: %zu\n", static_cast<uptr>(size)); \
        return;                                                         \
      }                                                                 \
    }                                                                   \
  } while (0)
- // Common part of *printf interceptors.
- // Process format string and va_list, and report all load ranges.
- static void printf_common(void *ctx, const char *format, va_list aq) {
- COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
- const char *p = format;
- while (*p) {
- PrintfDirective dir;
- p = printf_parse_next(p, &dir);
- if (!p)
- break;
- if (dir.convSpecifier == 0) {
- // This can only happen at the end of the format string.
- CHECK_EQ(*p, 0);
- break;
- }
- // Here the directive is valid. Do what it says.
- if (dir.argIdx != -1 || dir.precisionIdx != -1) {
- // Unsupported.
- break;
- }
- if (dir.starredWidth) {
- // Dynamic width
- SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
- }
- if (dir.starredPrecision) {
- // Dynamic precision
- SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
- }
- // %m does not require an argument: strlen(errno).
- if (dir.convSpecifier == 'm')
- continue;
- int size = printf_get_value_size(&dir);
- if (size == FSS_INVALID) {
- static int ReportedOnce;
- if (!ReportedOnce++)
- Report(
- "%s: WARNING: unexpected format specifier in printf "
- "interceptor: %.*s (reported once per process)\n",
- SanitizerToolName, static_cast<int>(dir.end - dir.begin), dir.begin);
- break;
- }
- if (dir.convSpecifier == 'n') {
- void *argp = va_arg(aq, void *);
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
- continue;
- } else if (size == FSS_STRLEN) {
- if (void *argp = va_arg(aq, void *)) {
- if (dir.starredPrecision) {
- // FIXME: properly support starred precision for strings.
- size = 0;
- } else if (dir.fieldPrecision > 0) {
- // Won't read more than "precision" symbols.
- size = internal_strnlen((const char *)argp, dir.fieldPrecision);
- if (size < dir.fieldPrecision) size++;
- } else {
- // Whole string will be accessed.
- size = internal_strlen((const char *)argp) + 1;
- }
- COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
- }
- } else if (size == FSS_WCSLEN) {
- if (void *argp = va_arg(aq, void *)) {
- // FIXME: Properly support wide-character strings (via wcsrtombs).
- size = 0;
- COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
- }
- } else {
- // Skip non-pointer args
- SKIP_SCALAR_ARG(&aq, dir.convSpecifier, size);
- }
- }
- }
- #endif // SANITIZER_INTERCEPT_PRINTF
|