sanitizer_common_interceptors_format.inc 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568
  1. //===-- sanitizer_common_interceptors_format.inc ----------------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // Scanf/printf implementation for use in *Sanitizer interceptors.
  10. // Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html
  11. // and http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html
  12. // with a few common GNU extensions.
  13. //
  14. //===----------------------------------------------------------------------===//
  15. #include <stdarg.h>
  16. static const char *parse_number(const char *p, int *out) {
  17. *out = internal_atoll(p);
  18. while (*p >= '0' && *p <= '9')
  19. ++p;
  20. return p;
  21. }
  22. static const char *maybe_parse_param_index(const char *p, int *out) {
  23. // n$
  24. if (*p >= '0' && *p <= '9') {
  25. int number;
  26. const char *q = parse_number(p, &number);
  27. CHECK(q);
  28. if (*q == '$') {
  29. *out = number;
  30. p = q + 1;
  31. }
  32. }
  33. // Otherwise, do not change p. This will be re-parsed later as the field
  34. // width.
  35. return p;
  36. }
  37. static bool char_is_one_of(char c, const char *s) {
  38. return !!internal_strchr(s, c);
  39. }
  40. static const char *maybe_parse_length_modifier(const char *p, char ll[2]) {
  41. if (char_is_one_of(*p, "jztLq")) {
  42. ll[0] = *p;
  43. ++p;
  44. } else if (*p == 'h') {
  45. ll[0] = 'h';
  46. ++p;
  47. if (*p == 'h') {
  48. ll[1] = 'h';
  49. ++p;
  50. }
  51. } else if (*p == 'l') {
  52. ll[0] = 'l';
  53. ++p;
  54. if (*p == 'l') {
  55. ll[1] = 'l';
  56. ++p;
  57. }
  58. }
  59. return p;
  60. }
  61. // Returns true if the character is an integer conversion specifier.
  62. static bool format_is_integer_conv(char c) {
  63. return char_is_one_of(c, "diouxXn");
  64. }
  65. // Returns true if the character is an floating point conversion specifier.
  66. static bool format_is_float_conv(char c) {
  67. return char_is_one_of(c, "aAeEfFgG");
  68. }
  69. // Returns string output character size for string-like conversions,
  70. // or 0 if the conversion is invalid.
  71. static int format_get_char_size(char convSpecifier,
  72. const char lengthModifier[2]) {
  73. if (char_is_one_of(convSpecifier, "CS")) {
  74. return sizeof(wchar_t);
  75. }
  76. if (char_is_one_of(convSpecifier, "cs[")) {
  77. if (lengthModifier[0] == 'l' && lengthModifier[1] == '\0')
  78. return sizeof(wchar_t);
  79. else if (lengthModifier[0] == '\0')
  80. return sizeof(char);
  81. }
  82. return 0;
  83. }
  // Special (non-positive) results of the *_get_value_size() helpers.
  // Positive results are plain sizes in bytes.
  enum FormatStoreSize {
    // The store size depends on the data and has to be computed as wcslen()
    // of the destination buffer.
    FSS_WCSLEN = -2,
    // The store size depends on the data and has to be computed as strlen()
    // of the destination buffer.
    FSS_STRLEN = -1,
    // The conversion specifier is not valid.
    FSS_INVALID = 0
  };
  94. // Returns the memory size of a format directive (if >0), or a value of
  95. // FormatStoreSize.
  96. static int format_get_value_size(char convSpecifier,
  97. const char lengthModifier[2],
  98. bool promote_float) {
  99. if (format_is_integer_conv(convSpecifier)) {
  100. switch (lengthModifier[0]) {
  101. case 'h':
  102. return lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short);
  103. case 'l':
  104. return lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long);
  105. case 'q':
  106. return sizeof(long long);
  107. case 'L':
  108. return sizeof(long long);
  109. case 'j':
  110. return sizeof(INTMAX_T);
  111. case 'z':
  112. return sizeof(SIZE_T);
  113. case 't':
  114. return sizeof(PTRDIFF_T);
  115. case 0:
  116. return sizeof(int);
  117. default:
  118. return FSS_INVALID;
  119. }
  120. }
  121. if (format_is_float_conv(convSpecifier)) {
  122. switch (lengthModifier[0]) {
  123. case 'L':
  124. case 'q':
  125. return sizeof(long double);
  126. case 'l':
  127. return lengthModifier[1] == 'l' ? sizeof(long double)
  128. : sizeof(double);
  129. case 0:
  130. // Printf promotes floats to doubles but scanf does not
  131. return promote_float ? sizeof(double) : sizeof(float);
  132. default:
  133. return FSS_INVALID;
  134. }
  135. }
  136. if (convSpecifier == 'p') {
  137. if (lengthModifier[0] != 0)
  138. return FSS_INVALID;
  139. return sizeof(void *);
  140. }
  141. return FSS_INVALID;
  142. }
  // One parsed scanf conversion directive, as filled in by scanf_parse_next().
  struct ScanfDirective {
    // Positional argument index ("%n$"), or -1 if none was given.
    int argIdx;
    // Explicit maximum field width; 0 if the directive has none.
    int fieldWidth;
    // Points at the '%' that starts the directive.
    const char *begin;
    // Points just past the last character of the directive.
    const char *end;
    // Assignment is suppressed ("*").
    bool suppressed;
    // The output buffer is allocated by scanf ("m").
    bool allocate;
    // Length modifier characters; unused entries are 0.
    char lengthModifier[2];
    // Conversion specifier character; 0 if no directive was found.
    char convSpecifier;
    // The directive is "%a" followed by s, S or [...], which may be either the
    // old GNU allocating extension or POSIX %a followed by literal text.
    bool maybeGnuMalloc;
  };
  154. // Parse scanf format string. If a valid directive in encountered, it is
  155. // returned in dir. This function returns the pointer to the first
  156. // unprocessed character, or 0 in case of error.
  157. // In case of the end-of-string, a pointer to the closing \0 is returned.
  158. static const char *scanf_parse_next(const char *p, bool allowGnuMalloc,
  159. ScanfDirective *dir) {
  160. internal_memset(dir, 0, sizeof(*dir));
  161. dir->argIdx = -1;
  162. while (*p) {
  163. if (*p != '%') {
  164. ++p;
  165. continue;
  166. }
  167. dir->begin = p;
  168. ++p;
  169. // %%
  170. if (*p == '%') {
  171. ++p;
  172. continue;
  173. }
  174. if (*p == '\0') {
  175. return nullptr;
  176. }
  177. // %n$
  178. p = maybe_parse_param_index(p, &dir->argIdx);
  179. CHECK(p);
  180. // *
  181. if (*p == '*') {
  182. dir->suppressed = true;
  183. ++p;
  184. }
  185. // Field width
  186. if (*p >= '0' && *p <= '9') {
  187. p = parse_number(p, &dir->fieldWidth);
  188. CHECK(p);
  189. if (dir->fieldWidth <= 0) // Width if at all must be non-zero
  190. return nullptr;
  191. }
  192. // m
  193. if (*p == 'm') {
  194. dir->allocate = true;
  195. ++p;
  196. }
  197. // Length modifier.
  198. p = maybe_parse_length_modifier(p, dir->lengthModifier);
  199. // Conversion specifier.
  200. dir->convSpecifier = *p++;
  201. // Consume %[...] expression.
  202. if (dir->convSpecifier == '[') {
  203. if (*p == '^')
  204. ++p;
  205. if (*p == ']')
  206. ++p;
  207. while (*p && *p != ']')
  208. ++p;
  209. if (*p == 0)
  210. return nullptr; // unexpected end of string
  211. // Consume the closing ']'.
  212. ++p;
  213. }
  214. // This is unfortunately ambiguous between old GNU extension
  215. // of %as, %aS and %a[...] and newer POSIX %a followed by
  216. // letters s, S or [.
  217. if (allowGnuMalloc && dir->convSpecifier == 'a' &&
  218. !dir->lengthModifier[0]) {
  219. if (*p == 's' || *p == 'S') {
  220. dir->maybeGnuMalloc = true;
  221. ++p;
  222. } else if (*p == '[') {
  223. // Watch for %a[h-j%d], if % appears in the
  224. // [...] range, then we need to give up, we don't know
  225. // if scanf will parse it as POSIX %a [h-j %d ] or
  226. // GNU allocation of string with range dh-j plus %.
  227. const char *q = p + 1;
  228. if (*q == '^')
  229. ++q;
  230. if (*q == ']')
  231. ++q;
  232. while (*q && *q != ']' && *q != '%')
  233. ++q;
  234. if (*q == 0 || *q == '%')
  235. return nullptr;
  236. p = q + 1; // Consume the closing ']'.
  237. dir->maybeGnuMalloc = true;
  238. }
  239. }
  240. dir->end = p;
  241. break;
  242. }
  243. return p;
  244. }
  245. static int scanf_get_value_size(ScanfDirective *dir) {
  246. if (dir->allocate) {
  247. if (!char_is_one_of(dir->convSpecifier, "cCsS["))
  248. return FSS_INVALID;
  249. return sizeof(char *);
  250. }
  251. if (dir->maybeGnuMalloc) {
  252. if (dir->convSpecifier != 'a' || dir->lengthModifier[0])
  253. return FSS_INVALID;
  254. // This is ambiguous, so check the smaller size of char * (if it is
  255. // a GNU extension of %as, %aS or %a[...]) and float (if it is
  256. // POSIX %a followed by s, S or [ letters).
  257. return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float);
  258. }
  259. if (char_is_one_of(dir->convSpecifier, "cCsS[")) {
  260. bool needsTerminator = char_is_one_of(dir->convSpecifier, "sS[");
  261. unsigned charSize =
  262. format_get_char_size(dir->convSpecifier, dir->lengthModifier);
  263. if (charSize == 0)
  264. return FSS_INVALID;
  265. if (dir->fieldWidth == 0) {
  266. if (!needsTerminator)
  267. return charSize;
  268. return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
  269. }
  270. return (dir->fieldWidth + needsTerminator) * charSize;
  271. }
  272. return format_get_value_size(dir->convSpecifier, dir->lengthModifier, false);
  273. }
  274. // Common part of *scanf interceptors.
  275. // Process format string and va_list, and report all store ranges.
  276. // Stops when "consuming" n_inputs input items.
  277. static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc,
  278. const char *format, va_list aq) {
  279. CHECK_GT(n_inputs, 0);
  280. const char *p = format;
  281. COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
  282. while (*p) {
  283. ScanfDirective dir;
  284. p = scanf_parse_next(p, allowGnuMalloc, &dir);
  285. if (!p)
  286. break;
  287. if (dir.convSpecifier == 0) {
  288. // This can only happen at the end of the format string.
  289. CHECK_EQ(*p, 0);
  290. break;
  291. }
  292. // Here the directive is valid. Do what it says.
  293. if (dir.argIdx != -1) {
  294. // Unsupported.
  295. break;
  296. }
  297. if (dir.suppressed)
  298. continue;
  299. int size = scanf_get_value_size(&dir);
  300. if (size == FSS_INVALID) {
  301. Report("%s: WARNING: unexpected format specifier in scanf interceptor: %.*s\n",
  302. SanitizerToolName, static_cast<int>(dir.end - dir.begin), dir.begin);
  303. break;
  304. }
  305. void *argp = va_arg(aq, void *);
  306. if (dir.convSpecifier != 'n')
  307. --n_inputs;
  308. if (n_inputs < 0)
  309. break;
  310. if (size == FSS_STRLEN) {
  311. size = internal_strlen((const char *)argp) + 1;
  312. } else if (size == FSS_WCSLEN) {
  313. // FIXME: actually use wcslen() to calculate it.
  314. size = 0;
  315. }
  316. COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
  317. // For %ms/%mc, write the allocated output buffer as well.
  318. if (dir.allocate) {
  319. char *buf = *(char **)argp;
  320. if (buf)
  321. COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, internal_strlen(buf) + 1);
  322. }
  323. }
  324. }
  325. #if SANITIZER_INTERCEPT_PRINTF
  // One parsed printf conversion directive, as filled in by
  // printf_parse_next().
  struct PrintfDirective {
    // Explicit field width; 0 if none was given or if it is starred.
    int fieldWidth;
    // Explicit precision; 0 if none was given or if it is starred.
    int fieldPrecision;
    // Width argument index, or -1 if not specified ("%*n$").
    int argIdx;
    // Precision argument index, or -1 if not specified (".*n$").
    int precisionIdx;
    // Points at the '%' that starts the directive.
    const char *begin;
    // Points just past the last character of the directive.
    const char *end;
    // The width is taken from the argument list ("*").
    bool starredWidth;
    // The precision is taken from the argument list (".*").
    bool starredPrecision;
    // Length modifier characters; unused entries are 0.
    char lengthModifier[2];
    // Conversion specifier character; 0 if no directive was found.
    char convSpecifier;
  };
  338. static const char *maybe_parse_number(const char *p, int *out) {
  339. if (*p >= '0' && *p <= '9')
  340. p = parse_number(p, out);
  341. return p;
  342. }
  343. static const char *maybe_parse_number_or_star(const char *p, int *out,
  344. bool *star) {
  345. if (*p == '*') {
  346. *star = true;
  347. ++p;
  348. } else {
  349. *star = false;
  350. p = maybe_parse_number(p, out);
  351. }
  352. return p;
  353. }
  354. // Parse printf format string. Same as scanf_parse_next.
  355. static const char *printf_parse_next(const char *p, PrintfDirective *dir) {
  356. internal_memset(dir, 0, sizeof(*dir));
  357. dir->argIdx = -1;
  358. dir->precisionIdx = -1;
  359. while (*p) {
  360. if (*p != '%') {
  361. ++p;
  362. continue;
  363. }
  364. dir->begin = p;
  365. ++p;
  366. // %%
  367. if (*p == '%') {
  368. ++p;
  369. continue;
  370. }
  371. if (*p == '\0') {
  372. return nullptr;
  373. }
  374. // %n$
  375. p = maybe_parse_param_index(p, &dir->precisionIdx);
  376. CHECK(p);
  377. // Flags
  378. while (char_is_one_of(*p, "'-+ #0")) {
  379. ++p;
  380. }
  381. // Field width
  382. p = maybe_parse_number_or_star(p, &dir->fieldWidth,
  383. &dir->starredWidth);
  384. if (!p)
  385. return nullptr;
  386. // Precision
  387. if (*p == '.') {
  388. ++p;
  389. // Actual precision is optional (surprise!)
  390. p = maybe_parse_number_or_star(p, &dir->fieldPrecision,
  391. &dir->starredPrecision);
  392. if (!p)
  393. return nullptr;
  394. // m$
  395. if (dir->starredPrecision) {
  396. p = maybe_parse_param_index(p, &dir->precisionIdx);
  397. CHECK(p);
  398. }
  399. }
  400. // Length modifier.
  401. p = maybe_parse_length_modifier(p, dir->lengthModifier);
  402. // Conversion specifier.
  403. dir->convSpecifier = *p++;
  404. dir->end = p;
  405. break;
  406. }
  407. return p;
  408. }
  409. static int printf_get_value_size(PrintfDirective *dir) {
  410. if (char_is_one_of(dir->convSpecifier, "cCsS")) {
  411. unsigned charSize =
  412. format_get_char_size(dir->convSpecifier, dir->lengthModifier);
  413. if (charSize == 0)
  414. return FSS_INVALID;
  415. if (char_is_one_of(dir->convSpecifier, "sS")) {
  416. return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
  417. }
  418. return charSize;
  419. }
  420. return format_get_value_size(dir->convSpecifier, dir->lengthModifier, true);
  421. }
  // Consumes one non-pointer argument of the given size from the va_list that
  // aq points to. Floating-point conversions are fetched as double (8 bytes) or
  // long double (12 or 16 bytes); all other conversions as u32 (1, 2 or 4
  // bytes) or u64 (8 bytes). For any other size a warning is reported and the
  // macro returns from the enclosing function, which therefore has to return
  // void.
  #define SKIP_SCALAR_ARG(aq, convSpecifier, size)                       \
    do {                                                                 \
      if (format_is_float_conv(convSpecifier)) {                         \
        switch (size) {                                                  \
          case 8:                                                        \
            va_arg(*(aq), double);                                       \
            break;                                                       \
          case 12:                                                       \
          case 16:                                                       \
            va_arg(*(aq), long double);                                  \
            break;                                                       \
          default:                                                       \
            Report("WARNING: unexpected floating-point arg size"         \
                   " in printf interceptor: %zu\n",                      \
                   static_cast<uptr>(size));                             \
            return;                                                      \
        }                                                                \
      } else {                                                           \
        switch (size) {                                                  \
          case 1:                                                        \
          case 2:                                                        \
          case 4:                                                        \
            va_arg(*(aq), u32);                                          \
            break;                                                       \
          case 8:                                                        \
            va_arg(*(aq), u64);                                          \
            break;                                                       \
          default:                                                       \
            Report("WARNING: unexpected arg size"                        \
                   " in printf interceptor: %zu\n",                      \
                   static_cast<uptr>(size));                             \
            return;                                                      \
        }                                                                \
      }                                                                  \
    } while (0)
  457. // Common part of *printf interceptors.
  458. // Process format string and va_list, and report all load ranges.
  459. static void printf_common(void *ctx, const char *format, va_list aq) {
  460. COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
  461. const char *p = format;
  462. while (*p) {
  463. PrintfDirective dir;
  464. p = printf_parse_next(p, &dir);
  465. if (!p)
  466. break;
  467. if (dir.convSpecifier == 0) {
  468. // This can only happen at the end of the format string.
  469. CHECK_EQ(*p, 0);
  470. break;
  471. }
  472. // Here the directive is valid. Do what it says.
  473. if (dir.argIdx != -1 || dir.precisionIdx != -1) {
  474. // Unsupported.
  475. break;
  476. }
  477. if (dir.starredWidth) {
  478. // Dynamic width
  479. SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
  480. }
  481. if (dir.starredPrecision) {
  482. // Dynamic precision
  483. SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
  484. }
  485. // %m does not require an argument: strlen(errno).
  486. if (dir.convSpecifier == 'm')
  487. continue;
  488. int size = printf_get_value_size(&dir);
  489. if (size == FSS_INVALID) {
  490. static int ReportedOnce;
  491. if (!ReportedOnce++)
  492. Report(
  493. "%s: WARNING: unexpected format specifier in printf "
  494. "interceptor: %.*s (reported once per process)\n",
  495. SanitizerToolName, static_cast<int>(dir.end - dir.begin), dir.begin);
  496. break;
  497. }
  498. if (dir.convSpecifier == 'n') {
  499. void *argp = va_arg(aq, void *);
  500. COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
  501. continue;
  502. } else if (size == FSS_STRLEN) {
  503. if (void *argp = va_arg(aq, void *)) {
  504. if (dir.starredPrecision) {
  505. // FIXME: properly support starred precision for strings.
  506. size = 0;
  507. } else if (dir.fieldPrecision > 0) {
  508. // Won't read more than "precision" symbols.
  509. size = internal_strnlen((const char *)argp, dir.fieldPrecision);
  510. if (size < dir.fieldPrecision) size++;
  511. } else {
  512. // Whole string will be accessed.
  513. size = internal_strlen((const char *)argp) + 1;
  514. }
  515. COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
  516. }
  517. } else if (size == FSS_WCSLEN) {
  518. if (void *argp = va_arg(aq, void *)) {
  519. // FIXME: Properly support wide-character strings (via wcsrtombs).
  520. size = 0;
  521. COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
  522. }
  523. } else {
  524. // Skip non-pointer args
  525. SKIP_SCALAR_ARG(&aq, dir.convSpecifier, size);
  526. }
  527. }
  528. }
  529. #endif // SANITIZER_INTERCEPT_PRINTF