mclex.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462
  1. /* mclex.c -- lexer for Windows mc files parser.
  2. Copyright (C) 2007-2022 Free Software Foundation, Inc.
  3. Written by Kai Tietz, Onevision.
  4. This file is part of GNU Binutils.
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 3 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
  16. 02110-1301, USA. */
  17. /* This is a lexer used by the Windows mc file parser.
  18. It basically just recognizes a bunch of keywords. */
  19. #include "sysdep.h"
  20. #include "bfd.h"
  21. #include "bucomm.h"
  22. #include "libiberty.h"
  23. #include "safe-ctype.h"
  24. #include "windmc.h"
  25. #include "mcparse.h"
  26. #include <assert.h>
  27. /* Exported globals: mode flags consulted (and cleared) by yylex; presumably set by the parser -- confirm against mcparse.y. */
  28. bool mclex_want_nl = false; /* When true, yylex returns NL at the next newline it skips and clears this flag. */
  29. bool mclex_want_line = false; /* When true, yylex returns whole lines as MCLINE; a line consisting of "." yields MCENDLINE and clears this flag. */
  30. bool mclex_want_filename = false; /* When true, yylex reads the next token as a (possibly quoted) filename, returns MCFILENAME and clears this flag. */
  31. /* Local globals: lexer input state. */
  32. static unichar *input_stream = NULL; /* Private copy of the text handed to mc_set_content. */
  33. static unichar *input_stream_pos = NULL; /* Current read position inside input_stream; NULL until mc_set_content succeeds. */
  34. static int input_line = 1; /* Current line number, incremented per consumed newline; printed by show_msg. */
  35. static const char *input_filename = NULL; /* Name printed by show_msg; set by mc_set_inputfile. */
  36. void
  37. mc_set_content (const unichar *src)
  38. {
  39. if (!src)
  40. return;
  41. input_stream = input_stream_pos = unichar_dup (src);
  42. }
  43. void
  44. mc_set_inputfile (const char *name)
  45. {
  46. if (! name || *name == 0)
  47. input_filename = "-";
  48. else
  49. {
  50. const char *s1 = strrchr (name, '/');
  51. const char *s2 = strrchr (name, '\\');
  52. if (! s1)
  53. s1 = s2;
  54. if (s1 && s2 && s1 < s2)
  55. s1 = s2;
  56. if (! s1)
  57. s1 = name;
  58. else
  59. s1++;
  60. s1 = xstrdup (s1);
  61. input_filename = s1;
  62. }
  63. }
  64. static void
  65. show_msg (const char *kind, const char *msg, va_list argp)
  66. {
  67. fprintf (stderr, "In %s at line %d: %s: ", input_filename, input_line, kind);
  68. vfprintf (stderr, msg, argp);
  69. fprintf (stderr, ".\n");
  70. }
  /* Emit a "warning" diagnostic.  S is a printf-style format string
     followed by its arguments.  */
  void
  mc_warn (const char *s, ...)
  {
    va_list args;

    va_start (args, s);
    show_msg ("warning", s, args);
    va_end (args);
  }
  /* Emit a "fatal" diagnostic and terminate through xexit with status 1.
     S is a printf-style format string followed by its arguments.  */
  void
  mc_fatal (const char *s, ...)
  {
    va_list args;

    va_start (args, s);
    show_msg ("fatal", s, args);
    va_end (args);

    xexit (1);
  }
  /* Emit a "parser" diagnostic.  S is a printf-style format string
     followed by its arguments.  Unlike mc_fatal this returns to the
     caller.  */
  static void
  mc_error (const char *s, ...)
  {
    va_list args;

    va_start (args, s);
    show_msg ("parser", s, args);
    va_end (args);
  }
  /* Error callback with the yyerror signature: report the message S as
     a "parser" diagnostic.

     S is passed as data behind a literal "%s" format rather than as the
     format string itself, so a '%' occurring in the message text cannot
     be interpreted as a conversion specification.  */
  void
  yyerror (const char *s)
  {
    mc_error ("%s", s);
  }
  101. static unichar *
  102. get_diff (unichar *end, unichar *start)
  103. {
  104. unichar *ret;
  105. unichar save = *end;
  106. *end = 0;
  107. ret = unichar_dup (start);
  108. *end = save;
  109. return ret;
  110. }
  111. static rc_uint_type
  112. parse_digit (unichar ch)
  113. {
  114. rc_uint_type base = 10, v = 0, c;
  115. if (ch == '0')
  116. {
  117. base = 8;
  118. switch (input_stream_pos[0])
  119. {
  120. case 'x': case 'X': base = 16; input_stream_pos++; break;
  121. case 'o': case 'O': base = 8; input_stream_pos++; break;
  122. case 'b': case 'B': base = 2; input_stream_pos++; break;
  123. }
  124. }
  125. else
  126. v = (rc_uint_type) (ch - '0');
  127. while ((ch = input_stream_pos[0]) != 0)
  128. {
  129. if (ch >= 'A' && ch <= 'F')
  130. c = (rc_uint_type) (ch - 'A') + 10;
  131. else if (ch >= 'a' && ch <= 'f')
  132. c = (rc_uint_type) (ch - 'a') + 10;
  133. else if (ch >= '0' && ch <= '9')
  134. c = (rc_uint_type) (ch - '0');
  135. else
  136. break;
  137. v *= base;
  138. v += c;
  139. ++input_stream_pos;
  140. }
  141. if (input_stream_pos[0] == 'U' || input_stream_pos[0] == 'u')
  142. input_stream_pos++;
  143. if (input_stream_pos[0] == 'L' || input_stream_pos[0] == 'l')
  144. input_stream_pos++;
  145. if (input_stream_pos[0] == 'L' || input_stream_pos[0] == 'l')
  146. input_stream_pos++;
  147. return v;
  148. }
  149. static mc_keyword *keyword_top = NULL; /* Head of the singly linked keyword list; mc_add_keyword keeps it ordered by ascending length, then by memcmp order of the name. */
  150. const mc_keyword *
  151. enum_facility (int e)
  152. {
  153. mc_keyword *h = keyword_top;
  154. while (h != NULL)
  155. {
  156. while (h && strcmp (h->group_name, "facility") != 0)
  157. h = h->next;
  158. if (e == 0)
  159. return h;
  160. --e;
  161. if (h)
  162. h = h->next;
  163. }
  164. return h;
  165. }
  166. const mc_keyword *
  167. enum_severity (int e)
  168. {
  169. mc_keyword *h = keyword_top;
  170. while (h != NULL)
  171. {
  172. while (h && strcmp (h->group_name, "severity") != 0)
  173. h = h->next;
  174. if (e == 0)
  175. return h;
  176. --e;
  177. if (h)
  178. h = h->next;
  179. }
  180. return h;
  181. }
  182. static void
  183. mc_add_keyword_ascii (const char *sz, int rid, const char *grp, rc_uint_type nv, const char *sv)
  184. {
  185. unichar *usz, *usv = NULL;
  186. rc_uint_type usz_len;
  187. unicode_from_codepage (&usz_len, &usz, sz, CP_ACP);
  188. if (sv)
  189. unicode_from_codepage (&usz_len, &usv, sv, CP_ACP);
  190. mc_add_keyword (usz, rid, grp, nv, usv);
  191. }
  192. void
  193. mc_add_keyword (unichar *usz, int rid, const char *grp, rc_uint_type nv, unichar *sv)
  194. {
  195. mc_keyword *p, *c, *n;
  196. size_t len = unichar_len (usz);
  197. c = keyword_top;
  198. p = NULL;
  199. while (c != NULL)
  200. {
  201. if (c->len > len)
  202. break;
  203. if (c->len == len)
  204. {
  205. int e = memcmp (usz, c->usz, len * sizeof (unichar));
  206. if (e < 0)
  207. break;
  208. if (! e)
  209. {
  210. if (! strcmp (grp, "keyword") || strcmp (c->group_name, grp) != 0)
  211. fatal (_("Duplicate symbol entered into keyword list."));
  212. c->rid = rid;
  213. c->nval = nv;
  214. c->sval = (!sv ? NULL : unichar_dup (sv));
  215. if (! strcmp (grp, "language"))
  216. {
  217. const wind_language_t *lag = wind_find_language_by_id ((unsigned) nv);
  218. if (lag == NULL)
  219. fatal ("Language ident 0x%lx is not resolvable.\n", (long) nv);
  220. memcpy (&c->lang_info, lag, sizeof (*lag));
  221. }
  222. return;
  223. }
  224. }
  225. c = (p = c)->next;
  226. }
  227. n = xmalloc (sizeof (mc_keyword));
  228. n->next = c;
  229. n->len = len;
  230. n->group_name = grp;
  231. n->usz = usz;
  232. n->rid = rid;
  233. n->nval = nv;
  234. n->sval = (!sv ? NULL : unichar_dup (sv));
  235. if (! strcmp (grp, "language"))
  236. {
  237. const wind_language_t *lag = wind_find_language_by_id ((unsigned) nv);
  238. if (lag == NULL)
  239. fatal ("Language ident 0x%lx is not resolvable.\n", (long) nv);
  240. memcpy (&n->lang_info, lag, sizeof (*lag));
  241. }
  242. if (! p)
  243. keyword_top = n;
  244. else
  245. p->next = n;
  246. }
  247. static int
  248. mc_token (const unichar *t, size_t len)
  249. {
  250. static int was_init = 0;
  251. mc_keyword *k;
  252. if (! was_init)
  253. {
  254. was_init = 1;
  255. mc_add_keyword_ascii ("OutputBase", MCOUTPUTBASE, "keyword", 0, NULL);
  256. mc_add_keyword_ascii ("MessageIdTypedef", MCMESSAGEIDTYPEDEF, "keyword", 0, NULL);
  257. mc_add_keyword_ascii ("SeverityNames", MCSEVERITYNAMES, "keyword", 0, NULL);
  258. mc_add_keyword_ascii ("FacilityNames", MCFACILITYNAMES, "keyword", 0, NULL);
  259. mc_add_keyword_ascii ("LanguageNames", MCLANGUAGENAMES, "keyword", 0, NULL);
  260. mc_add_keyword_ascii ("MessageId", MCMESSAGEID, "keyword", 0, NULL);
  261. mc_add_keyword_ascii ("Severity", MCSEVERITY, "keyword", 0, NULL);
  262. mc_add_keyword_ascii ("Facility", MCFACILITY, "keyword", 0, NULL);
  263. mc_add_keyword_ascii ("SymbolicName", MCSYMBOLICNAME, "keyword", 0, NULL);
  264. mc_add_keyword_ascii ("Language", MCLANGUAGE, "keyword", 0, NULL);
  265. mc_add_keyword_ascii ("Success", MCTOKEN, "severity", 0, NULL);
  266. mc_add_keyword_ascii ("Informational", MCTOKEN, "severity", 1, NULL);
  267. mc_add_keyword_ascii ("Warning", MCTOKEN, "severity", 2, NULL);
  268. mc_add_keyword_ascii ("Error", MCTOKEN, "severity", 3, NULL);
  269. mc_add_keyword_ascii ("System", MCTOKEN, "facility", 0xff, NULL);
  270. mc_add_keyword_ascii ("Application", MCTOKEN, "facility", 0xfff, NULL);
  271. mc_add_keyword_ascii ("English", MCTOKEN, "language", 0x409, "MSG00001");
  272. }
  273. k = keyword_top;
  274. if (!len || !t || *t == 0)
  275. return -1;
  276. while (k != NULL)
  277. {
  278. if (k->len > len)
  279. break;
  280. if (k->len == len)
  281. {
  282. if (! memcmp (k->usz, t, len * sizeof (unichar)))
  283. {
  284. if (k->rid == MCTOKEN)
  285. yylval.tok = k;
  286. return k->rid;
  287. }
  288. }
  289. k = k->next;
  290. }
  291. return -1;
  292. }
  293. /* Skip characters in input_stream_pos up to and including a newline
  294. character. Returns non-zero if the newline was found, zero otherwise. */
  295. static int
  296. skip_until_eol (void)
  297. {
  298. while (input_stream_pos[0] != 0 && input_stream_pos[0] != '\n')
  299. ++input_stream_pos;
  300. if (input_stream_pos[0] == 0)
  301. return 0;
  302. if (input_stream_pos[0] == '\n')
  303. {
  304. ++input_stream_pos;
  305. input_line += 1;
  306. }
  307. return 1;
  308. }
  309. int
  310. yylex (void)
  311. {
  312. unichar *start_token;
  313. unichar ch;
  314. if (! input_stream_pos)
  315. {
  316. fatal ("Input stream not setuped.\n");
  317. return -1;
  318. }
  319. if (mclex_want_line)
  320. {
  321. start_token = input_stream_pos;
  322. if (input_stream_pos[0] == 0)
  323. return -1;
  324. /* PR 26082: Reject a period followed by EOF. */
  325. if (input_stream_pos[0] == '.' && input_stream_pos[1] == 0)
  326. return -1;
  327. if (input_stream_pos[0] == '.'
  328. && (input_stream_pos[1] == '\n'
  329. || (input_stream_pos[1] == '\r' && input_stream_pos[2] == '\n')))
  330. {
  331. mclex_want_line = false;
  332. return skip_until_eol () ? MCENDLINE : -1;
  333. }
  334. if (!skip_until_eol ())
  335. return -1;
  336. yylval.ustr = get_diff (input_stream_pos, start_token);
  337. return MCLINE;
  338. }
  339. while ((ch = input_stream_pos[0]) <= 0x20)
  340. {
  341. if (ch == 0)
  342. return -1;
  343. ++input_stream_pos;
  344. if (ch == '\n')
  345. input_line += 1;
  346. if (mclex_want_nl && ch == '\n')
  347. {
  348. mclex_want_nl = false;
  349. return NL;
  350. }
  351. }
  352. start_token = input_stream_pos;
  353. ++input_stream_pos;
  354. if (mclex_want_filename)
  355. {
  356. mclex_want_filename = false;
  357. if (ch == '"')
  358. {
  359. start_token++;
  360. while ((ch = input_stream_pos[0]) != 0)
  361. {
  362. if (ch == '"')
  363. break;
  364. ++input_stream_pos;
  365. }
  366. yylval.ustr = get_diff (input_stream_pos, start_token);
  367. if (ch == '"')
  368. ++input_stream_pos;
  369. }
  370. else
  371. {
  372. while ((ch = input_stream_pos[0]) != 0)
  373. {
  374. if (ch <= 0x20 || ch == ')')
  375. break;
  376. ++input_stream_pos;
  377. }
  378. yylval.ustr = get_diff (input_stream_pos, start_token);
  379. }
  380. return MCFILENAME;
  381. }
  382. switch (ch)
  383. {
  384. case ';':
  385. ++start_token;
  386. if (!skip_until_eol ())
  387. return -1;
  388. yylval.ustr = get_diff (input_stream_pos, start_token);
  389. return MCCOMMENT;
  390. case '=':
  391. return '=';
  392. case '(':
  393. return '(';
  394. case ')':
  395. return ')';
  396. case '+':
  397. return '+';
  398. case ':':
  399. return ':';
  400. case '0': case '1': case '2': case '3': case '4':
  401. case '5': case '6': case '7': case '8': case '9':
  402. yylval.ival = parse_digit (ch);
  403. return MCNUMBER;
  404. default:
  405. if (ch >= 0x40)
  406. {
  407. int ret;
  408. while (input_stream_pos[0] >= 0x40 || (input_stream_pos[0] >= '0' && input_stream_pos[0] <= '9'))
  409. ++input_stream_pos;
  410. ret = mc_token (start_token, (size_t) (input_stream_pos - start_token));
  411. if (ret != -1)
  412. return ret;
  413. yylval.ustr = get_diff (input_stream_pos, start_token);
  414. return MCIDENT;
  415. }
  416. mc_error ("illegal character 0x%x.", ch);
  417. }
  418. return -1;
  419. }