12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345 |
- /* strings -- print the strings of printable characters in files
- Copyright (C) 1993-2022 Free Software Foundation, Inc.
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
- 02110-1301, USA. */
- /* Usage: strings [options] file...
- Options:
- --all
- -a
- - Scan each file in its entirety.
- --data
- -d Scan only the initialized data section(s) of object files.
- --print-file-name
- -f Print the name of the file before each string.
- --bytes=min-len
- -n min-len
- -min-len Print graphic char sequences, MIN-LEN or more bytes long,
- that are followed by a NUL or a non-displayable character.
- Default is 4.
- --radix={o,x,d}
- -t {o,x,d} Print the offset within the file before each string,
- in octal/hex/decimal.
- --include-all-whitespace
- -w By default tab and space are the only whitespace included in graphic
- char sequences. This option considers all of isspace() valid.
- -o Like -to. (Some other implementations have -o like -to,
- others like -td. We chose one arbitrarily.)
- --encoding={s,S,b,l,B,L}
- -e {s,S,b,l,B,L}
- Select character encoding: 7-bit-character, 8-bit-character,
- bigendian 16-bit, littleendian 16-bit, bigendian 32-bit,
- littleendian 32-bit.
- --target=BFDNAME
- -T {bfdname}
- Specify a non-default object file format.
- --unicode={default|locale|invalid|hex|escape|highlight}
- -U {d|l|i|x|e|h}
- Determine how to handle UTF-8 unicode characters. The default
- is no special treatment. All other versions of this option
- only apply if the encoding is valid and enabling the option
- implies --encoding=S.
- The 'locale' option displays the characters according to the
- current locale. The 'invalid' option treats them as
- non-string characters. The 'hex' option displays them as hex
- byte sequences. The 'escape' option displays them as escape
- sequences and the 'highlight' option displays them as
- coloured escape sequences.
- --output-separator=sep_string
- -s sep_string String used to separate parsed strings in output.
- Default is newline.
- --help
- -h Print the usage message on the standard output.
- --version
- -V
- -v Print the program version number.
- Written by Richard Stallman <rms@gnu.ai.mit.edu>
- and David MacKenzie <djm@gnu.ai.mit.edu>. */
- #include "sysdep.h"
- #include "bfd.h"
- #include "getopt.h"
- #include "libiberty.h"
- #include "safe-ctype.h"
- #include "bucomm.h"
- #ifndef streq
- #define streq(a,b) (strcmp ((a),(b)) == 0)
- #endif
/* The ways in which a UTF-8 encoded character can be shown.  The value
   is chosen by the -U/--unicode option; unicode_default is the initial
   setting.  */
enum unicode_display_type
{
  unicode_default = 0,
  unicode_locale,
  unicode_escape,
  unicode_hex,
  unicode_highlight,
  unicode_invalid
};

typedef enum unicode_display_type unicode_display_type;

/* The display method currently in force.  */
static unicode_display_type unicode_display = unicode_default;
/* Non-zero when C may appear inside a displayed string.  C must lie in
   the range 0..255 and be a tab, a printable character, a value above
   127 while the 'S' encoding is selected, or any whitespace character
   while include_all_whitespace is set.  C is evaluated more than once,
   so it must not have side effects.  */
#define STRING_ISGRAPHIC(c)                                 \
  (0 <= (c) && (c) <= 255                                   \
   && (ISPRINT (c)                                          \
       || (c) == '\t'                                       \
       || ((c) > 127 && encoding == 'S')                    \
       || (ISSPACE (c) && include_all_whitespace)))
- #ifndef errno
- extern int errno;
- #endif
/* A section counts as initialized data only when all of these BFD
   section flags are set.  */
#define DATA_FLAGS (SEC_ALLOC | SEC_LOAD | SEC_HAS_CONTENTS)

/* Base used when printing string offsets: 8, 10 or 16.  */
static int address_radix;

/* Shortest run of graphic characters that is reported.  */
static unsigned int string_min;

/* When true, every whitespace character is treated as graphic.  */
static bool include_all_whitespace;

/* When true, each string is preceded by its offset in the file.  */
static bool print_addresses;

/* When true, each string is preceded by the name of its file.  */
static bool print_filenames;

/* When true, only the data sections of object files are scanned.  */
static bool datasection_only;

/* Name of the BFD object file format to use.  */
static char *target;

/* Selected character encoding letter, and the number of bytes each
   character occupies in that encoding.  */
static char encoding;
static int encoding_bytes;

/* Text written after each string; NULL selects a newline.  */
static char *output_separator;
/* Long option table handed to getopt_long.  Each entry maps a long
   name onto the short option letter handled in main; the all-zero
   entry terminates the table.  */
static struct option long_options[] =
{
  { .name = "all",                    .has_arg = no_argument,       .flag = NULL, .val = 'a' },
  { .name = "bytes",                  .has_arg = required_argument, .flag = NULL, .val = 'n' },
  { .name = "data",                   .has_arg = no_argument,       .flag = NULL, .val = 'd' },
  { .name = "encoding",               .has_arg = required_argument, .flag = NULL, .val = 'e' },
  { .name = "help",                   .has_arg = no_argument,       .flag = NULL, .val = 'h' },
  { .name = "include-all-whitespace", .has_arg = no_argument,       .flag = NULL, .val = 'w' },
  { .name = "output-separator",       .has_arg = required_argument, .flag = NULL, .val = 's' },
  { .name = "print-file-name",        .has_arg = no_argument,       .flag = NULL, .val = 'f' },
  { .name = "radix",                  .has_arg = required_argument, .flag = NULL, .val = 't' },
  { .name = "target",                 .has_arg = required_argument, .flag = NULL, .val = 'T' },
  { .name = "unicode",                .has_arg = required_argument, .flag = NULL, .val = 'U' },
  { .name = "version",                .has_arg = no_argument,       .flag = NULL, .val = 'v' },
  { .name = NULL,                     .has_arg = 0,                 .flag = NULL, .val = 0 }
};
- static bool strings_file (char *);
- static void print_strings (const char *, FILE *, file_ptr, int, char *);
- static void usage (FILE *, int) ATTRIBUTE_NORETURN;
- int main (int, char **);
- int
- main (int argc, char **argv)
- {
- int optc;
- int exit_status = 0;
- bool files_given = false;
- char *s;
- int numeric_opt = 0;
- setlocale (LC_ALL, "");
- bindtextdomain (PACKAGE, LOCALEDIR);
- textdomain (PACKAGE);
- program_name = argv[0];
- xmalloc_set_program_name (program_name);
- bfd_set_error_program_name (program_name);
- expandargv (&argc, &argv);
- string_min = 4;
- include_all_whitespace = false;
- print_addresses = false;
- print_filenames = false;
- if (DEFAULT_STRINGS_ALL)
- datasection_only = false;
- else
- datasection_only = true;
- target = NULL;
- encoding = 's';
- output_separator = NULL;
- while ((optc = getopt_long (argc, argv, "adfhHn:wot:e:T:s:U:Vv0123456789",
- long_options, (int *) 0)) != EOF)
- {
- switch (optc)
- {
- case 'a':
- datasection_only = false;
- break;
- case 'd':
- datasection_only = true;
- break;
- case 'f':
- print_filenames = true;
- break;
- case 'H':
- case 'h':
- usage (stdout, 0);
- case 'n':
- string_min = (int) strtoul (optarg, &s, 0);
- if (s != NULL && *s != 0)
- fatal (_("invalid integer argument %s"), optarg);
- break;
- case 'w':
- include_all_whitespace = true;
- break;
- case 'o':
- print_addresses = true;
- address_radix = 8;
- break;
- case 't':
- print_addresses = true;
- if (optarg[1] != '\0')
- usage (stderr, 1);
- switch (optarg[0])
- {
- case 'o':
- address_radix = 8;
- break;
- case 'd':
- address_radix = 10;
- break;
- case 'x':
- address_radix = 16;
- break;
- default:
- usage (stderr, 1);
- }
- break;
- case 'T':
- target = optarg;
- break;
- case 'e':
- if (optarg[1] != '\0')
- usage (stderr, 1);
- encoding = optarg[0];
- break;
- case 's':
- output_separator = optarg;
- break;
- case 'U':
- if (streq (optarg, "default") || streq (optarg, "d"))
- unicode_display = unicode_default;
- else if (streq (optarg, "locale") || streq (optarg, "l"))
- unicode_display = unicode_locale;
- else if (streq (optarg, "escape") || streq (optarg, "e"))
- unicode_display = unicode_escape;
- else if (streq (optarg, "invalid") || streq (optarg, "i"))
- unicode_display = unicode_invalid;
- else if (streq (optarg, "hex") || streq (optarg, "x"))
- unicode_display = unicode_hex;
- else if (streq (optarg, "highlight") || streq (optarg, "h"))
- unicode_display = unicode_highlight;
- else
- fatal (_("invalid argument to -U/--unicode: %s"), optarg);
- break;
- case 'V':
- case 'v':
- print_version ("strings");
- break;
- case '?':
- usage (stderr, 1);
- default:
- numeric_opt = optind;
- break;
- }
- }
- if (unicode_display != unicode_default)
- encoding = 'S';
- if (numeric_opt != 0)
- {
- string_min = (int) strtoul (argv[numeric_opt - 1] + 1, &s, 0);
- if (s != NULL && *s != 0)
- fatal (_("invalid integer argument %s"), argv[numeric_opt - 1] + 1);
- }
- if (string_min < 1)
- fatal (_("invalid minimum string length %d"), string_min);
- switch (encoding)
- {
- case 'S':
- case 's':
- encoding_bytes = 1;
- break;
- case 'b':
- case 'l':
- encoding_bytes = 2;
- break;
- case 'B':
- case 'L':
- encoding_bytes = 4;
- break;
- default:
- usage (stderr, 1);
- }
- if (bfd_init () != BFD_INIT_MAGIC)
- fatal (_("fatal error: libbfd ABI mismatch"));
- set_default_bfd_target ();
- if (optind >= argc)
- {
- datasection_only = false;
- SET_BINARY (fileno (stdin));
- print_strings ("{standard input}", stdin, 0, 0, (char *) NULL);
- files_given = true;
- }
- else
- {
- for (; optind < argc; ++optind)
- {
- if (streq (argv[optind], "-"))
- datasection_only = false;
- else
- {
- files_given = true;
- exit_status |= !strings_file (argv[optind]);
- }
- }
- }
- if (!files_given)
- usage (stderr, 1);
- return (exit_status);
- }
- /* Scan section SECT of the file ABFD, whose printable name is
- FILENAME. If it contains initialized data set GOT_A_SECTION and
- print the strings in it. */
- static void
- strings_a_section (bfd *abfd, asection *sect, const char *filename,
- bool *got_a_section)
- {
- bfd_size_type sectsize;
- bfd_byte *mem;
- if ((sect->flags & DATA_FLAGS) != DATA_FLAGS)
- return;
- sectsize = bfd_section_size (sect);
- if (sectsize == 0)
- return;
- if (!bfd_malloc_and_get_section (abfd, sect, &mem))
- {
- non_fatal (_("%s: Reading section %s failed: %s"),
- filename, sect->name, bfd_errmsg (bfd_get_error ()));
- return;
- }
- *got_a_section = true;
- print_strings (filename, NULL, sect->filepos, sectsize, (char *) mem);
- free (mem);
- }
- /* Scan all of the sections in FILE, and print the strings
- in the initialized data section(s).
- Return TRUE if successful,
- FALSE if not (such as if FILE is not an object file). */
- static bool
- strings_object_file (const char *file)
- {
- bfd *abfd;
- asection *s;
- bool got_a_section;
- abfd = bfd_openr (file, target);
- if (abfd == NULL)
- /* Treat the file as a non-object file. */
- return false;
- /* This call is mainly for its side effect of reading in the sections.
- We follow the traditional behavior of `strings' in that we don't
- complain if we don't recognize a file to be an object file. */
- if (!bfd_check_format (abfd, bfd_object))
- {
- bfd_close (abfd);
- return false;
- }
- got_a_section = false;
- for (s = abfd->sections; s != NULL; s = s->next)
- strings_a_section (abfd, s, file, &got_a_section);
- if (!bfd_close (abfd))
- {
- bfd_nonfatal (file);
- return false;
- }
- return got_a_section;
- }
- /* Print the strings in FILE. Return TRUE if ok, FALSE if an error occurs. */
- static bool
- strings_file (char *file)
- {
- struct stat st;
- /* get_file_size does not support non-S_ISREG files. */
- if (stat (file, &st) < 0)
- {
- if (errno == ENOENT)
- non_fatal (_("'%s': No such file"), file);
- else
- non_fatal (_("Warning: could not locate '%s'. reason: %s"),
- file, strerror (errno));
- return false;
- }
- else if (S_ISDIR (st.st_mode))
- {
- non_fatal (_("Warning: '%s' is a directory"), file);
- return false;
- }
- /* If we weren't told to scan the whole file,
- try to open it as an object file and only look at
- initialized data sections. If that fails, fall back to the
- whole file. */
- if (!datasection_only || !strings_object_file (file))
- {
- FILE *stream;
- stream = fopen (file, FOPEN_RB);
- if (stream == NULL)
- {
- fprintf (stderr, "%s: ", program_name);
- perror (file);
- return false;
- }
- print_strings (file, stream, (file_ptr) 0, 0, (char *) NULL);
- if (fclose (stream) == EOF)
- {
- fprintf (stderr, "%s: ", program_name);
- perror (file);
- return false;
- }
- }
- return true;
- }
- /* Read the next character, return EOF if none available.
- Assume that STREAM is positioned so that the next byte read
- is at address ADDRESS in the file.
- If STREAM is NULL, do not read from it.
- The caller can supply a buffer of characters
- to be processed before the data in STREAM.
- MAGIC is the address of the buffer and
- MAGICCOUNT is how many characters are in it. */
- static long
- get_char (FILE *stream, file_ptr *address, int *magiccount, char **magic)
- {
- int c, i;
- long r = 0;
- for (i = 0; i < encoding_bytes; i++)
- {
- if (*magiccount)
- {
- (*magiccount)--;
- c = *(*magic)++;
- }
- else
- {
- if (stream == NULL)
- return EOF;
- /* Only use getc_unlocked if we found a declaration for it.
- Otherwise, libc is not thread safe by default, and we
- should not use it. */
- #if defined(HAVE_GETC_UNLOCKED) && HAVE_DECL_GETC_UNLOCKED
- c = getc_unlocked (stream);
- #else
- c = getc (stream);
- #endif
- if (c == EOF)
- return EOF;
- }
- (*address)++;
- r = (r << 8) | (c & 0xff);
- }
- switch (encoding)
- {
- default:
- break;
- case 'l':
- r = ((r & 0xff) << 8) | ((r & 0xff00) >> 8);
- break;
- case 'L':
- r = (((r & 0xff) << 24) | ((r & 0xff00) << 8)
- | ((r & 0xff0000) >> 8) | ((r & 0xff000000) >> 24));
- break;
- }
- return r;
- }
- /* Throw away one byte of a (possibly) multi-byte char C, updating
- address and buffer to suit. */
- static void
- unget_part_char (long c, file_ptr *address, int *magiccount, char **magic)
- {
- static char tmp[4];
- if (encoding_bytes > 1)
- {
- *address -= encoding_bytes - 1;
- if (*magiccount == 0)
- {
- /* If no magic buffer exists, use temp buffer. */
- switch (encoding)
- {
- default:
- break;
- case 'b':
- tmp[0] = c & 0xff;
- *magiccount = 1;
- break;
- case 'l':
- tmp[0] = (c >> 8) & 0xff;
- *magiccount = 1;
- break;
- case 'B':
- tmp[0] = (c >> 16) & 0xff;
- tmp[1] = (c >> 8) & 0xff;
- tmp[2] = c & 0xff;
- *magiccount = 3;
- break;
- case 'L':
- tmp[0] = (c >> 8) & 0xff;
- tmp[1] = (c >> 16) & 0xff;
- tmp[2] = (c >> 24) & 0xff;
- *magiccount = 3;
- break;
- }
- *magic = tmp;
- }
- else
- {
- /* If magic buffer exists, rewind. */
- *magic -= encoding_bytes - 1;
- *magiccount += encoding_bytes - 1;
- }
- }
- }
- static void
- print_filename_and_address (const char * filename, file_ptr address)
- {
- if (print_filenames)
- printf ("%s: ", filename);
- if (! print_addresses)
- return;
- switch (address_radix)
- {
- case 8:
- if (sizeof (address) > sizeof (long))
- {
- #ifndef __MSVCRT__
- printf ("%7llo ", (unsigned long long) address);
- #else
- printf ("%7I64o ", (unsigned long long) address);
- #endif
- }
- else
- printf ("%7lo ", (unsigned long) address);
- break;
- case 10:
- if (sizeof (address) > sizeof (long))
- {
- #ifndef __MSVCRT__
- printf ("%7llu ", (unsigned long long) address);
- #else
- printf ("%7I64d ", (unsigned long long) address);
- #endif
- }
- else
- printf ("%7ld ", (long) address);
- break;
- case 16:
- if (sizeof (address) > sizeof (long))
- {
- #ifndef __MSVCRT__
- printf ("%7llx ", (unsigned long long) address);
- #else
- printf ("%7I64x ", (unsigned long long) address);
- #endif
- }
- else
- printf ("%7lx ", (unsigned long) address);
- break;
- }
- }
/* Decide whether the bytes at BUFFER, of which BUFLEN are available,
   begin with a well-formed UTF-8 multi-byte sequence.  Returns the
   length of the sequence (2, 3 or 4) if so, and 0 if the first byte
   is not a lead byte, a continuation byte is malformed, or the
   sequence would run past BUFLEN.  */
static unsigned int
is_valid_utf8 (const unsigned char * buffer, unsigned long buflen)
{
  const unsigned char lead = buffer[0];
  unsigned int expected;
  unsigned int k;

  /* Anything below 0xc0 is either ASCII or a stray continuation.  */
  if (lead < 0xc0)
    return 0;

  /* The lead byte determines the total length of the sequence.  */
  if ((lead & 0x20) == 0)
    expected = 2;
  else if ((lead & 0x10) == 0)
    expected = 3;
  else
    expected = 4;

  if (buflen < expected)
    return 0;

  /* Every following byte must have the form 10xxxxxx.  */
  for (k = 1; k < expected; k++)
    if ((buffer[k] & 0xc0) != 0x80)
      return 0;

  return expected;
}
- /* Display a UTF-8 encoded character in BUFFER according to the setting
- of unicode_display. The character is known to be valid.
- Returns the number of bytes consumed. */
- static unsigned int
- display_utf8_char (const unsigned char * buffer)
- {
- unsigned int j;
- unsigned int utf8_len;
- switch (buffer[0] & 0x30)
- {
- case 0x00:
- case 0x10:
- utf8_len = 2;
- break;
- case 0x20:
- utf8_len = 3;
- break;
- default:
- utf8_len = 4;
- }
- switch (unicode_display)
- {
- default:
- fprintf (stderr, "ICE: unexpected unicode display type\n");
- break;
- case unicode_escape:
- case unicode_highlight:
- if (unicode_display == unicode_highlight && isatty (1))
- printf ("\x1B[31;47m"); /* Red. */
- switch (utf8_len)
- {
- case 2:
- printf ("\\u%02x%02x",
- ((buffer[0] & 0x1c) >> 2),
- ((buffer[0] & 0x03) << 6) | (buffer[1] & 0x3f));
- break;
- case 3:
- printf ("\\u%02x%02x",
- ((buffer[0] & 0x0f) << 4) | ((buffer[1] & 0x3c) >> 2),
- ((buffer[1] & 0x03) << 6) | ((buffer[2] & 0x3f)));
- break;
- case 4:
- printf ("\\u%02x%02x%02x",
- ((buffer[0] & 0x07) << 6) | ((buffer[1] & 0x3c) >> 2),
- ((buffer[1] & 0x03) << 6) | ((buffer[2] & 0x3c) >> 2),
- ((buffer[2] & 0x03) << 6) | ((buffer[3] & 0x3f)));
- break;
- default:
- /* URG. */
- break;
- }
- if (unicode_display == unicode_highlight && isatty (1))
- printf ("\033[0m"); /* Default colour. */
- break;
- case unicode_hex:
- putchar ('<');
- printf ("0x");
- for (j = 0; j < utf8_len; j++)
- printf ("%02x", buffer [j]);
- putchar ('>');
- break;
- case unicode_locale:
- printf ("%.1s", buffer);
- break;
- }
- return utf8_len;
- }
- /* Display strings in BUFFER. Treat any UTF-8 encoded characters encountered
- according to the setting of the unicode_display variable. The buffer
- contains BUFLEN bytes.
- Display the characters as if they started at ADDRESS and are contained in
- FILENAME. */
- static void
- print_unicode_buffer (const char * filename,
- file_ptr address,
- const unsigned char * buffer,
- unsigned long buflen)
- {
- /* Paranoia checks... */
- if (filename == NULL
- || buffer == NULL
- || unicode_display == unicode_default
- || encoding != 'S'
- || encoding_bytes != 1)
- {
- fprintf (stderr, "ICE: bad arguments to print_unicode_buffer\n");
- return;
- }
- if (buflen == 0)
- return;
- /* We must only display strings that are at least string_min *characters*
- long. So we scan the buffer in two stages. First we locate the start
- of a potential string. Then we walk along it until we have found
- string_min characters. Then we go back to the start point and start
- displaying characters according to the unicode_display setting. */
- unsigned long start_point = 0;
- unsigned long i = 0;
- unsigned int char_len = 1;
- unsigned int num_found = 0;
- for (i = 0; i < buflen; i += char_len)
- {
- int c = buffer[i];
- char_len = 1;
- /* Find the first potential character of a string. */
- if (! STRING_ISGRAPHIC (c))
- {
- num_found = 0;
- continue;
- }
- if (c > 126)
- {
- if (c < 0xc0)
- {
- num_found = 0;
- continue;
- }
- if ((char_len = is_valid_utf8 (buffer + i, buflen - i)) == 0)
- {
- char_len = 1;
- num_found = 0;
- continue;
- }
- if (unicode_display == unicode_invalid)
- {
- /* We have found a valid UTF-8 character, but we treat it as non-graphic. */
- num_found = 0;
- continue;
- }
- }
- if (num_found == 0)
- /* We have found a potential starting point for a string. */
- start_point = i;
- ++ num_found;
- if (num_found >= string_min)
- break;
- }
- if (num_found < string_min)
- return;
- print_filename_and_address (filename, address + start_point);
- /* We have found string_min characters. Display them and any
- more that follow. */
- for (i = start_point; i < buflen; i += char_len)
- {
- int c = buffer[i];
- char_len = 1;
- if (! STRING_ISGRAPHIC (c))
- break;
- else if (c < 127)
- putchar (c);
- else if (! is_valid_utf8 (buffer + i, buflen - i))
- break;
- else if (unicode_display == unicode_invalid)
- break;
- else
- char_len = display_utf8_char (buffer + i);
- }
- if (output_separator)
- fputs (output_separator, stdout);
- else
- putchar ('\n');
- /* FIXME: Using tail recursion here is lazy programming... */
- print_unicode_buffer (filename, address + i, buffer + i, buflen - i);
- }
/* Return the next input byte for the unicode stream scanner.  Bytes
   that were pushed back are returned first, most recently pushed
   first: PUTBACK holds them and *NUM_PUTBACK counts them.  Otherwise
   a byte is read from STREAM and *NUM_READ is incremented; the result
   is then whatever getc returned, which may be EOF.  */
static int
get_unicode_byte (FILE * stream,
                  unsigned char * putback,
                  unsigned int * num_putback,
                  unsigned int * num_read)
{
  if (* num_putback == 0)
    {
      ++ * num_read;
#if defined(HAVE_GETC_UNLOCKED) && HAVE_DECL_GETC_UNLOCKED
      return getc_unlocked (stream);
#else
      return getc (stream);
#endif
    }

  -- * num_putback;
  return putback [* num_putback];
}
- /* Helper function for print_unicode_stream. */
- static void
- print_unicode_stream_body (const char * filename,
- file_ptr address,
- FILE * stream,
- unsigned char * putback_buf,
- unsigned int num_putback,
- unsigned char * print_buf)
- {
- /* It would be nice if we could just read the stream into a buffer
- and then process if with print_unicode_buffer. But the input
- might be huge or it might time-locked (eg stdin). So instead
- we go one byte at a time... */
- file_ptr start_point = 0;
- unsigned int num_read = 0;
- unsigned int num_chars = 0;
- unsigned int num_print = 0;
- int c = 0;
- /* Find a series of string_min characters. Put them into print_buf. */
- do
- {
- if (num_chars >= string_min)
- break;
- c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read);
- if (c == EOF)
- break;
- if (! STRING_ISGRAPHIC (c))
- {
- num_chars = num_print = 0;
- continue;
- }
- if (num_chars == 0)
- start_point = num_read - 1;
- if (c < 127)
- {
- print_buf[num_print] = c;
- num_chars ++;
- num_print ++;
- continue;
- }
- if (c < 0xc0)
- {
- num_chars = num_print = 0;
- continue;
- }
- /* We *might* have a UTF-8 sequence. Time to start peeking. */
- char utf8[4];
- utf8[0] = c;
- c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read);
- if (c == EOF)
- break;
- utf8[1] = c;
- if ((utf8[1] & 0xc0) != 0x80)
- {
- /* Invalid UTF-8. */
- putback_buf[num_putback++] = utf8[1];
- num_chars = num_print = 0;
- continue;
- }
- else if ((utf8[0] & 0x20) == 0)
- {
- /* A valid 2-byte UTF-8 encoding. */
- if (unicode_display == unicode_invalid)
- {
- putback_buf[num_putback++] = utf8[1];
- num_chars = num_print = 0;
- }
- else
- {
- print_buf[num_print ++] = utf8[0];
- print_buf[num_print ++] = utf8[1];
- num_chars ++;
- }
- continue;
- }
- c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read);
- if (c == EOF)
- break;
- utf8[2] = c;
- if ((utf8[2] & 0xc0) != 0x80)
- {
- /* Invalid UTF-8. */
- putback_buf[num_putback++] = utf8[2];
- putback_buf[num_putback++] = utf8[1];
- num_chars = num_print = 0;
- continue;
- }
- else if ((utf8[0] & 0x10) == 0)
- {
- /* A valid 3-byte UTF-8 encoding. */
- if (unicode_display == unicode_invalid)
- {
- putback_buf[num_putback++] = utf8[2];
- putback_buf[num_putback++] = utf8[1];
- num_chars = num_print = 0;
- }
- else
- {
- print_buf[num_print ++] = utf8[0];
- print_buf[num_print ++] = utf8[1];
- print_buf[num_print ++] = utf8[2];
- num_chars ++;
- }
- continue;
- }
- c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read);
- if (c == EOF)
- break;
- utf8[3] = c;
- if ((utf8[3] & 0xc0) != 0x80)
- {
- /* Invalid UTF-8. */
- putback_buf[num_putback++] = utf8[3];
- putback_buf[num_putback++] = utf8[2];
- putback_buf[num_putback++] = utf8[1];
- num_chars = num_print = 0;
- }
- /* We have a valid 4-byte UTF-8 encoding. */
- else if (unicode_display == unicode_invalid)
- {
- putback_buf[num_putback++] = utf8[3];
- putback_buf[num_putback++] = utf8[1];
- putback_buf[num_putback++] = utf8[2];
- num_chars = num_print = 0;
- }
- else
- {
- print_buf[num_print ++] = utf8[0];
- print_buf[num_print ++] = utf8[1];
- print_buf[num_print ++] = utf8[2];
- print_buf[num_print ++] = utf8[3];
- num_chars ++;
- }
- }
- while (1);
- if (num_chars >= string_min)
- {
- /* We know that we have string_min valid characters in print_buf,
- and there may be more to come in the stream. Start displaying
- them. */
- print_filename_and_address (filename, address + start_point);
- unsigned int i;
- for (i = 0; i < num_print;)
- {
- if (print_buf[i] < 127)
- putchar (print_buf[i++]);
- else
- i += display_utf8_char (print_buf + i);
- }
- /* OK so now we have to start read unchecked bytes. */
- /* Find a series of string_min characters. Put them into print_buf. */
- do
- {
- c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read);
- if (c == EOF)
- break;
- if (! STRING_ISGRAPHIC (c))
- break;
- if (c < 127)
- {
- putchar (c);
- continue;
- }
- if (c < 0xc0)
- break;
- /* We *might* have a UTF-8 sequence. Time to start peeking. */
- unsigned char utf8[4];
- utf8[0] = c;
- c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read);
- if (c == EOF)
- break;
- utf8[1] = c;
- if ((utf8[1] & 0xc0) != 0x80)
- {
- /* Invalid UTF-8. */
- putback_buf[num_putback++] = utf8[1];
- break;
- }
- else if ((utf8[0] & 0x20) == 0)
- {
- /* Valid 2-byte UTF-8. */
- if (unicode_display == unicode_invalid)
- {
- putback_buf[num_putback++] = utf8[1];
- break;
- }
- else
- {
- (void) display_utf8_char (utf8);
- continue;
- }
- }
- c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read);
- if (c == EOF)
- break;
- utf8[2] = c;
- if ((utf8[2] & 0xc0) != 0x80)
- {
- /* Invalid UTF-8. */
- putback_buf[num_putback++] = utf8[2];
- putback_buf[num_putback++] = utf8[1];
- break;
- }
- else if ((utf8[0] & 0x10) == 0)
- {
- /* Valid 3-byte UTF-8. */
- if (unicode_display == unicode_invalid)
- {
- putback_buf[num_putback++] = utf8[2];
- putback_buf[num_putback++] = utf8[1];
- break;
- }
- else
- {
- (void) display_utf8_char (utf8);
- continue;
- }
- }
- c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read);
- if (c == EOF)
- break;
- utf8[3] = c;
- if ((utf8[3] & 0xc0) != 0x80)
- {
- /* Invalid UTF-8. */
- putback_buf[num_putback++] = utf8[3];
- putback_buf[num_putback++] = utf8[2];
- putback_buf[num_putback++] = utf8[1];
- break;
- }
- else if (unicode_display == unicode_invalid)
- {
- putback_buf[num_putback++] = utf8[3];
- putback_buf[num_putback++] = utf8[2];
- putback_buf[num_putback++] = utf8[1];
- break;
- }
- else
- /* A valid 4-byte UTF-8 encoding. */
- (void) display_utf8_char (utf8);
- }
- while (1);
- if (output_separator)
- fputs (output_separator, stdout);
- else
- putchar ('\n');
- }
- if (c != EOF)
- /* FIXME: Using tail recursion here is lazy, but it works. */
- print_unicode_stream_body (filename, address + num_read, stream, putback_buf, num_putback, print_buf);
- }
- /* Display strings read in from STREAM. Treat any UTF-8 encoded characters
- encountered according to the setting of the unicode_display variable.
- The stream is positioned at ADDRESS and is attached to FILENAME. */
- static void
- print_unicode_stream (const char * filename,
- file_ptr address,
- FILE * stream)
- {
- /* Paranoia checks... */
- if (filename == NULL
- || stream == NULL
- || unicode_display == unicode_default
- || encoding != 'S'
- || encoding_bytes != 1)
- {
- fprintf (stderr, "ICE: bad arguments to print_unicode_stream\n");
- return;
- }
- /* Allocate space for string_min 4-byte utf-8 characters. */
- unsigned char * print_buf = xmalloc ((4 * string_min) + 1);
- /* We should never have to put back more than 4 bytes. */
- unsigned char putback_buf[5];
- unsigned int num_putback = 0;
- print_unicode_stream_body (filename, address, stream, putback_buf, num_putback, print_buf);
- free (print_buf);
- }
- /* Find the strings in file FILENAME, read from STREAM.
- Assume that STREAM is positioned so that the next byte read
- is at address ADDRESS in the file.
- If STREAM is NULL, do not read from it.
- The caller can supply a buffer of characters
- to be processed before the data in STREAM.
- MAGIC is the address of the buffer and
- MAGICCOUNT is how many characters are in it.
- Those characters come at address ADDRESS and the data in STREAM follow. */
- static void
- print_strings (const char *filename, FILE *stream, file_ptr address,
- int magiccount, char *magic)
- {
- if (unicode_display != unicode_default)
- {
- if (magic != NULL)
- print_unicode_buffer (filename, address,
- (const unsigned char *) magic, magiccount);
- if (stream != NULL)
- print_unicode_stream (filename, address, stream);
- return;
- }
- char *buf = (char *) xmalloc (sizeof (char) * (string_min + 1));
- while (1)
- {
- file_ptr start;
- unsigned int i;
- long c;
- /* See if the next `string_min' chars are all graphic chars. */
- tryline:
- start = address;
- for (i = 0; i < string_min; i++)
- {
- c = get_char (stream, &address, &magiccount, &magic);
- if (c == EOF)
- {
- free (buf);
- return;
- }
- if (! STRING_ISGRAPHIC (c))
- {
- /* Found a non-graphic. Try again starting with next byte. */
- unget_part_char (c, &address, &magiccount, &magic);
- goto tryline;
- }
- buf[i] = c;
- }
- /* We found a run of `string_min' graphic characters. Print up
- to the next non-graphic character. */
- print_filename_and_address (filename, start);
- buf[i] = '\0';
- fputs (buf, stdout);
- while (1)
- {
- c = get_char (stream, &address, &magiccount, &magic);
- if (c == EOF)
- break;
- if (! STRING_ISGRAPHIC (c))
- {
- unget_part_char (c, &address, &magiccount, &magic);
- break;
- }
- putchar (c);
- }
- if (output_separator)
- fputs (output_separator, stdout);
- else
- putchar ('\n');
- }
- free (buf);
- }
- static void
- usage (FILE *stream, int status)
- {
- fprintf (stream, _("Usage: %s [option(s)] [file(s)]\n"), program_name);
- fprintf (stream, _(" Display printable strings in [file(s)] (stdin by default)\n"));
- fprintf (stream, _(" The options are:\n"));
- if (DEFAULT_STRINGS_ALL)
- fprintf (stream, _("\
- -a - --all Scan the entire file, not just the data section [default]\n\
- -d --data Only scan the data sections in the file\n"));
- else
- fprintf (stream, _("\
- -a - --all Scan the entire file, not just the data section\n\
- -d --data Only scan the data sections in the file [default]\n"));
- fprintf (stream, _("\
- -f --print-file-name Print the name of the file before each string\n\
- -n <number> Locate & print any sequence of at least <number>\n\
- --bytes=<number> displayable characters. (The default is 4).\n\
- -t --radix={o,d,x} Print the location of the string in base 8, 10 or 16\n\
- -w --include-all-whitespace Include all whitespace as valid string characters\n\
- -o An alias for --radix=o\n\
- -T --target=<BFDNAME> Specify the binary file format\n\
- -e --encoding={s,S,b,l,B,L} Select character size and endianness:\n\
- s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit\n\
- --unicode={default|show|invalid|hex|escape|highlight}\n\
- -U {d|s|i|x|e|h} Specify how to treat UTF-8 encoded unicode characters\n\
- -s --output-separator=<string> String used to separate strings in output.\n\
- @<file> Read options from <file>\n\
- -h --help Display this information\n\
- -v -V --version Print the program's version number\n"));
- list_supported_targets (program_name, stream);
- if (REPORT_BUGS_TO[0] && status == 0)
- fprintf (stream, _("Report bugs to %s\n"), REPORT_BUGS_TO);
- exit (status);
- }
|